{-# LANGUAGE ViewPatterns #-}
{-# LANGUAGE DerivingStrategies #-}
{-# LANGUAGE GeneralisedNewtypeDeriving #-}

module QuickSearch.Internal.Matcher
    ( Score
    , Scorer
    , Match(..)
    , QuickSearch(QuickSearch)
    , scoreMatches
    , matchScore
    , matchEntry
    , quickSearchEntries
    , quickSearchTokenFilter
    ) where

import           Control.Arrow                  ( Arrow((&&&)) )
import           Data.Bifunctor                 ( Bifunctor(..) )
import qualified Data.HashMap.Lazy             as HMap
import qualified Data.HashSet                  as HSet
import           Data.Hashable                  ( Hashable )
import           Data.List                      ( sortBy )
import           Data.Ord                       ( Down(Down)
                                                , comparing
                                                )
import           Data.Ratio                     ( Ratio
                                                , denominator
                                                , numerator
                                                )
import qualified Data.Text                     as T

import           QuickSearch.Internal.Filter    ( Entry(..)
                                                , Token
                                                , entryName
                                                , entryUID
                                                , getSearchPartition
                                                )

type Score = Int
type Scorer = (T.Text -> T.Text -> Ratio Int)

-- | Structure associating a Score with an Entry, for holding search results
newtype Match score entry = Match (score, entry)
  deriving newtype (Int -> Match score entry -> ShowS
[Match score entry] -> ShowS
Match score entry -> String
(Int -> Match score entry -> ShowS)
-> (Match score entry -> String)
-> ([Match score entry] -> ShowS)
-> Show (Match score entry)
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
forall score entry.
(Show score, Show entry) =>
Int -> Match score entry -> ShowS
forall score entry.
(Show score, Show entry) =>
[Match score entry] -> ShowS
forall score entry.
(Show score, Show entry) =>
Match score entry -> String
showList :: [Match score entry] -> ShowS
$cshowList :: forall score entry.
(Show score, Show entry) =>
[Match score entry] -> ShowS
show :: Match score entry -> String
$cshow :: forall score entry.
(Show score, Show entry) =>
Match score entry -> String
showsPrec :: Int -> Match score entry -> ShowS
$cshowsPrec :: forall score entry.
(Show score, Show entry) =>
Int -> Match score entry -> ShowS
Show, Match score entry -> Match score entry -> Bool
(Match score entry -> Match score entry -> Bool)
-> (Match score entry -> Match score entry -> Bool)
-> Eq (Match score entry)
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
forall score entry.
(Eq score, Eq entry) =>
Match score entry -> Match score entry -> Bool
/= :: Match score entry -> Match score entry -> Bool
$c/= :: forall score entry.
(Eq score, Eq entry) =>
Match score entry -> Match score entry -> Bool
== :: Match score entry -> Match score entry -> Bool
$c== :: forall score entry.
(Eq score, Eq entry) =>
Match score entry -> Match score entry -> Bool
Eq)

instance Bifunctor Match where
    bimap :: (a -> b) -> (c -> d) -> Match a c -> Match b d
bimap a -> b
f c -> d
g (Match (a
score, c
entry)) = (b, d) -> Match b d
forall score entry. (score, entry) -> Match score entry
Match (a -> b
f a
score, c -> d
g c
entry)

-- | Score accessor for Match
matchScore :: Match Score (Entry name uid) -> Score
matchScore :: Match Int (Entry name uid) -> Int
matchScore (Match (Int
score, Entry name uid
_)) = Int
score

-- | Entry accessor for Match
matchEntry :: Match Score (Entry name uid) -> Entry name uid
matchEntry :: Match Int (Entry name uid) -> Entry name uid
matchEntry (Match (Int
_, entry :: Entry name uid
entry@(Entry (name
_, uid
_)))) = Entry name uid
entry

{- | List of entries to be searched and a HashMap associating tokens with
   HashSets of UIDs related to entries containing the tokens.
-}
newtype QuickSearch uid =
  QuickSearch ([Entry T.Text uid],
                HMap.HashMap Token (HSet.HashSet uid))
  deriving newtype (Int -> QuickSearch uid -> ShowS
[QuickSearch uid] -> ShowS
QuickSearch uid -> String
(Int -> QuickSearch uid -> ShowS)
-> (QuickSearch uid -> String)
-> ([QuickSearch uid] -> ShowS)
-> Show (QuickSearch uid)
forall uid. Show uid => Int -> QuickSearch uid -> ShowS
forall uid. Show uid => [QuickSearch uid] -> ShowS
forall uid. Show uid => QuickSearch uid -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [QuickSearch uid] -> ShowS
$cshowList :: forall uid. Show uid => [QuickSearch uid] -> ShowS
show :: QuickSearch uid -> String
$cshow :: forall uid. Show uid => QuickSearch uid -> String
showsPrec :: Int -> QuickSearch uid -> ShowS
$cshowsPrec :: forall uid. Show uid => Int -> QuickSearch uid -> ShowS
Show)

-- | [Entry name uid] accessor for QuickSearch
quickSearchEntries :: (Hashable uid, Eq uid) => QuickSearch uid -> [Entry T.Text uid]
quickSearchEntries :: QuickSearch uid -> [Entry Text uid]
quickSearchEntries (QuickSearch ([Entry Text uid]
entries, HashMap Text (HashSet uid)
_)) = [Entry Text uid]
entries


-- | tokenFilter accessor for QuickSearch
quickSearchTokenFilter
    :: (Hashable uid, Eq uid) => QuickSearch uid -> HMap.HashMap Token (HSet.HashSet uid)
quickSearchTokenFilter :: QuickSearch uid -> HashMap Text (HashSet uid)
quickSearchTokenFilter (QuickSearch ([Entry Text uid]
_, HashMap Text (HashSet uid)
tokenFilter)) = HashMap Text (HashSet uid)
tokenFilter

{- | Given a string to search, a QuickSearch object, and a similarity function,
   returns potential matches contained in the QuickSearch filters and their
   associated scores, in descending order by score.
-}
scoreMatches
    :: (Hashable uid, Eq uid)
    => T.Text  -- ^ Name or other string to be searched
    -> QuickSearch uid  -- ^ The QuickSearch object to be used
    -> Scorer  -- ^ A string distance function of type (Text -> Text -> Ratio Int)
    -> [Match Score (Entry T.Text uid)]  -- ^ A list of possible matches
scoreMatches :: Text -> QuickSearch uid -> Scorer -> [Match Int (Entry Text uid)]
scoreMatches (Text -> Text
T.toCaseFold -> Text
entry) QuickSearch uid
qs Scorer
scorer =
    let searchSpace :: [Entry Text uid]
searchSpace = (Entry Text uid -> Entry Text uid)
-> [Entry Text uid] -> [Entry Text uid]
forall a b. (a -> b) -> [a] -> [b]
map ((Text -> Text) -> Entry Text uid -> Entry Text uid
forall (p :: * -> * -> *) a b c.
Bifunctor p =>
(a -> b) -> p a c -> p b c
first Text -> Text
T.toCaseFold) ([Entry Text uid] -> [Entry Text uid])
-> [Entry Text uid] -> [Entry Text uid]
forall a b. (a -> b) -> a -> b
$ Text -> QuickSearch uid -> [Entry Text uid]
forall uid.
(Hashable uid, Eq uid) =>
Text -> QuickSearch uid -> [Entry Text uid]
pruneSearchSpace Text
entry QuickSearch uid
qs
        scoreEntry :: Entry Text uid -> Int
scoreEntry  = Ratio Int -> Int
toPercent (Ratio Int -> Int)
-> (Entry Text uid -> Ratio Int) -> Entry Text uid -> Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Scorer
scorer Text
entry (Text -> Ratio Int)
-> (Entry Text uid -> Text) -> Entry Text uid -> Ratio Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Entry Text uid -> Text
forall name uid. Entry name uid -> name
entryName
        results :: [Match Int (Entry Text uid)]
results     = (Entry Text uid -> Match Int (Entry Text uid))
-> [Entry Text uid] -> [Match Int (Entry Text uid)]
forall a b. (a -> b) -> [a] -> [b]
map ((Int, Entry Text uid) -> Match Int (Entry Text uid)
forall score entry. (score, entry) -> Match score entry
Match ((Int, Entry Text uid) -> Match Int (Entry Text uid))
-> (Entry Text uid -> (Int, Entry Text uid))
-> Entry Text uid
-> Match Int (Entry Text uid)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Entry Text uid -> Int
forall uid. Entry Text uid -> Int
scoreEntry (Entry Text uid -> Int)
-> (Entry Text uid -> Entry Text uid)
-> Entry Text uid
-> (Int, Entry Text uid)
forall (a :: * -> * -> *) b c c'.
Arrow a =>
a b c -> a b c' -> a b (c, c')
&&& Entry Text uid -> Entry Text uid
forall a. a -> a
id)) [Entry Text uid]
searchSpace
    in  (Match Int (Entry Text uid)
 -> Match Int (Entry Text uid) -> Ordering)
-> [Match Int (Entry Text uid)] -> [Match Int (Entry Text uid)]
forall a. (a -> a -> Ordering) -> [a] -> [a]
sortBy ((Match Int (Entry Text uid) -> Down Int)
-> Match Int (Entry Text uid)
-> Match Int (Entry Text uid)
-> Ordering
forall a b. Ord a => (b -> a) -> b -> b -> Ordering
comparing (Int -> Down Int
forall a. a -> Down a
Down (Int -> Down Int)
-> (Match Int (Entry Text uid) -> Int)
-> Match Int (Entry Text uid)
-> Down Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Match Int (Entry Text uid) -> Int
forall name uid. Match Int (Entry name uid) -> Int
matchScore)) [Match Int (Entry Text uid)]
results
-- ^ Ignore the linter here, this is a performance thing

{- | Given a string and a QuickSearch object, return the list of entries
   from within the QuickSearch that share a full token with the target string
-}
pruneSearchSpace
    :: (Hashable uid, Eq uid)
    => T.Text  -- ^ Name or other string to be searched
    -> QuickSearch uid  -- ^ The QuickSearch object to be used
    -> [Entry T.Text uid]  -- ^ A list of strings to search through and their UIDs
pruneSearchSpace :: Text -> QuickSearch uid -> [Entry Text uid]
pruneSearchSpace Text
entry (QuickSearch ([Entry Text uid]
entries, HashMap Text (HashSet uid)
tokenFilter)) =
    let uidPartition :: HashSet uid
uidPartition = Text -> HashMap Text (HashSet uid) -> HashSet uid
forall uid.
(Hashable uid, Eq uid) =>
Text -> HashMap Text (HashSet uid) -> HashSet uid
getSearchPartition Text
entry HashMap Text (HashSet uid)
tokenFilter
    in  (Entry Text uid -> Bool) -> [Entry Text uid] -> [Entry Text uid]
forall a. (a -> Bool) -> [a] -> [a]
filter ((uid -> HashSet uid -> Bool
forall a. (Eq a, Hashable a) => a -> HashSet a -> Bool
`HSet.member` HashSet uid
uidPartition) (uid -> Bool) -> (Entry Text uid -> uid) -> Entry Text uid -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Entry Text uid -> uid
forall name uid. Entry name uid -> uid
entryUID) [Entry Text uid]
entries

toPercent :: Ratio Int -> Int
toPercent :: Ratio Int -> Int
toPercent Ratio Int
r = Double -> Int
forall a b. (RealFrac a, Integral b) => a -> b
floor (Double -> Int) -> Double -> Int
forall a b. (a -> b) -> a -> b
$ (Double
num Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ Double
denom) Double -> Double -> Double
forall a. Num a => a -> a -> a
* (Double
100 :: Double)
  where
    ratioToIntPair :: Ratio Int -> (Double, Double)
ratioToIntPair = Int -> Double
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int -> Double) -> (Ratio Int -> Int) -> Ratio Int -> Double
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Ratio Int -> Int
forall a. Ratio a -> a
numerator (Ratio Int -> Double)
-> (Ratio Int -> Double) -> Ratio Int -> (Double, Double)
forall (a :: * -> * -> *) b c c'.
Arrow a =>
a b c -> a b c' -> a b (c, c')
&&& Int -> Double
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int -> Double) -> (Ratio Int -> Int) -> Ratio Int -> Double
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Ratio Int -> Int
forall a. Ratio a -> a
denominator
    (Double
num, Double
denom)   = Ratio Int -> (Double, Double)
ratioToIntPair Ratio Int
r