{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE DerivingStrategies #-}
{-# LANGUAGE GeneralisedNewtypeDeriving #-}
module QuickSearch.Internal.Filter
( buildTokenPartitions
, getSearchPartition
, wordTokenize
, toTokenizedTuple
, Token
, Entry(..)
, entryName
, entryUID
, first
) where
import Control.Arrow ( Arrow((&&&)) )
import Data.Bifunctor ( Bifunctor(bimap, first) )
import Data.Char ( isAlphaNum
, isSpace
)
import qualified Data.HashMap.Lazy as HMap
import qualified Data.HashSet as HSet
import Data.Hashable ( Hashable )
import Data.Maybe ( fromMaybe )
import qualified Data.Text as T
type Token = T.Text
newtype Entry name uid = Entry (name, uid)
deriving newtype (Int -> Entry name uid -> ShowS
[Entry name uid] -> ShowS
Entry name uid -> String
(Int -> Entry name uid -> ShowS)
-> (Entry name uid -> String)
-> ([Entry name uid] -> ShowS)
-> Show (Entry name uid)
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
forall name uid.
(Show name, Show uid) =>
Int -> Entry name uid -> ShowS
forall name uid. (Show name, Show uid) => [Entry name uid] -> ShowS
forall name uid. (Show name, Show uid) => Entry name uid -> String
showList :: [Entry name uid] -> ShowS
$cshowList :: forall name uid. (Show name, Show uid) => [Entry name uid] -> ShowS
show :: Entry name uid -> String
$cshow :: forall name uid. (Show name, Show uid) => Entry name uid -> String
showsPrec :: Int -> Entry name uid -> ShowS
$cshowsPrec :: forall name uid.
(Show name, Show uid) =>
Int -> Entry name uid -> ShowS
Show, Entry name uid -> Entry name uid -> Bool
(Entry name uid -> Entry name uid -> Bool)
-> (Entry name uid -> Entry name uid -> Bool)
-> Eq (Entry name uid)
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
forall name uid.
(Eq name, Eq uid) =>
Entry name uid -> Entry name uid -> Bool
/= :: Entry name uid -> Entry name uid -> Bool
$c/= :: forall name uid.
(Eq name, Eq uid) =>
Entry name uid -> Entry name uid -> Bool
== :: Entry name uid -> Entry name uid -> Bool
$c== :: forall name uid.
(Eq name, Eq uid) =>
Entry name uid -> Entry name uid -> Bool
Eq)
instance Bifunctor Entry where
bimap :: (a -> b) -> (c -> d) -> Entry a c -> Entry b d
bimap a -> b
f c -> d
g (Entry (a
name, c
uid)) = (b, d) -> Entry b d
forall name uid. (name, uid) -> Entry name uid
Entry (a -> b
f a
name, c -> d
g c
uid)
entryName :: Entry name uid -> name
entryName :: Entry name uid -> name
entryName (Entry (name
name, uid
_)) = name
name
entryUID :: Entry name uid -> uid
entryUID :: Entry name uid -> uid
entryUID (Entry (name
_, uid
uid)) = uid
uid
wordTokenize
:: T.Text
-> [Token]
wordTokenize :: Text -> [Text]
wordTokenize = Text -> [Text]
T.words (Text -> [Text]) -> (Text -> Text) -> Text -> [Text]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Text
T.toCaseFold (Text -> Text) -> (Text -> Text) -> Text -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Char) -> Text -> Text
T.map Char -> Char
symToSpace (Text -> Text) -> (Text -> Text) -> Text -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Bool) -> Text -> Text
T.filter (Char -> String -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`notElem` String
".'")
where
symToSpace :: Char -> Char
symToSpace :: Char -> Char
symToSpace Char
c | Char -> Bool
isAlphaNum Char
c Bool -> Bool -> Bool
|| Char -> Bool
isSpace Char
c = Char
c
| Bool
otherwise = Char
' '
toTokenizedTuple :: (Hashable uid, Eq uid) => Entry T.Text uid -> ([Token], uid)
toTokenizedTuple :: Entry Text uid -> ([Text], uid)
toTokenizedTuple = Text -> [Text]
wordTokenize (Text -> [Text])
-> (Entry Text uid -> Text) -> Entry Text uid -> [Text]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Entry Text uid -> Text
forall name uid. Entry name uid -> name
entryName (Entry Text uid -> [Text])
-> (Entry Text uid -> uid) -> Entry Text uid -> ([Text], uid)
forall (a :: * -> * -> *) b c c'.
Arrow a =>
a b c -> a b c' -> a b (c, c')
&&& Entry Text uid -> uid
forall name uid. Entry name uid -> uid
entryUID
buildTokenPartitions
:: (Hashable uid, Eq uid)
=> [Entry T.Text uid]
-> HMap.HashMap Token (HSet.HashSet uid)
buildTokenPartitions :: [Entry Text uid] -> HashMap Text (HashSet uid)
buildTokenPartitions = [([Text], uid)] -> HashMap Text (HashSet uid)
forall uid.
(Hashable uid, Eq uid) =>
[([Text], uid)] -> HashMap Text (HashSet uid)
tokenPartitions ([([Text], uid)] -> HashMap Text (HashSet uid))
-> ([Entry Text uid] -> [([Text], uid)])
-> [Entry Text uid]
-> HashMap Text (HashSet uid)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Entry Text uid -> ([Text], uid))
-> [Entry Text uid] -> [([Text], uid)]
forall a b. (a -> b) -> [a] -> [b]
map Entry Text uid -> ([Text], uid)
forall uid.
(Hashable uid, Eq uid) =>
Entry Text uid -> ([Text], uid)
toTokenizedTuple
tokenPartitions
:: forall uid
. (Hashable uid, Eq uid)
=> [([Token], uid)]
-> HMap.HashMap Token (HSet.HashSet uid)
tokenPartitions :: [([Text], uid)] -> HashMap Text (HashSet uid)
tokenPartitions [([Text], uid)]
tokenizedEntries =
[(Text, HashSet uid)] -> HashMap Text (HashSet uid)
forall k v. (Eq k, Hashable k) => [(k, v)] -> HashMap k v
HMap.fromList ([(Text, HashSet uid)] -> HashMap Text (HashSet uid))
-> [(Text, HashSet uid)] -> HashMap Text (HashSet uid)
forall a b. (a -> b) -> a -> b
$ [ (Text
tok, (Hashable uid, Eq uid) => Text -> HashSet uid
Text -> HashSet uid
allWith Text
tok) | Text
tok <- [Text]
allTokens ]
where
unstableNub :: [Token] -> [Token]
unstableNub :: [Text] -> [Text]
unstableNub = HashSet Text -> [Text]
forall a. HashSet a -> [a]
HSet.toList (HashSet Text -> [Text])
-> ([Text] -> HashSet Text) -> [Text] -> [Text]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Text] -> HashSet Text
forall a. (Eq a, Hashable a) => [a] -> HashSet a
HSet.fromList
allTokens :: [Text]
allTokens = [Text] -> [Text]
unstableNub ([Text] -> [Text])
-> ([([Text], uid)] -> [Text]) -> [([Text], uid)] -> [Text]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (([Text], uid) -> [Text]) -> [([Text], uid)] -> [Text]
forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap ([Text], uid) -> [Text]
forall a b. (a, b) -> a
fst ([([Text], uid)] -> [Text]) -> [([Text], uid)] -> [Text]
forall a b. (a -> b) -> a -> b
$ [([Text], uid)]
tokenizedEntries
allWith :: (Hashable uid, Eq uid) => Token -> HSet.HashSet uid
allWith :: Text -> HashSet uid
allWith Text
token =
[uid] -> HashSet uid
forall a. (Eq a, Hashable a) => [a] -> HashSet a
HSet.fromList ([uid] -> HashSet uid)
-> ([([Text], uid)] -> [uid]) -> [([Text], uid)] -> HashSet uid
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (([Text], uid) -> uid) -> [([Text], uid)] -> [uid]
forall a b. (a -> b) -> [a] -> [b]
map ([Text], uid) -> uid
forall a b. (a, b) -> b
snd ([([Text], uid)] -> [uid])
-> ([([Text], uid)] -> [([Text], uid)]) -> [([Text], uid)] -> [uid]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (([Text], uid) -> Bool) -> [([Text], uid)] -> [([Text], uid)]
forall a. (a -> Bool) -> [a] -> [a]
filter ((Text
token Text -> [Text] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem`) ([Text] -> Bool)
-> (([Text], uid) -> [Text]) -> ([Text], uid) -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ([Text], uid) -> [Text]
forall a b. (a, b) -> a
fst) ([([Text], uid)] -> HashSet uid) -> [([Text], uid)] -> HashSet uid
forall a b. (a -> b) -> a -> b
$ [([Text], uid)]
tokenizedEntries
getSearchPartition
:: (Hashable uid, Eq uid)
=> T.Text
-> HMap.HashMap Token (HSet.HashSet uid)
-> HSet.HashSet uid
getSearchPartition :: Text -> HashMap Text (HashSet uid) -> HashSet uid
getSearchPartition Text
name HashMap Text (HashSet uid)
tokenMap =
let tokens :: [Text]
tokens = Text -> [Text]
wordTokenize Text
name
in [HashSet uid] -> HashSet uid
forall a. (Eq a, Hashable a) => [HashSet a] -> HashSet a
HSet.unions ([HashSet uid] -> HashSet uid) -> [HashSet uid] -> HashSet uid
forall a b. (a -> b) -> a -> b
$ (Text -> HashSet uid) -> [Text] -> [HashSet uid]
forall a b. (a -> b) -> [a] -> [b]
map (HashSet uid -> Maybe (HashSet uid) -> HashSet uid
forall a. a -> Maybe a -> a
fromMaybe HashSet uid
forall a. HashSet a
HSet.empty (Maybe (HashSet uid) -> HashSet uid)
-> (Text -> Maybe (HashSet uid)) -> Text -> HashSet uid
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Text -> HashMap Text (HashSet uid) -> Maybe (HashSet uid)
forall k v. (Eq k, Hashable k) => k -> HashMap k v -> Maybe v
`HMap.lookup` HashMap Text (HashSet uid)
tokenMap)) [Text]
tokens