{-# OPTIONS_GHC -funbox-strict-fields #-}
{-# LANGUAGE BangPatterns #-}
-- |
-- Module      : Data.Unicode.Internal.NormalizeStream
-- Copyright   : (c) 2016 Harendra Kumar
--
-- License     : BSD-style
-- Maintainer  : harendra.kumar@gmail.com
-- Stability   : experimental
--
-- Stream based normalization.
--
module Data.Unicode.Internal.NormalizeStream
    (
      D.DecomposeMode(..)
    , stream
    , unstream
    , unstreamC
    )
    where

import           Control.Monad                          (ap)
import           Data.Char                              (chr, ord)
import           Data.List                              (sortBy)
import           Data.Ord                               (comparing)
import qualified Data.Text.Array                        as A
import           Data.Text.Internal                     (Text (..))
import qualified Data.Text.Internal.Encoding.Utf16      as U16
import           Data.Text.Internal.Fusion.Size         (betweenSize,
                                                         upperBound)
import           Data.Text.Internal.Fusion.Types        (Step (..), Stream (..))
import           Data.Text.Internal.Private             (runText)
import           Data.Text.Internal.Unsafe.Char         (unsafeWrite)
import           Data.Text.Internal.Unsafe.Char         (unsafeChr)
import           Data.Text.Internal.Unsafe.Shift        (shiftR)
import           GHC.ST                                 (ST (..))

import qualified Data.Unicode.Properties.CombiningClass  as CC
import qualified Data.Unicode.Properties.Compositions    as C
import qualified Data.Unicode.Properties.Decompose       as D
import qualified Data.Unicode.Properties.DecomposeHangul as H

-------------------------------------------------------------------------------
-- Reorder buffer to hold characters till the next starter boundary
-------------------------------------------------------------------------------

data ReBuf = Empty | One {-# UNPACK #-} !Char | Many [Char]

writeStr :: A.MArray s -> Int -> [Char] -> ST s Int
writeStr :: MArray s -> Int -> [Char] -> ST s Int
writeStr marr :: MArray s
marr di :: Int
di str :: [Char]
str = Int -> [Char] -> ST s Int
go Int
di [Char]
str
    where
        go :: Int -> [Char] -> ST s Int
go i :: Int
i [] = Int -> ST s Int
forall (m :: * -> *) a. Monad m => a -> m a
return Int
i
        go i :: Int
i (c :: Char
c : cs :: [Char]
cs) = do
            Int
n <- MArray s -> Int -> Char -> ST s Int
forall s. MArray s -> Int -> Char -> ST s Int
unsafeWrite MArray s
marr Int
i Char
c
            Int -> [Char] -> ST s Int
go (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
n) [Char]
cs

{-# INLINE writeReorderBuffer #-}
writeReorderBuffer :: A.MArray s -> Int -> ReBuf -> ST s Int
writeReorderBuffer :: MArray s -> Int -> ReBuf -> ST s Int
writeReorderBuffer _ di :: Int
di Empty = Int -> ST s Int
forall (m :: * -> *) a. Monad m => a -> m a
return Int
di

writeReorderBuffer marr :: MArray s
marr di :: Int
di (One c :: Char
c) = do
    Int
n <- MArray s -> Int -> Char -> ST s Int
forall s. MArray s -> Int -> Char -> ST s Int
unsafeWrite MArray s
marr Int
di Char
c
    Int -> ST s Int
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
di Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
n)

writeReorderBuffer marr :: MArray s
marr di :: Int
di (Many str :: [Char]
str) = MArray s -> Int -> [Char] -> ST s Int
forall s. MArray s -> Int -> [Char] -> ST s Int
writeStr MArray s
marr Int
di [Char]
str

-------------------------------------------------------------------------------
-- Decomposition of Hangul characters is done algorithmically
-------------------------------------------------------------------------------

-- {-# INLINE decomposeCharHangul #-}
decomposeCharHangul :: A.MArray s -> Int -> Char -> ST s (Int, ReBuf)
decomposeCharHangul :: MArray s -> Int -> Char -> ST s (Int, ReBuf)
decomposeCharHangul marr :: MArray s
marr j :: Int
j c :: Char
c = do
    case Char -> Either (Char, Char) (Char, Char, Char)
D.decomposeCharHangul Char
c of
        Left  (l :: Char
l, v :: Char
v)    -> do
            Int
n1 <- MArray s -> Int -> Char -> ST s Int
forall s. MArray s -> Int -> Char -> ST s Int
unsafeWrite MArray s
marr Int
j Char
l
            Int
n2 <- MArray s -> Int -> Char -> ST s Int
forall s. MArray s -> Int -> Char -> ST s Int
unsafeWrite MArray s
marr (Int
j Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
n1) Char
v
            (Int, ReBuf) -> ST s (Int, ReBuf)
forall (m :: * -> *) a. Monad m => a -> m a
return ((Int
j Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
n1 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
n2), ReBuf
Empty)
        Right (l :: Char
l, v :: Char
v, t :: Char
t) -> do
            Int
n1 <- MArray s -> Int -> Char -> ST s Int
forall s. MArray s -> Int -> Char -> ST s Int
unsafeWrite MArray s
marr Int
j Char
l
            Int
n2 <- MArray s -> Int -> Char -> ST s Int
forall s. MArray s -> Int -> Char -> ST s Int
unsafeWrite MArray s
marr (Int
j Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
n1) Char
v
            Int
n3 <- MArray s -> Int -> Char -> ST s Int
forall s. MArray s -> Int -> Char -> ST s Int
unsafeWrite MArray s
marr (Int
j Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
n1 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
n2) Char
t
            (Int, ReBuf) -> ST s (Int, ReBuf)
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
j Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
n1 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
n2 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
n3, ReBuf
Empty)

{-# INLINE decomposeChar #-}
decomposeChar
    :: D.DecomposeMode
    -> A.MArray s       -- destination array for decomposition
    -> Int              -- array index
    -> ReBuf            -- reorder buffer
    -> Char             -- char to be decomposed
    -> ST s (Int, ReBuf)
decomposeChar :: DecomposeMode
-> MArray s -> Int -> ReBuf -> Char -> ST s (Int, ReBuf)
decomposeChar _ marr :: MArray s
marr i :: Int
i reBuf :: ReBuf
reBuf c :: Char
c | Char -> Bool
D.isHangul Char
c = do
    Int
j <- MArray s -> Int -> ReBuf -> ST s Int
forall s. MArray s -> Int -> ReBuf -> ST s Int
writeReorderBuffer MArray s
marr Int
i ReBuf
reBuf
    MArray s -> Int -> Char -> ST s (Int, ReBuf)
forall s. MArray s -> Int -> Char -> ST s (Int, ReBuf)
decomposeCharHangul MArray s
marr Int
j Char
c

-------------------------------------------------------------------------------
-- Decomposition of characters other than Hangul
-------------------------------------------------------------------------------

decomposeChar mode :: DecomposeMode
mode marr :: MArray s
marr index :: Int
index reBuf :: ReBuf
reBuf ch :: Char
ch = do
    -- TODO: return fully decomposed form
    case DecomposeMode -> Char -> DecomposeResult
D.isDecomposable DecomposeMode
mode Char
ch of
      D.FalseA -> MArray s -> Int -> ReBuf -> Char -> ST s (Int, ReBuf)
forall s. MArray s -> Int -> ReBuf -> Char -> ST s (Int, ReBuf)
reorder MArray s
marr Int
index ReBuf
reBuf Char
ch
      D.TrueA  -> MArray s -> Int -> ReBuf -> [Char] -> ST s (Int, ReBuf)
forall s. MArray s -> Int -> ReBuf -> [Char] -> ST s (Int, ReBuf)
decomposeAll MArray s
marr Int
index ReBuf
reBuf (DecomposeMode -> Char -> [Char]
D.decomposeChar DecomposeMode
mode Char
ch)
      _ -> MArray s -> Int -> ReBuf -> Char -> ST s (Int, ReBuf)
forall s. MArray s -> Int -> ReBuf -> Char -> ST s (Int, ReBuf)
reorder MArray s
marr Int
index ReBuf
reBuf Char
ch

    where
        {-# INLINE decomposeAll #-}
        decomposeAll :: MArray s -> Int -> ReBuf -> [Char] -> ST s (Int, ReBuf)
decomposeAll _ i :: Int
i rbuf :: ReBuf
rbuf [] = (Int, ReBuf) -> ST s (Int, ReBuf)
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
i, ReBuf
rbuf)
        decomposeAll arr :: MArray s
arr i :: Int
i rbuf :: ReBuf
rbuf (x :: Char
x : xs :: [Char]
xs)  =
            case DecomposeMode -> Char -> DecomposeResult
D.isDecomposable DecomposeMode
mode Char
x of
                D.TrueA  -> do
                    (i' :: Int
i', rbuf' :: ReBuf
rbuf') <- MArray s -> Int -> ReBuf -> [Char] -> ST s (Int, ReBuf)
decomposeAll MArray s
arr Int
i ReBuf
rbuf
                                                (DecomposeMode -> Char -> [Char]
D.decomposeChar DecomposeMode
mode Char
x)
                    MArray s -> Int -> ReBuf -> [Char] -> ST s (Int, ReBuf)
decomposeAll MArray s
arr Int
i' ReBuf
rbuf' [Char]
xs
                _ -> do
                    -- XXX calling reorder is wrong if decomposition results in
                    -- a further decomposable Hangul char. In that case we will
                    -- not go through the Hangul decompose for that char.
                    -- To be strictly correct we have to call decomposeChar
                    -- recursively here.
                    (i' :: Int
i', rbuf' :: ReBuf
rbuf') <- MArray s -> Int -> ReBuf -> Char -> ST s (Int, ReBuf)
forall s. MArray s -> Int -> ReBuf -> Char -> ST s (Int, ReBuf)
reorder MArray s
arr Int
i ReBuf
rbuf Char
x
                    MArray s -> Int -> ReBuf -> [Char] -> ST s (Int, ReBuf)
decomposeAll MArray s
arr Int
i' ReBuf
rbuf' [Char]
xs

        -- Unicode 9.0.0: 3.11
        -- D108 Reorderable pair: Two adjacent characters A and B in a coded
        -- character sequence <A,B> are a Reorderable Pair if and only if
        -- ccc(A) > ccc(B) > 0.
        --
        -- (array) (array index) (reorder buffer) (input char)
        {-# INLINE reorder #-}
        reorder :: MArray s -> Int -> ReBuf -> Char -> ST s (Int, ReBuf)
reorder _ i :: Int
i Empty c :: Char
c = (Int, ReBuf) -> ST s (Int, ReBuf)
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
i, Char -> ReBuf
One Char
c)

        -- input char is a starter, flush the reorder buffer
        reorder arr :: MArray s
arr i :: Int
i (One c0 :: Char
c0) c :: Char
c | Bool -> Bool
not (Char -> Bool
CC.isCombining Char
c) = do
            Int
n1 <- MArray s -> Int -> Char -> ST s Int
forall s. MArray s -> Int -> Char -> ST s Int
unsafeWrite MArray s
arr Int
i Char
c0
            Int
n2 <- MArray s -> Int -> Char -> ST s Int
forall s. MArray s -> Int -> Char -> ST s Int
unsafeWrite MArray s
arr (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
n1) Char
c
            (Int, ReBuf) -> ST s (Int, ReBuf)
forall (m :: * -> *) a. Monad m => a -> m a
return ((Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
n1 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
n2), ReBuf
Empty)

        -- input char is combining and there is a starter char in the buffer
        -- flush the starter char and add the combining char to the buffer
        reorder arr :: MArray s
arr i :: Int
i (One c0 :: Char
c0) c :: Char
c | Bool -> Bool
not (Char -> Bool
CC.isCombining Char
c0) = do
            Int
n <- MArray s -> Int -> Char -> ST s Int
forall s. MArray s -> Int -> Char -> ST s Int
unsafeWrite MArray s
arr Int
i Char
c0
            (Int, ReBuf) -> ST s (Int, ReBuf)
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
n, Char -> ReBuf
One Char
c)

        -- optimized ordering for common case of two combining chars
        -- XXX replace many with Two here
        reorder  _ i :: Int
i (One c0 :: Char
c0) c :: Char
c = (Int, ReBuf) -> ST s (Int, ReBuf)
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
i, [Char] -> ReBuf
Many [Char]
orderedPair)
            where
                -- {-# INLINE orderedPair #-}
                orderedPair :: [Char]
orderedPair =
                    case Char -> Char -> Bool
inOrder Char
c0 Char
c of
                        True  -> [Char
c0, Char
c]
                        False -> [Char
c, Char
c0]

                inOrder :: Char -> Char -> Bool
inOrder c1 :: Char
c1 c2 :: Char
c2 =
                    Char -> Int
CC.getCombiningClass Char
c1 Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Char -> Int
CC.getCombiningClass Char
c2

        -- input char is a starter, flush the reorder buffer
        reorder arr :: MArray s
arr i :: Int
i rbuf :: ReBuf
rbuf c :: Char
c | Bool -> Bool
not (Char -> Bool
CC.isCombining Char
c) = do
            Int
j <- MArray s -> Int -> ReBuf -> ST s Int
forall s. MArray s -> Int -> ReBuf -> ST s Int
writeReorderBuffer MArray s
arr Int
i ReBuf
rbuf
            Int
n <- MArray s -> Int -> Char -> ST s Int
forall s. MArray s -> Int -> Char -> ST s Int
unsafeWrite MArray s
arr Int
j Char
c
            (Int, ReBuf) -> ST s (Int, ReBuf)
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
j Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
n, ReBuf
Empty)

        -- unoptimized generic sort for more than two combining chars
        reorder _ i :: Int
i (Many str :: [Char]
str) c :: Char
c = (Int, ReBuf) -> ST s (Int, ReBuf)
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
i, [Char] -> ReBuf
Many ([Char] -> [Char]
sortCluster ([Char]
str [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char
c])))
            where
                {-# INLINE sortCluster #-}
                sortCluster :: [Char] -> [Char]
sortCluster =   ((Char, Int) -> Char) -> [(Char, Int)] -> [Char]
forall a b. (a -> b) -> [a] -> [b]
map (Char, Int) -> Char
forall a b. (a, b) -> a
fst
                              ([(Char, Int)] -> [Char])
-> ([Char] -> [(Char, Int)]) -> [Char] -> [Char]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ((Char, Int) -> (Char, Int) -> Ordering)
-> [(Char, Int)] -> [(Char, Int)]
forall a. (a -> a -> Ordering) -> [a] -> [a]
sortBy (((Char, Int) -> Int) -> (Char, Int) -> (Char, Int) -> Ordering
forall a b. Ord a => (b -> a) -> b -> b -> Ordering
comparing (Char, Int) -> Int
forall a b. (a, b) -> b
snd)
                              ([(Char, Int)] -> [(Char, Int)])
-> ([Char] -> [(Char, Int)]) -> [Char] -> [(Char, Int)]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> (Char, Int)) -> [Char] -> [(Char, Int)]
forall a b. (a -> b) -> [a] -> [b]
map ((Char -> Int -> (Char, Int))
-> (Char -> Int) -> Char -> (Char, Int)
forall (m :: * -> *) a b. Monad m => m (a -> b) -> m a -> m b
ap (,) Char -> Int
CC.getCombiningClass)

-- | /O(n)/ Convert a 'Text' into a 'Stream Char'.
stream :: Text -> Stream Char
stream :: Text -> Stream Char
stream (Text arr :: Array
arr off :: Int
off len :: Int
len) = (Int -> Step Int Char) -> Int -> Size -> Stream Char
forall a s. (s -> Step s a) -> s -> Size -> Stream a
Stream Int -> Step Int Char
next Int
off (Int -> Int -> Size
betweenSize (Int
len Int -> Int -> Int
forall a. UnsafeShift a => a -> Int -> a
`shiftR` 1) Int
len)
    where
      !end :: Int
end = Int
offInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
len
      {-# INLINE next #-}
      next :: Int -> Step Int Char
next !Int
i
          | Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
end                   = Step Int Char
forall s a. Step s a
Done
          -- shift generates only two branches instead of three in case of
          -- range check, works quite a bit faster with llvm backend.
          | (Word16
n Word16 -> Int -> Word16
forall a. UnsafeShift a => a -> Int -> a
`shiftR` 10) Word16 -> Word16 -> Bool
forall a. Eq a => a -> a -> Bool
== 0x36    = Char -> Int -> Step Int Char
forall s a. a -> s -> Step s a
Yield (Word16 -> Word16 -> Char
U16.chr2 Word16
n Word16
n2) (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ 2)
          | Bool
otherwise                  = Char -> Int -> Step Int Char
forall s a. a -> s -> Step s a
Yield (Word16 -> Char
unsafeChr Word16
n) (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ 1)
          where
            n :: Word16
n  = Array -> Int -> Word16
A.unsafeIndex Array
arr Int
i
            n2 :: Word16
n2 = Array -> Int -> Word16
A.unsafeIndex Array
arr (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ 1)
{-# INLINE [0] stream #-}

-- | /O(n)/ Convert a 'Stream Char' into a decompose-normalized 'Text'.
unstream :: D.DecomposeMode -> Stream Char -> Text
unstream :: DecomposeMode -> Stream Char -> Text
unstream mode :: DecomposeMode
mode (Stream next0 :: s -> Step s Char
next0 s0 :: s
s0 len :: Size
len) = (forall s. (MArray s -> Int -> ST s Text) -> ST s Text) -> Text
runText ((forall s. (MArray s -> Int -> ST s Text) -> ST s Text) -> Text)
-> (forall s. (MArray s -> Int -> ST s Text) -> ST s Text) -> Text
forall a b. (a -> b) -> a -> b
$ \done :: MArray s -> Int -> ST s Text
done -> do
  -- Before encoding each char we perform a buffer realloc check assuming
  -- worst case encoding size of two 16-bit units for the char. Just add an
  -- extra space to the buffer so that we do not end up reallocating even when
  -- all the chars are encoded as single unit.
  let margin :: Int
margin = 1 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
maxDecomposeLen
      mlen :: Int
mlen = (Int -> Size -> Int
upperBound 4 Size
len Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
margin)
  MArray s
arr0 <- Int -> ST s (MArray s)
forall s. Int -> ST s (MArray s)
A.new Int
mlen
  let outer :: MArray s -> Int -> s -> Int -> ReBuf -> ST s Text
outer !MArray s
arr !Int
maxi = s -> Int -> ReBuf -> ST s Text
encode
       where
        -- keep the common case loop as small as possible
        encode :: s -> Int -> ReBuf -> ST s Text
encode !s
si !Int
di rbuf :: ReBuf
rbuf =
            -- simply check for the worst case
            if Int
maxi Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
di Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
margin
            then s -> Int -> ReBuf -> ST s Text
realloc s
si Int
di ReBuf
rbuf
            else
                case s -> Step s Char
next0 s
si of
                    Done -> do
                        Int
di' <- MArray s -> Int -> ReBuf -> ST s Int
forall s. MArray s -> Int -> ReBuf -> ST s Int
writeReorderBuffer MArray s
arr Int
di ReBuf
rbuf
                        MArray s -> Int -> ST s Text
done MArray s
arr Int
di'
                    Skip si' :: s
si'    -> s -> Int -> ReBuf -> ST s Text
encode s
si' Int
di ReBuf
rbuf
                    Yield c :: Char
c si' :: s
si' -> do
                                (di' :: Int
di', rbuf' :: ReBuf
rbuf') <- DecomposeMode
-> MArray s -> Int -> ReBuf -> Char -> ST s (Int, ReBuf)
forall s.
DecomposeMode
-> MArray s -> Int -> ReBuf -> Char -> ST s (Int, ReBuf)
decomposeChar DecomposeMode
mode MArray s
arr Int
di ReBuf
rbuf Char
c
                                s -> Int -> ReBuf -> ST s Text
encode s
si' Int
di' ReBuf
rbuf'
                                -- n <- unsafeWrite arr di c
                                -- encode si' (di + n) rbuf

        -- keep uncommon case separate from the common case code
        {-# NOINLINE realloc #-}
        realloc :: s -> Int -> ReBuf -> ST s Text
realloc !s
si !Int
di rbuf :: ReBuf
rbuf = do
            let newlen :: Int
newlen = Int
maxi Int -> Int -> Int
forall a. Num a => a -> a -> a
* 2
            MArray s
arr' <- Int -> ST s (MArray s)
forall s. Int -> ST s (MArray s)
A.new Int
newlen
            MArray s -> Int -> MArray s -> Int -> Int -> ST s ()
forall s. MArray s -> Int -> MArray s -> Int -> Int -> ST s ()
A.copyM MArray s
arr' 0 MArray s
arr 0 Int
di
            MArray s -> Int -> s -> Int -> ReBuf -> ST s Text
outer MArray s
arr' (Int
newlen Int -> Int -> Int
forall a. Num a => a -> a -> a
- 1) s
si Int
di ReBuf
rbuf

  MArray s -> Int -> s -> Int -> ReBuf -> ST s Text
outer MArray s
arr0 (Int
mlen Int -> Int -> Int
forall a. Num a => a -> a -> a
- 1) s
s0 0 ReBuf
Empty
{-# INLINE [0] unstream #-}

-- we can generate this from UCD
maxDecomposeLen :: Int
maxDecomposeLen :: Int
maxDecomposeLen = 32

-------------------------------------------------------------------------------
-- Composition
-------------------------------------------------------------------------------

composeAndWrite
    :: A.MArray s
    -> Int
    -> Char
    -> ReBuf
    -> Char
    -> ST s (Int, Char) -- return new index, new starter
composeAndWrite :: MArray s -> Int -> Char -> ReBuf -> Char -> ST s (Int, Char)
composeAndWrite arr :: MArray s
arr di :: Int
di st1 :: Char
st1 Empty st2 :: Char
st2 = do
    Int
n <- MArray s -> Int -> Char -> ST s Int
forall s. MArray s -> Int -> Char -> ST s Int
unsafeWrite MArray s
arr Int
di Char
st1
    (Int, Char) -> ST s (Int, Char)
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
di Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
n, Char
st2)

composeAndWrite arr :: MArray s
arr di :: Int
di st1 :: Char
st1 (One c :: Char
c) st2 :: Char
st2 =
    MArray s -> Int -> Char -> [Char] -> Char -> ST s (Int, Char)
forall s.
MArray s -> Int -> Char -> [Char] -> Char -> ST s (Int, Char)
composeAndWrite' MArray s
arr Int
di Char
st1 [Char
c] Char
st2

composeAndWrite arr :: MArray s
arr di :: Int
di st1 :: Char
st1 (Many str :: [Char]
str) st2 :: Char
st2 =
    MArray s -> Int -> Char -> [Char] -> Char -> ST s (Int, Char)
forall s.
MArray s -> Int -> Char -> [Char] -> Char -> ST s (Int, Char)
composeAndWrite' MArray s
arr Int
di Char
st1 [Char]
str Char
st2

composeAndWrite'
    :: A.MArray s
    -> Int
    -> Char
    -> [Char]
    -> Char
    -> ST s (Int, Char)
composeAndWrite' :: MArray s -> Int -> Char -> [Char] -> Char -> ST s (Int, Char)
composeAndWrite' arr :: MArray s
arr di :: Int
di st1 :: Char
st1 str :: [Char]
str st2 :: Char
st2 = Int -> Char -> [Char] -> Int -> [Char] -> ST s (Int, Char)
go Int
di Char
st1 [] 0 [Char]
str
    where
        -- arguments: index, starter, uncombined chars,
        -- cc of prev uncombined char, unprocessed str
        go :: Int -> Char -> [Char] -> Int -> [Char] -> ST s (Int, Char)
go i :: Int
i st :: Char
st [] _ [] =
                case Char -> Char -> Maybe Char
C.composePair Char
st Char
st2 of
                    Just x :: Char
x  -> (Int, Char) -> ST s (Int, Char)
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
i, Char
x)
                    Nothing -> do
                        Int
n <- MArray s -> Int -> Char -> ST s Int
forall s. MArray s -> Int -> Char -> ST s Int
unsafeWrite MArray s
arr Int
i Char
st
                        (Int, Char) -> ST s (Int, Char)
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
n, Char
st2)

        go i :: Int
i st :: Char
st uncs :: [Char]
uncs _ [] = do
            Int
j <- MArray s -> Int -> [Char] -> ST s Int
forall s. MArray s -> Int -> [Char] -> ST s Int
writeStr MArray s
arr Int
i (Char
st Char -> [Char] -> [Char]
forall a. a -> [a] -> [a]
: [Char]
uncs)
            (Int, Char) -> ST s (Int, Char)
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
j, Char
st2)

        go i :: Int
i st :: Char
st [] _ (c :: Char
c : cs :: [Char]
cs) = do
            case Char -> Char -> Maybe Char
C.composePair Char
st Char
c of
                Just x :: Char
x  -> Int -> Char -> [Char] -> Int -> [Char] -> ST s (Int, Char)
go Int
i Char
x [] 0 [Char]
cs
                Nothing -> do
                    Int -> Char -> [Char] -> Int -> [Char] -> ST s (Int, Char)
go Int
i Char
st [Char
c] (Char -> Int
CC.getCombiningClass Char
c) [Char]
cs

        go i :: Int
i st :: Char
st uncs :: [Char]
uncs cc :: Int
cc (c :: Char
c : cs :: [Char]
cs) = do
            let ccc :: Int
ccc = Char -> Int
CC.getCombiningClass Char
c
            if Int
ccc Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
cc then
                case Char -> Char -> Maybe Char
C.composePair Char
st Char
c of
                    Just x :: Char
x  -> Int -> Char -> [Char] -> Int -> [Char] -> ST s (Int, Char)
go Int
i Char
x [Char]
uncs Int
cc [Char]
cs
                    Nothing -> do
                        Int -> Char -> [Char] -> Int -> [Char] -> ST s (Int, Char)
go Int
i Char
st ([Char]
uncs [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char
c]) Int
ccc [Char]
cs
            else Int -> Char -> [Char] -> Int -> [Char] -> ST s (Int, Char)
go Int
i Char
st ([Char]
uncs [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char
c]) Int
ccc [Char]
cs

writeStarterRbuf :: A.MArray s
                 -> Int
                 -> Maybe Char
                 -> ReBuf
                 -> ST s Int
writeStarterRbuf :: MArray s -> Int -> Maybe Char -> ReBuf -> ST s Int
writeStarterRbuf marr :: MArray s
marr di :: Int
di st :: Maybe Char
st rbuf :: ReBuf
rbuf =
    case Maybe Char
st of
        Nothing -> MArray s -> Int -> ReBuf -> ST s Int
forall s. MArray s -> Int -> ReBuf -> ST s Int
writeReorderBuffer MArray s
marr Int
di ReBuf
rbuf
        Just starter :: Char
starter ->
            -- XXX null char hack
            MArray s -> Int -> Char -> ReBuf -> Char -> ST s (Int, Char)
forall s.
MArray s -> Int -> Char -> ReBuf -> Char -> ST s (Int, Char)
composeAndWrite MArray s
marr Int
di Char
starter ReBuf
rbuf '\0' ST s (Int, Char) -> ((Int, Char) -> ST s Int) -> ST s Int
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= (Int -> ST s Int
forall (m :: * -> *) a. Monad m => a -> m a
return (Int -> ST s Int)
-> ((Int, Char) -> Int) -> (Int, Char) -> ST s Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Int, Char) -> Int
forall a b. (a, b) -> a
fst)

-------------------------------------------------------------------------------
-- Composition of Hangul Jamo characters, done algorithmically
-------------------------------------------------------------------------------

-- Hold an L to wait for V, hold an LV to wait for T.
data JamoBuf
    = JamoEmpty
    | JamoLIndex {-# UNPACK #-} !Int
    | JamoLV     {-# UNPACK #-} !Char

{-# INLINE writeJamoBuf #-}
writeJamoBuf :: A.MArray s -> Int -> JamoBuf -> ST s Int
writeJamoBuf :: MArray s -> Int -> JamoBuf -> ST s Int
writeJamoBuf _ di :: Int
di JamoEmpty = Int -> ST s Int
forall (m :: * -> *) a. Monad m => a -> m a
return Int
di

writeJamoBuf marr :: MArray s
marr di :: Int
di (JamoLIndex i :: Int
i) = do
    Int
n <- MArray s -> Int -> Char -> ST s Int
forall s. MArray s -> Int -> Char -> ST s Int
unsafeWrite MArray s
marr Int
di (Int -> Char
chr (Int
D.jamoLFirst Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
i))
    Int -> ST s Int
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
di Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
n)

writeJamoBuf marr :: MArray s
marr di :: Int
di (JamoLV c :: Char
c) = do
    Int
n <- MArray s -> Int -> Char -> ST s Int
forall s. MArray s -> Int -> Char -> ST s Int
unsafeWrite MArray s
marr Int
di Char
c
    Int -> ST s Int
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
di Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
n)

-- TODO Unify compose and decompose if possible with good perf
-- TODO try unifying st, rbuf
-- TODO try using Either for (st, rbuf)/jbuf
-- or we can use different functions for hangul and non-hangul composition with
-- diff signatures. In an outer function we check if the char is hangul and
-- flush and switch the buffer before calling the appropriate function.

-- If we are composing we do not need to first decompose Hangul. We can just
-- compose assuming there could be some partially composed syllables e.g. LV
-- syllable followed by a jamo T. We need to compose this case as well.
--
-- XXX The unicode normalization test suite does not seem to have tests for a
-- LV composed hangul syllable followed by a jamo T.

{-# INLINE composeChar #-}

composeChar
    :: D.DecomposeMode
    -> A.MArray s       -- destination array for decomposition
    -> Int              -- array index
    -> Maybe Char       -- last starter
    -> ReBuf            -- reorder buffer
    -> JamoBuf          -- jamo buffer
    -> Char             -- char to be decomposed
    -> ST s (Int, Maybe Char, ReBuf, JamoBuf)
    -- ^ index, starter, reorder buf, jamobuf
composeChar :: DecomposeMode
-> MArray s
-> Int
-> Maybe Char
-> ReBuf
-> JamoBuf
-> Char
-> ST s (Int, Maybe Char, ReBuf, JamoBuf)
composeChar _ marr :: MArray s
marr index :: Int
index st :: Maybe Char
st rbuf :: ReBuf
rbuf jbuf :: JamoBuf
jbuf ch :: Char
ch | Char -> Bool
H.isHangul Char
ch Bool -> Bool -> Bool
|| Char -> Bool
H.isJamo Char
ch = do
    Int
j <- MArray s -> Int -> Maybe Char -> ReBuf -> ST s Int
forall s. MArray s -> Int -> Maybe Char -> ReBuf -> ST s Int
writeStarterRbuf MArray s
marr Int
index Maybe Char
st ReBuf
rbuf
    (k :: Int
k, jbuf' :: JamoBuf
jbuf') <- if Char -> Bool
H.isJamo Char
ch then
        MArray s -> Int -> JamoBuf -> Char -> ST s (Int, JamoBuf)
forall s. MArray s -> Int -> JamoBuf -> Char -> ST s (Int, JamoBuf)
composeCharJamo MArray s
marr Int
j JamoBuf
jbuf Char
ch
    else
        MArray s -> Int -> JamoBuf -> Char -> ST s (Int, JamoBuf)
forall s. MArray s -> Int -> JamoBuf -> Char -> ST s (Int, JamoBuf)
composeCharHangul MArray s
marr Int
j JamoBuf
jbuf Char
ch
    (Int, Maybe Char, ReBuf, JamoBuf)
-> ST s (Int, Maybe Char, ReBuf, JamoBuf)
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
k, Maybe Char
forall a. Maybe a
Nothing, ReBuf
Empty, JamoBuf
jbuf')
    where
        composeCharJamo :: MArray s -> Int -> JamoBuf -> Char -> ST s (Int, JamoBuf)
composeCharJamo arr :: MArray s
arr i :: Int
i JamoEmpty c :: Char
c =
            case Char -> Maybe Int
H.jamoLIndex Char
c of
                Just li :: Int
li -> (Int, JamoBuf) -> ST s (Int, JamoBuf)
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
i, Int -> JamoBuf
JamoLIndex Int
li)
                Nothing -> do
                    Int
n <- MArray s -> Int -> Char -> ST s Int
forall s. MArray s -> Int -> Char -> ST s Int
unsafeWrite MArray s
arr Int
i Char
c
                    (Int, JamoBuf) -> ST s (Int, JamoBuf)
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
n, JamoBuf
JamoEmpty)

        composeCharJamo arr :: MArray s
arr i :: Int
i jb :: JamoBuf
jb@(JamoLIndex li :: Int
li) c :: Char
c =
            case Char -> Maybe Int
H.jamoVIndex Char
c of
                Just vi :: Int
vi -> do
                    let lvi :: Int
lvi = Int
li Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
H.jamoNCount Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
vi Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
H.jamoTCount
                    (Int, JamoBuf) -> ST s (Int, JamoBuf)
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
i, Char -> JamoBuf
JamoLV (Int -> Char
chr (Int
H.hangulFirst Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
lvi)))
                Nothing -> do
                    Int
ix <- MArray s -> Int -> JamoBuf -> ST s Int
forall s. MArray s -> Int -> JamoBuf -> ST s Int
writeJamoBuf MArray s
arr Int
i JamoBuf
jb
                    MArray s -> Int -> JamoBuf -> Char -> ST s (Int, JamoBuf)
composeCharJamo MArray s
arr Int
ix JamoBuf
JamoEmpty Char
c

        composeCharJamo arr :: MArray s
arr i :: Int
i jb :: JamoBuf
jb@(JamoLV lv :: Char
lv) c :: Char
c =
            case Char -> Maybe Int
H.jamoTIndex Char
c of
                Just ti :: Int
ti -> do
                    Int
n <- MArray s -> Int -> Char -> ST s Int
forall s. MArray s -> Int -> Char -> ST s Int
unsafeWrite MArray s
arr Int
i (Int -> Char
chr ((Char -> Int
ord Char
lv) Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
ti))
                    (Int, JamoBuf) -> ST s (Int, JamoBuf)
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
n, JamoBuf
JamoEmpty)
                Nothing -> do
                    Int
ix <- MArray s -> Int -> JamoBuf -> ST s Int
forall s. MArray s -> Int -> JamoBuf -> ST s Int
writeJamoBuf MArray s
arr Int
i JamoBuf
jb
                    MArray s -> Int -> JamoBuf -> Char -> ST s (Int, JamoBuf)
composeCharJamo MArray s
arr Int
ix JamoBuf
JamoEmpty Char
c

        composeCharHangul :: MArray s -> Int -> JamoBuf -> Char -> ST s (Int, JamoBuf)
composeCharHangul arr :: MArray s
arr i :: Int
i jb :: JamoBuf
jb c :: Char
c = do
            Int
ix <- MArray s -> Int -> JamoBuf -> ST s Int
forall s. MArray s -> Int -> JamoBuf -> ST s Int
writeJamoBuf MArray s
arr Int
i JamoBuf
jb
            case Char -> Bool
H.isHangulLV Char
c of
                True -> (Int, JamoBuf) -> ST s (Int, JamoBuf)
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
ix, Char -> JamoBuf
JamoLV Char
c)
                False -> do
                    Int
n <- MArray s -> Int -> Char -> ST s Int
forall s. MArray s -> Int -> Char -> ST s Int
unsafeWrite MArray s
arr Int
ix Char
c
                    (Int, JamoBuf) -> ST s (Int, JamoBuf)
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
ix Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
n, JamoBuf
JamoEmpty)

-------------------------------------------------------------------------------
-- Composition of characters other than Hangul
-------------------------------------------------------------------------------

composeChar mode :: DecomposeMode
mode marr :: MArray s
marr index :: Int
index starter :: Maybe Char
starter reBuf :: ReBuf
reBuf jbuf :: JamoBuf
jbuf ch :: Char
ch = do
    Int
index' <- MArray s -> Int -> JamoBuf -> ST s Int
forall s. MArray s -> Int -> JamoBuf -> ST s Int
writeJamoBuf MArray s
marr Int
index JamoBuf
jbuf
    case DecomposeMode -> Char -> DecomposeResult
D.isDecomposable DecomposeMode
mode Char
ch of
        D.FalseA -> do
            (i :: Int
i, st :: Maybe Char
st, rbuf :: ReBuf
rbuf) <- MArray s
-> Int
-> Maybe Char
-> ReBuf
-> Char
-> ST s (Int, Maybe Char, ReBuf)
forall s.
MArray s
-> Int
-> Maybe Char
-> ReBuf
-> Char
-> ST s (Int, Maybe Char, ReBuf)
reorder MArray s
marr Int
index' Maybe Char
starter ReBuf
reBuf Char
ch
            (Int, Maybe Char, ReBuf, JamoBuf)
-> ST s (Int, Maybe Char, ReBuf, JamoBuf)
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
i, Maybe Char
st, ReBuf
rbuf, JamoBuf
JamoEmpty)
        D.TrueA  -> do
            MArray s
-> Int
-> Maybe Char
-> ReBuf
-> JamoBuf
-> [Char]
-> ST s (Int, Maybe Char, ReBuf, JamoBuf)
forall s.
MArray s
-> Int
-> Maybe Char
-> ReBuf
-> JamoBuf
-> [Char]
-> ST s (Int, Maybe Char, ReBuf, JamoBuf)
decomposeAll MArray s
marr Int
index' Maybe Char
starter ReBuf
reBuf JamoBuf
jbuf (DecomposeMode -> Char -> [Char]
D.decomposeChar DecomposeMode
mode Char
ch)
        _ -> do
            (i :: Int
i, st :: Maybe Char
st, rbuf :: ReBuf
rbuf) <- MArray s
-> Int
-> Maybe Char
-> ReBuf
-> Char
-> ST s (Int, Maybe Char, ReBuf)
forall s.
MArray s
-> Int
-> Maybe Char
-> ReBuf
-> Char
-> ST s (Int, Maybe Char, ReBuf)
reorder MArray s
marr Int
index' Maybe Char
starter ReBuf
reBuf Char
ch
            (Int, Maybe Char, ReBuf, JamoBuf)
-> ST s (Int, Maybe Char, ReBuf, JamoBuf)
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
i, Maybe Char
st, ReBuf
rbuf, JamoBuf
JamoEmpty)
    where
        {-# INLINE decomposeAll #-}
        decomposeAll :: MArray s
-> Int
-> Maybe Char
-> ReBuf
-> JamoBuf
-> [Char]
-> ST s (Int, Maybe Char, ReBuf, JamoBuf)
decomposeAll _ i :: Int
i st :: Maybe Char
st rbuf :: ReBuf
rbuf jb :: JamoBuf
jb [] = (Int, Maybe Char, ReBuf, JamoBuf)
-> ST s (Int, Maybe Char, ReBuf, JamoBuf)
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
i, Maybe Char
st, ReBuf
rbuf, JamoBuf
jb)
        decomposeAll arr :: MArray s
arr i :: Int
i st :: Maybe Char
st rbuf :: ReBuf
rbuf jb :: JamoBuf
jb (x :: Char
x : xs :: [Char]
xs)  =
            case DecomposeMode -> Char -> DecomposeResult
D.isDecomposable DecomposeMode
mode Char
x of
                D.TrueA  -> do
                    (i' :: Int
i', st' :: Maybe Char
st', rbuf' :: ReBuf
rbuf', jb' :: JamoBuf
jb') <- MArray s
-> Int
-> Maybe Char
-> ReBuf
-> JamoBuf
-> [Char]
-> ST s (Int, Maybe Char, ReBuf, JamoBuf)
decomposeAll MArray s
arr Int
i Maybe Char
st ReBuf
rbuf JamoBuf
jb
                                                (DecomposeMode -> Char -> [Char]
D.decomposeChar DecomposeMode
mode Char
x)
                    MArray s
-> Int
-> Maybe Char
-> ReBuf
-> JamoBuf
-> [Char]
-> ST s (Int, Maybe Char, ReBuf, JamoBuf)
decomposeAll MArray s
arr Int
i' Maybe Char
st' ReBuf
rbuf' JamoBuf
jb' [Char]
xs
                _ -> do
                    -- XXX this recursive call here hurts performance
                    -- We can make the hangul composition a separate function
                    -- and call that or reorder here based on the type fo char
                    (i' :: Int
i', st' :: Maybe Char
st', rbuf' :: ReBuf
rbuf', jb' :: JamoBuf
jb') <- DecomposeMode
-> MArray s
-> Int
-> Maybe Char
-> ReBuf
-> JamoBuf
-> Char
-> ST s (Int, Maybe Char, ReBuf, JamoBuf)
forall s.
DecomposeMode
-> MArray s
-> Int
-> Maybe Char
-> ReBuf
-> JamoBuf
-> Char
-> ST s (Int, Maybe Char, ReBuf, JamoBuf)
composeChar DecomposeMode
mode MArray s
arr Int
i Maybe Char
st ReBuf
rbuf JamoBuf
jb Char
x
                    MArray s
-> Int
-> Maybe Char
-> ReBuf
-> JamoBuf
-> [Char]
-> ST s (Int, Maybe Char, ReBuf, JamoBuf)
decomposeAll MArray s
arr Int
i' Maybe Char
st' ReBuf
rbuf' JamoBuf
jb' [Char]
xs

        -- Unicode 9.0.0: 3.11
        -- D108 Reorderable pair: Two adjacent characters A and B in a coded
        -- character sequence <A,B> are a Reorderable Pair if and only if
        -- ccc(A) > ccc(B) > 0.
        --
        -- (array) (array index) (reorder buffer) (input char)
        {-# INLINE reorder #-}
        reorder :: MArray s
-> Int
-> Maybe Char
-> ReBuf
-> Char
-> ST s (Int, Maybe Char, ReBuf)
reorder _ i :: Int
i st :: Maybe Char
st Empty c :: Char
c = (Int, Maybe Char, ReBuf) -> ST s (Int, Maybe Char, ReBuf)
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
i, Maybe Char
st, Char -> ReBuf
One Char
c)

        -- Unicode 9.0.0: 3.11
        -- D111: a starter can never become a non-starter after
        -- combining. If that happens we will potentially have to remember all
        -- previous starters so that the new non-starter can be combined with
        -- the previous starter.
        --
        -- To compose, try to combine an unblocked char with the last starter
        -- and remove if combined. A char with combining class equal or lower
        -- than the previous char is blocked. It implies that only adjacent
        -- starters can be combined.
        --
        -- input char is a starter
        -- does it combine with the previous starter?
        -- if no then flush and replace the last starter
        reorder arr :: MArray s
arr i :: Int
i (Just st :: Char
st) (One c0 :: Char
c0) c :: Char
c | Bool -> Bool
not (Char -> Bool
CC.isCombining Char
c) = do
            case Char -> Char -> Maybe Char
C.composePair Char
st Char
c0 of
                Just x :: Char
x  -> case Char -> Char -> Maybe Char
C.composePair Char
x Char
c of
                    Just y :: Char
y -> (Int, Maybe Char, ReBuf) -> ST s (Int, Maybe Char, ReBuf)
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
i, Char -> Maybe Char
forall a. a -> Maybe a
Just Char
y, ReBuf
Empty)
                    Nothing -> do
                        Int
n <- MArray s -> Int -> Char -> ST s Int
forall s. MArray s -> Int -> Char -> ST s Int
unsafeWrite MArray s
arr Int
i Char
x
                        (Int, Maybe Char, ReBuf) -> ST s (Int, Maybe Char, ReBuf)
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
n, Char -> Maybe Char
forall a. a -> Maybe a
Just Char
c, ReBuf
Empty)
                Nothing -> case Char -> Bool
CC.isCombining Char
c0 of
                    -- starter1 combining starter2
                    True -> do
                        Int
n1 <- MArray s -> Int -> Char -> ST s Int
forall s. MArray s -> Int -> Char -> ST s Int
unsafeWrite MArray s
arr Int
i Char
st
                        Int
n2 <- MArray s -> Int -> Char -> ST s Int
forall s. MArray s -> Int -> Char -> ST s Int
unsafeWrite MArray s
arr (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
n1) Char
c0
                        (Int, Maybe Char, ReBuf) -> ST s (Int, Maybe Char, ReBuf)
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
n1 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
n2, Char -> Maybe Char
forall a. a -> Maybe a
Just Char
c, ReBuf
Empty)
                    -- starter1 starter2 starter3
                    False -> do
                        Int
n1 <- MArray s -> Int -> Char -> ST s Int
forall s. MArray s -> Int -> Char -> ST s Int
unsafeWrite MArray s
arr Int
i Char
st
                        case Char -> Char -> Maybe Char
C.composePair Char
c0 Char
c of
                            Just y :: Char
y -> (Int, Maybe Char, ReBuf) -> ST s (Int, Maybe Char, ReBuf)
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
n1, Char -> Maybe Char
forall a. a -> Maybe a
Just Char
y, ReBuf
Empty)
                            Nothing -> do
                                Int
n2 <- MArray s -> Int -> Char -> ST s Int
forall s. MArray s -> Int -> Char -> ST s Int
unsafeWrite MArray s
arr (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
n1) Char
c0
                                (Int, Maybe Char, ReBuf) -> ST s (Int, Maybe Char, ReBuf)
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
n1 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
n2, Char -> Maybe Char
forall a. a -> Maybe a
Just Char
c, ReBuf
Empty)

        reorder arr :: MArray s
arr i :: Int
i Nothing (One c0 :: Char
c0) c :: Char
c | Bool -> Bool
not (Char -> Bool
CC.isCombining Char
c) =
            case Char -> Char -> Maybe Char
C.composePair Char
c0 Char
c of
                Just x :: Char
x  -> (Int, Maybe Char, ReBuf) -> ST s (Int, Maybe Char, ReBuf)
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
i, Char -> Maybe Char
forall a. a -> Maybe a
Just Char
x, ReBuf
Empty)
                Nothing -> do
                    Int
n <- MArray s -> Int -> Char -> ST s Int
forall s. MArray s -> Int -> Char -> ST s Int
unsafeWrite MArray s
arr Int
i Char
c0
                    (Int, Maybe Char, ReBuf) -> ST s (Int, Maybe Char, ReBuf)
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
n, Char -> Maybe Char
forall a. a -> Maybe a
Just Char
c, ReBuf
Empty)

        reorder arr :: MArray s
arr i :: Int
i (Just st :: Char
st) (One c0 :: Char
c0) c :: Char
c | Bool -> Bool
not (Char -> Bool
CC.isCombining Char
c0) = do
            case Char -> Char -> Maybe Char
C.composePair Char
st Char
c0 of
                Just x :: Char
x  -> (Int, Maybe Char, ReBuf) -> ST s (Int, Maybe Char, ReBuf)
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
i, Char -> Maybe Char
forall a. a -> Maybe a
Just Char
x, Char -> ReBuf
One Char
c)
                Nothing -> do
                    Int
n <- MArray s -> Int -> Char -> ST s Int
forall s. MArray s -> Int -> Char -> ST s Int
unsafeWrite MArray s
arr Int
i Char
st
                    (Int, Maybe Char, ReBuf) -> ST s (Int, Maybe Char, ReBuf)
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
n, Char -> Maybe Char
forall a. a -> Maybe a
Just Char
c0, Char -> ReBuf
One Char
c)

        -- input char is combining and there is a starter char in the buffer
        -- flush the starter char and add the combining char to the buffer
        reorder _arr :: MArray s
_arr i :: Int
i Nothing (One c0 :: Char
c0) c :: Char
c | Bool -> Bool
not (Char -> Bool
CC.isCombining Char
c0) = do
            (Int, Maybe Char, ReBuf) -> ST s (Int, Maybe Char, ReBuf)
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
i, Char -> Maybe Char
forall a. a -> Maybe a
Just Char
c0, Char -> ReBuf
One Char
c)

        -- optimized ordering for common case of two combining chars
        -- XXX replace many with Two here
        reorder  _ i :: Int
i st :: Maybe Char
st (One c0 :: Char
c0) c :: Char
c = (Int, Maybe Char, ReBuf) -> ST s (Int, Maybe Char, ReBuf)
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
i, Maybe Char
st, [Char] -> ReBuf
Many [Char]
orderedPair)
            where
                -- {-# INLINE orderedPair #-}
                orderedPair :: [Char]
orderedPair =
                    case Char -> Char -> Bool
inOrder Char
c0 Char
c of
                        True  -> [Char
c0, Char
c]
                        False -> [Char
c, Char
c0]

                inOrder :: Char -> Char -> Bool
inOrder c1 :: Char
c1 c2 :: Char
c2 =
                    Char -> Int
CC.getCombiningClass Char
c1 Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Char -> Int
CC.getCombiningClass Char
c2

        -- input char is a starter, flush the reorder buffer
        reorder arr :: MArray s
arr i :: Int
i (Just st :: Char
st) rbuf :: ReBuf
rbuf c :: Char
c | Bool -> Bool
not (Char -> Bool
CC.isCombining Char
c) = do
            (j :: Int
j, st2 :: Char
st2) <- MArray s -> Int -> Char -> ReBuf -> Char -> ST s (Int, Char)
forall s.
MArray s -> Int -> Char -> ReBuf -> Char -> ST s (Int, Char)
composeAndWrite MArray s
arr Int
i Char
st ReBuf
rbuf Char
c
            (Int, Maybe Char, ReBuf) -> ST s (Int, Maybe Char, ReBuf)
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
j, Char -> Maybe Char
forall a. a -> Maybe a
Just Char
st2, ReBuf
Empty)

        reorder arr :: MArray s
arr i :: Int
i Nothing rbuf :: ReBuf
rbuf c :: Char
c | Bool -> Bool
not (Char -> Bool
CC.isCombining Char
c) = do
            Int
j <- MArray s -> Int -> ReBuf -> ST s Int
forall s. MArray s -> Int -> ReBuf -> ST s Int
writeReorderBuffer MArray s
arr Int
i ReBuf
rbuf
            (Int, Maybe Char, ReBuf) -> ST s (Int, Maybe Char, ReBuf)
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
j, Char -> Maybe Char
forall a. a -> Maybe a
Just Char
c, ReBuf
Empty)

        -- unoptimized generic sort for more than two combining chars
        reorder _ i :: Int
i st :: Maybe Char
st (Many str :: [Char]
str) c :: Char
c =
            (Int, Maybe Char, ReBuf) -> ST s (Int, Maybe Char, ReBuf)
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
i, Maybe Char
st, [Char] -> ReBuf
Many ([Char] -> [Char]
sortCluster ([Char]
str [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char
c])))
            where
                {-# INLINE sortCluster #-}
                sortCluster :: [Char] -> [Char]
sortCluster =   ((Char, Int) -> Char) -> [(Char, Int)] -> [Char]
forall a b. (a -> b) -> [a] -> [b]
map (Char, Int) -> Char
forall a b. (a, b) -> a
fst
                              ([(Char, Int)] -> [Char])
-> ([Char] -> [(Char, Int)]) -> [Char] -> [Char]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ((Char, Int) -> (Char, Int) -> Ordering)
-> [(Char, Int)] -> [(Char, Int)]
forall a. (a -> a -> Ordering) -> [a] -> [a]
sortBy (((Char, Int) -> Int) -> (Char, Int) -> (Char, Int) -> Ordering
forall a b. Ord a => (b -> a) -> b -> b -> Ordering
comparing (Char, Int) -> Int
forall a b. (a, b) -> b
snd)
                              ([(Char, Int)] -> [(Char, Int)])
-> ([Char] -> [(Char, Int)]) -> [Char] -> [(Char, Int)]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> (Char, Int)) -> [Char] -> [(Char, Int)]
forall a b. (a -> b) -> [a] -> [b]
map ((Char -> Int -> (Char, Int))
-> (Char -> Int) -> Char -> (Char, Int)
forall (m :: * -> *) a b. Monad m => m (a -> b) -> m a -> m b
ap (,) Char -> Int
CC.getCombiningClass)

-- | /O(n)/ Convert a 'Stream Char' into a composed normalized 'Text'.
unstreamC :: D.DecomposeMode -> Stream Char -> Text
unstreamC :: DecomposeMode -> Stream Char -> Text
unstreamC mode :: DecomposeMode
mode (Stream next0 :: s -> Step s Char
next0 s0 :: s
s0 len :: Size
len) = (forall s. (MArray s -> Int -> ST s Text) -> ST s Text) -> Text
runText ((forall s. (MArray s -> Int -> ST s Text) -> ST s Text) -> Text)
-> (forall s. (MArray s -> Int -> ST s Text) -> ST s Text) -> Text
forall a b. (a -> b) -> a -> b
$ \done :: MArray s -> Int -> ST s Text
done -> do
  -- Before encoding each char we perform a buffer realloc check assuming
  -- worst case encoding size of two 16-bit units for the char. Just add an
  -- extra space to the buffer so that we do not end up reallocating even when
  -- all the chars are encoded as single unit.
  let margin :: Int
margin = 1 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
maxDecomposeLen
      mlen :: Int
mlen = (Int -> Size -> Int
upperBound 4 Size
len Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
margin)
  MArray s
arr0 <- Int -> ST s (MArray s)
forall s. Int -> ST s (MArray s)
A.new Int
mlen
  let outer :: MArray s
-> Int -> s -> Int -> Maybe Char -> ReBuf -> JamoBuf -> ST s Text
outer !MArray s
arr !Int
maxi = s -> Int -> Maybe Char -> ReBuf -> JamoBuf -> ST s Text
encode
       where
        -- keep the common case loop as small as possible
        encode :: s -> Int -> Maybe Char -> ReBuf -> JamoBuf -> ST s Text
encode !s
si !Int
di st :: Maybe Char
st rbuf :: ReBuf
rbuf jbuf :: JamoBuf
jbuf =
            -- simply check for the worst case
            if Int
maxi Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
di Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
margin
               then s -> Int -> Maybe Char -> ReBuf -> JamoBuf -> ST s Text
realloc s
si Int
di Maybe Char
st ReBuf
rbuf JamoBuf
jbuf
            else
                case s -> Step s Char
next0 s
si of
                    Done -> do
                        -- Flush any leftover buffers, only one of rbuf/jbuf
                        -- will have contents
                        Int
di'  <- MArray s -> Int -> Maybe Char -> ReBuf -> ST s Int
forall s. MArray s -> Int -> Maybe Char -> ReBuf -> ST s Int
writeStarterRbuf MArray s
arr Int
di Maybe Char
st ReBuf
rbuf
                        Int
di'' <- MArray s -> Int -> JamoBuf -> ST s Int
forall s. MArray s -> Int -> JamoBuf -> ST s Int
writeJamoBuf MArray s
arr Int
di' JamoBuf
jbuf
                        MArray s -> Int -> ST s Text
done MArray s
arr Int
di''
                    Skip si' :: s
si'    -> s -> Int -> Maybe Char -> ReBuf -> JamoBuf -> ST s Text
encode s
si' Int
di Maybe Char
st ReBuf
rbuf JamoBuf
jbuf
                    Yield c :: Char
c si' :: s
si' -> do
                        (di' :: Int
di', st' :: Maybe Char
st', rbuf' :: ReBuf
rbuf', jbuf' :: JamoBuf
jbuf') <- DecomposeMode
-> MArray s
-> Int
-> Maybe Char
-> ReBuf
-> JamoBuf
-> Char
-> ST s (Int, Maybe Char, ReBuf, JamoBuf)
forall s.
DecomposeMode
-> MArray s
-> Int
-> Maybe Char
-> ReBuf
-> JamoBuf
-> Char
-> ST s (Int, Maybe Char, ReBuf, JamoBuf)
composeChar DecomposeMode
mode MArray s
arr Int
di Maybe Char
st ReBuf
rbuf JamoBuf
jbuf Char
c
                        s -> Int -> Maybe Char -> ReBuf -> JamoBuf -> ST s Text
encode s
si' Int
di' Maybe Char
st' ReBuf
rbuf' JamoBuf
jbuf'

        -- keep uncommon case separate from the common case code
        {-# NOINLINE realloc #-}
        realloc :: s -> Int -> Maybe Char -> ReBuf -> JamoBuf -> ST s Text
realloc !s
si !Int
di st :: Maybe Char
st rbuf :: ReBuf
rbuf jbuf :: JamoBuf
jbuf = do
            let newlen :: Int
newlen = Int
maxi Int -> Int -> Int
forall a. Num a => a -> a -> a
* 2
            MArray s
arr' <- Int -> ST s (MArray s)
forall s. Int -> ST s (MArray s)
A.new Int
newlen
            MArray s -> Int -> MArray s -> Int -> Int -> ST s ()
forall s. MArray s -> Int -> MArray s -> Int -> Int -> ST s ()
A.copyM MArray s
arr' 0 MArray s
arr 0 Int
di
            MArray s
-> Int -> s -> Int -> Maybe Char -> ReBuf -> JamoBuf -> ST s Text
outer MArray s
arr' (Int
newlen Int -> Int -> Int
forall a. Num a => a -> a -> a
- 1) s
si Int
di Maybe Char
st ReBuf
rbuf JamoBuf
jbuf

  MArray s
-> Int -> s -> Int -> Maybe Char -> ReBuf -> JamoBuf -> ST s Text
outer MArray s
arr0 (Int
mlen Int -> Int -> Int
forall a. Num a => a -> a -> a
- 1) s
s0 0 Maybe Char
forall a. Maybe a
Nothing ReBuf
Empty JamoBuf
JamoEmpty
{-# INLINE [0] unstreamC #-}