{-# LANGUAGE BangPatterns #-}

-- |
-- Module      : Data.Unicode.Properties.DecomposeHangul
-- Copyright   : (c) 2016 Harendra Kumar
--
-- License     : BSD-3-Clause
-- Maintainer  : harendra.kumar@gmail.com
-- Stability   : experimental
--
module Data.Unicode.Properties.DecomposeHangul
    (decomposeCharHangul
    , hangulFirst
    , hangulLast
    , isHangul
    , isHangulLV

    , isJamo
    , jamoLFirst
    , jamoLIndex
    , jamoLLast

    , jamoVFirst
    , jamoVIndex
    , jamoVLast

    , jamoTFirst
    , jamoTCount
    , jamoTIndex
    , jamoLast

    , jamoNCount
    )
where

import Control.Exception              (assert)
import Data.Char                      (ord)
import GHC.Base                       (unsafeChr)
import Data.Unicode.Internal.Division (quotRem21, quotRem28)

-- Hangul characters can be decomposed algorithmically instead of via mappings

-------------------------------------------------------------------------------
-- General utilities used by decomposition as well as composition
-------------------------------------------------------------------------------

-- * https://www.unicode.org/versions/Unicode13.0.0/ch03.pdf
-- * https://en.wikipedia.org/wiki/List_of_Hangul_jamo
-- * https://www.unicode.org/reports/tr15/tr15-18.html#Hangul

-- D134   Standard Korean syllable block: A sequence of one or more L followed
-- by a sequence of  one  or  more  V  and  a  sequence  of  zero  or  more  T,
-- or any other sequence that is canonically equivalent

-- jamo leading
jamoLFirst, jamoLCount, jamoLLast :: Int
jamoLFirst :: Int
jamoLFirst  = 0x1100
jamoLCount :: Int
jamoLCount = 19
jamoLLast :: Int
jamoLLast = Int
jamoLFirst Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
jamoLCount Int -> Int -> Int
forall a. Num a => a -> a -> a
- 1

-- jamo vowel
jamoVFirst, jamoVCount, jamoVLast :: Int
jamoVFirst :: Int
jamoVFirst  = 0x1161
jamoVCount :: Int
jamoVCount = 21
jamoVLast :: Int
jamoVLast = Int
jamoVFirst Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
jamoVCount Int -> Int -> Int
forall a. Num a => a -> a -> a
- 1

-- jamo trailing
-- jamoTFirst does not represent a valid T, it represents a missing T i.e. LV
-- without a T. See comments under jamoTIndex .
jamoTFirst, jamoTCount :: Int
jamoTFirst :: Int
jamoTFirst  = 0x11a7
jamoTCount :: Int
jamoTCount = 28

jamoLast :: Int
jamoLast :: Int
jamoLast = Int
jamoTFirst Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
jamoTCount Int -> Int -> Int
forall a. Num a => a -> a -> a
- 1

-- VCount * TCount
jamoNCount :: Int
jamoNCount :: Int
jamoNCount = 588

-- hangul
hangulFirst, hangulLast :: Int
hangulFirst :: Int
hangulFirst = 0xac00
hangulLast :: Int
hangulLast = Int
hangulFirst Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
jamoLCount Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
jamoVCount Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
jamoTCount Int -> Int -> Int
forall a. Num a => a -> a -> a
- 1

isHangul :: Char -> Bool
isHangul :: Char -> Bool
isHangul c :: Char
c = Int
n Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
hangulFirst Bool -> Bool -> Bool
&& Int
n Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
hangulLast
    where n :: Int
n = Char -> Int
ord Char
c

isHangulLV :: Char -> Bool
isHangulLV :: Char -> Bool
isHangulLV c :: Char
c = Bool -> ((Int, Int) -> Int) -> (Int, Int) -> Int
forall a. (?callStack::CallStack) => Bool -> a -> a
assert (Int
jamoTCount Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== 28)
    (Int, Int) -> Int
forall a b. (a, b) -> b
snd (Int -> (Int, Int)
quotRem28 (Char -> Int
ord Char
c Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
hangulFirst)) Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== 0

isJamo :: Char -> Bool
isJamo :: Char -> Bool
isJamo c :: Char
c = Int
n Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
jamoLFirst Bool -> Bool -> Bool
&& Int
n Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
jamoLast
    where n :: Int
n = Char -> Int
ord Char
c

-- if it is a jamo L char return the index
jamoLIndex :: Char -> Maybe Int
jamoLIndex :: Char -> Maybe Int
jamoLIndex c :: Char
c
  | Int
index Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= 0 Bool -> Bool -> Bool
&& Int
index Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
jamoLCount = Int -> Maybe Int
forall a. a -> Maybe a
Just Int
index
  | Bool
otherwise = Maybe Int
forall a. Maybe a
Nothing
    where index :: Int
index = Char -> Int
ord Char
c Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
jamoLFirst

jamoVIndex :: Char -> Maybe Int
jamoVIndex :: Char -> Maybe Int
jamoVIndex c :: Char
c
  | Int
index Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= 0 Bool -> Bool -> Bool
&& Int
index Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
jamoVCount = Int -> Maybe Int
forall a. a -> Maybe a
Just Int
index
  | Bool
otherwise = Maybe Int
forall a. Maybe a
Nothing
    where index :: Int
index = Char -> Int
ord Char
c Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
jamoVFirst

-- Note that index 0 is not a valid index for a trailing consonant. Index 0
-- means no T, only LV syllable.
-- See Unicode 9.0.0: 3.12 (Hangul Syllable Decomposition)
-- TBase is set to one less than the beginning of the range of trailing
-- consonants, which starts at U+11A8.
jamoTIndex :: Char -> Maybe Int
jamoTIndex :: Char -> Maybe Int
jamoTIndex c :: Char
c
  | Int
index Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> 0 Bool -> Bool -> Bool
&& Int
index Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
jamoTCount = Int -> Maybe Int
forall a. a -> Maybe a
Just Int
index
  | Bool
otherwise = Maybe Int
forall a. Maybe a
Nothing
    where index :: Int
index = Char -> Int
ord Char
c Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
jamoTFirst

-------------------------------------------------------------------------------
-- Hangul decomposition
-------------------------------------------------------------------------------

{-# INLINE decomposeCharHangul #-}
decomposeCharHangul :: Char -> (Char, Char, Char)
decomposeCharHangul :: Char -> (Char, Char, Char)
decomposeCharHangul c :: Char
c = (Char
l, Char
v, Char
t)
    where
        i :: Int
i = (Char -> Int
ord Char
c) Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
hangulFirst
        !(tn :: Int
tn, ti :: Int
ti) = Bool -> (Int -> (Int, Int)) -> Int -> (Int, Int)
forall a. (?callStack::CallStack) => Bool -> a -> a
assert (Int
jamoTCount Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== 28) Int -> (Int, Int)
quotRem28 Int
i
        !(li :: Int
li, vi :: Int
vi) = Bool -> (Int -> (Int, Int)) -> Int -> (Int, Int)
forall a. (?callStack::CallStack) => Bool -> a -> a
assert (Int
jamoVCount Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== 21) Int -> (Int, Int)
quotRem21 Int
tn

        l :: Char
l = Int -> Char
unsafeChr (Int
jamoLFirst Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
li)
        v :: Char
v = Int -> Char
unsafeChr (Int
jamoVFirst Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
vi)
        t :: Char
t = Int -> Char
unsafeChr (Int
jamoTFirst Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
ti)