{-# OPTIONS_GHC -fspec-constr #-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE DeriveDataTypeable #-}
#if defined(__GLASGOW_HASKELL__) && __GLASGOW_HASKELL__ >= 702
{-# LANGUAGE Trustworthy #-}
#endif
-----------------------------------------------------------------------------
-- |
-- Module      :  Data.CharSet
-- Copyright   :  (c) Edward Kmett 2010-2011
-- License     :  BSD3
-- Maintainer  :  ekmett@gmail.com
-- Stability   :  experimental
-- Portability :  portable
--
-- A CharSet is an /efficient/ representation of a set of 'Char' values
-- designed for fast membership tests.
--
-- As an example @build isAlpha@ will create a set of alphabetic characters.
-- We can then use 'member' on the generated set to /efficiently/ test if a
-- given @Char@ represents an alphabetic character.
--
-- Designed to be imported qualified:
--
-- > import Data.CharSet (CharSet)
-- > import qualified Data.CharSet as CharSet
--
-------------------------------------------------------------------------------

module Data.CharSet
    (
    -- * Set type
      CharSet(..)
    -- * Operators
    , (\\)
    -- * Query
    , null
    , size
    , member
    , notMember
    , overlaps, isSubsetOf
    , isComplemented
    -- * Construction
    , build
    , empty
    , singleton
    , full
    , insert
    , delete
    , complement
    , range
    -- * Combine
    , union
    , intersection
    , difference
    -- * Filter
    , filter
    , partition
    -- * Map
    , map
    -- * Fold
    , fold
    -- * Conversion
    -- ** List
    , toList
    , fromList
    -- ** Ordered list
    , toAscList
    , fromAscList
    , fromDistinctAscList
    -- ** IntMaps
    , fromCharSet
    , toCharSet
    -- ** Array
    , toArray
    ) where

#if defined(__GLASGOW_HASKELL__) && __GLASGOW_HASKELL__ >= 608
import Data.String (IsString(..))
-- <<< -XOverloadedStrings >>> was introduced by GHC 6.8.1
#endif

import Data.Array.Unboxed hiding (range)
import Data.Data
import Data.Function (on)
import Data.IntSet (IntSet)
import Data.CharSet.ByteSet (ByteSet)
import qualified Data.CharSet.ByteSet as ByteSet
import Data.Bits hiding (complement)
import Data.Word
import Data.ByteString.Internal (c2w)
import Data.Semigroup
import qualified Data.IntSet as I
import qualified Data.List as L
import Prelude hiding (filter, map, null)
import qualified Prelude as P
import Text.Read

-- | Stored as a (possibly negated) IntSet and a fast set used for the head byte.
--
-- The set of valid (possibly negated) head bytes is stored unboxed as a 32-byte
-- bytestring-based lookup table.
data CharSet = CharSet
    !Bool    -- Whether ByteSet and IntSet are negated
    !ByteSet -- Set of head bytes, unboxed
    !IntSet  -- Set of characters in the charset
  deriving Typeable

#if defined(__GLASGOW_HASKELL__) && __GLASGOW_HASKELL__ >= 608
-- | @= CharSet.`fromList`@
instance IsString CharSet where
  fromString :: String -> CharSet
fromString = String -> CharSet
fromList
#endif

charSet :: Bool -> IntSet -> CharSet
charSet :: Bool -> IntSet -> CharSet
charSet b :: Bool
b s :: IntSet
s = Bool -> ByteSet -> IntSet -> CharSet
CharSet Bool
b ([Word8] -> ByteSet
ByteSet.fromList ((Int -> Word8) -> [Int] -> [Word8]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Int -> Word8
headByte (IntSet -> [Int]
I.toAscList IntSet
s))) IntSet
s

headByte :: Int -> Word8
headByte :: Int -> Word8
headByte i :: Int
i
  | Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= 0x7f   = Int -> Word8
forall a. Enum a => Int -> a
toEnum Int
i
  | Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= 0x7ff  = Int -> Word8
forall a. Enum a => Int -> a
toEnum (Int -> Word8) -> Int -> Word8
forall a b. (a -> b) -> a -> b
$ 0x80 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ (Int
i Int -> Int -> Int
forall a. Bits a => a -> Int -> a
`shiftR` 6)
  | Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= 0xffff = Int -> Word8
forall a. Enum a => Int -> a
toEnum (Int -> Word8) -> Int -> Word8
forall a b. (a -> b) -> a -> b
$ 0xe0 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ (Int
i Int -> Int -> Int
forall a. Bits a => a -> Int -> a
`shiftR` 12)
  | Bool
otherwise   = Int -> Word8
forall a. Enum a => Int -> a
toEnum (Int -> Word8) -> Int -> Word8
forall a b. (a -> b) -> a -> b
$ 0xf0 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ (Int
i Int -> Int -> Int
forall a. Bits a => a -> Int -> a
`shiftR` 18)

pos :: IntSet -> CharSet
pos :: IntSet -> CharSet
pos = Bool -> IntSet -> CharSet
charSet Bool
True

neg :: IntSet -> CharSet
neg :: IntSet -> CharSet
neg = Bool -> IntSet -> CharSet
charSet Bool
False

(\\) :: CharSet -> CharSet -> CharSet
\\ :: CharSet -> CharSet -> CharSet
(\\) = CharSet -> CharSet -> CharSet
difference

-- | Applies a predicate across the whole range of possible character values
-- to create a set of only those characters which satisfy the predicate.
--
-- As an example @build isAlpha@ will generate a CharSet of all
-- alphabetic characters.
build :: (Char -> Bool) -> CharSet
build :: (Char -> Bool) -> CharSet
build p :: Char -> Bool
p = String -> CharSet
fromDistinctAscList (String -> CharSet) -> String -> CharSet
forall a b. (a -> b) -> a -> b
$ (Char -> Bool) -> String -> String
forall a. (a -> Bool) -> [a] -> [a]
P.filter Char -> Bool
p [Char
forall a. Bounded a => a
minBound .. Char
forall a. Bounded a => a
maxBound]
{-# INLINE build #-}

map :: (Char -> Char) -> CharSet -> CharSet
map :: (Char -> Char) -> CharSet -> CharSet
map f :: Char -> Char
f (CharSet True _ i :: IntSet
i) = IntSet -> CharSet
pos ((Int -> Int) -> IntSet -> IntSet
I.map (Char -> Int
forall a. Enum a => a -> Int
fromEnum (Char -> Int) -> (Int -> Char) -> Int -> Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Char
f (Char -> Char) -> (Int -> Char) -> Int -> Char
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> Char
forall a. Enum a => Int -> a
toEnum) IntSet
i)
map f :: Char -> Char
f (CharSet False _ i :: IntSet
i) = String -> CharSet
fromList (String -> CharSet) -> String -> CharSet
forall a b. (a -> b) -> a -> b
$ (Char -> Char) -> String -> String
forall a b. (a -> b) -> [a] -> [b]
P.map Char -> Char
f (String -> String) -> String -> String
forall a b. (a -> b) -> a -> b
$ (Char -> Bool) -> String -> String
forall a. (a -> Bool) -> [a] -> [a]
P.filter (\x :: Char
x -> Char -> Int
forall a. Enum a => a -> Int
fromEnum Char
x Int -> IntSet -> Bool
`I.notMember` IntSet
i) [Char
ul..Char
uh]
{-# INLINE map #-}

isComplemented :: CharSet -> Bool
isComplemented :: CharSet -> Bool
isComplemented (CharSet True _ _) = Bool
False
isComplemented (CharSet False _ _) = Bool
True
{-# INLINE isComplemented #-}

toList :: CharSet -> String
toList :: CharSet -> String
toList (CharSet True _ i :: IntSet
i) = (Int -> Char) -> [Int] -> String
forall a b. (a -> b) -> [a] -> [b]
P.map Int -> Char
forall a. Enum a => Int -> a
toEnum (IntSet -> [Int]
I.toList IntSet
i)
toList (CharSet False _ i :: IntSet
i) = (Char -> Bool) -> String -> String
forall a. (a -> Bool) -> [a] -> [a]
P.filter (\x :: Char
x -> Char -> Int
forall a. Enum a => a -> Int
fromEnum Char
x Int -> IntSet -> Bool
`I.notMember` IntSet
i) [Char
ul..Char
uh]
{-# INLINE toList #-}

toAscList :: CharSet -> String
toAscList :: CharSet -> String
toAscList (CharSet True _ i :: IntSet
i) = (Int -> Char) -> [Int] -> String
forall a b. (a -> b) -> [a] -> [b]
P.map Int -> Char
forall a. Enum a => Int -> a
toEnum (IntSet -> [Int]
I.toAscList IntSet
i)
toAscList (CharSet False _ i :: IntSet
i) = (Char -> Bool) -> String -> String
forall a. (a -> Bool) -> [a] -> [a]
P.filter (\x :: Char
x -> Char -> Int
forall a. Enum a => a -> Int
fromEnum Char
x Int -> IntSet -> Bool
`I.notMember` IntSet
i) [Char
ul..Char
uh]
{-# INLINE toAscList #-}

empty :: CharSet
empty :: CharSet
empty = IntSet -> CharSet
pos IntSet
I.empty

singleton :: Char -> CharSet
singleton :: Char -> CharSet
singleton = IntSet -> CharSet
pos (IntSet -> CharSet) -> (Char -> IntSet) -> Char -> CharSet
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> IntSet
I.singleton (Int -> IntSet) -> (Char -> Int) -> Char -> IntSet
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Int
forall a. Enum a => a -> Int
fromEnum
{-# INLINE singleton #-}

full :: CharSet
full :: CharSet
full = IntSet -> CharSet
neg IntSet
I.empty

-- | /O(n)/ worst case
null :: CharSet -> Bool
null :: CharSet -> Bool
null (CharSet True _ i :: IntSet
i) = IntSet -> Bool
I.null IntSet
i
null (CharSet False _ i :: IntSet
i) = IntSet -> Int
I.size IntSet
i Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
numChars
{-# INLINE null #-}

-- | /O(n)/
size :: CharSet -> Int
size :: CharSet -> Int
size (CharSet True _ i :: IntSet
i) = IntSet -> Int
I.size IntSet
i
size (CharSet False _ i :: IntSet
i) = Int
numChars Int -> Int -> Int
forall a. Num a => a -> a -> a
- IntSet -> Int
I.size IntSet
i
{-# INLINE size #-}

insert :: Char -> CharSet -> CharSet
insert :: Char -> CharSet -> CharSet
insert c :: Char
c (CharSet True _ i :: IntSet
i) = IntSet -> CharSet
pos (Int -> IntSet -> IntSet
I.insert (Char -> Int
forall a. Enum a => a -> Int
fromEnum Char
c) IntSet
i)
insert c :: Char
c (CharSet False _ i :: IntSet
i) = IntSet -> CharSet
neg (Int -> IntSet -> IntSet
I.delete (Char -> Int
forall a. Enum a => a -> Int
fromEnum Char
c) IntSet
i)
{-# INLINE insert #-}

range :: Char -> Char -> CharSet
range :: Char -> Char -> CharSet
range a :: Char
a b :: Char
b
  | Char
a Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
b = String -> CharSet
fromDistinctAscList [Char
a..Char
b]
  | Bool
otherwise = CharSet
empty

delete :: Char -> CharSet -> CharSet
delete :: Char -> CharSet -> CharSet
delete c :: Char
c (CharSet True _ i :: IntSet
i) = IntSet -> CharSet
pos (Int -> IntSet -> IntSet
I.delete (Char -> Int
forall a. Enum a => a -> Int
fromEnum Char
c) IntSet
i)
delete c :: Char
c (CharSet False _ i :: IntSet
i) = IntSet -> CharSet
neg (Int -> IntSet -> IntSet
I.insert (Char -> Int
forall a. Enum a => a -> Int
fromEnum Char
c) IntSet
i)
{-# INLINE delete #-}

complement :: CharSet -> CharSet
complement :: CharSet -> CharSet
complement (CharSet True s :: ByteSet
s i :: IntSet
i) = Bool -> ByteSet -> IntSet -> CharSet
CharSet Bool
False ByteSet
s IntSet
i
complement (CharSet False s :: ByteSet
s i :: IntSet
i) = Bool -> ByteSet -> IntSet -> CharSet
CharSet Bool
True ByteSet
s IntSet
i
{-# INLINE complement #-}

union :: CharSet -> CharSet -> CharSet
union :: CharSet -> CharSet -> CharSet
union (CharSet True _ i :: IntSet
i) (CharSet True _ j :: IntSet
j) = IntSet -> CharSet
pos (IntSet -> IntSet -> IntSet
I.union IntSet
i IntSet
j)
union (CharSet True _ i :: IntSet
i) (CharSet False _ j :: IntSet
j) = IntSet -> CharSet
neg (IntSet -> IntSet -> IntSet
I.difference IntSet
j IntSet
i)
union (CharSet False _ i :: IntSet
i) (CharSet True _ j :: IntSet
j) = IntSet -> CharSet
neg (IntSet -> IntSet -> IntSet
I.difference IntSet
i IntSet
j)
union (CharSet False _ i :: IntSet
i) (CharSet False _ j :: IntSet
j) = IntSet -> CharSet
neg (IntSet -> IntSet -> IntSet
I.intersection IntSet
i IntSet
j)
{-# INLINE union #-}

intersection :: CharSet -> CharSet -> CharSet
intersection :: CharSet -> CharSet -> CharSet
intersection (CharSet True _ i :: IntSet
i) (CharSet True _ j :: IntSet
j) = IntSet -> CharSet
pos (IntSet -> IntSet -> IntSet
I.intersection IntSet
i IntSet
j)
intersection (CharSet True _ i :: IntSet
i) (CharSet False _ j :: IntSet
j) = IntSet -> CharSet
pos (IntSet -> IntSet -> IntSet
I.difference IntSet
i IntSet
j)
intersection (CharSet False _ i :: IntSet
i) (CharSet True _ j :: IntSet
j) = IntSet -> CharSet
pos (IntSet -> IntSet -> IntSet
I.difference IntSet
j IntSet
i)
intersection (CharSet False _ i :: IntSet
i) (CharSet False _ j :: IntSet
j) = IntSet -> CharSet
neg (IntSet -> IntSet -> IntSet
I.union IntSet
i IntSet
j)
{-# INLINE intersection #-}

difference :: CharSet -> CharSet -> CharSet
difference :: CharSet -> CharSet -> CharSet
difference (CharSet True _ i :: IntSet
i) (CharSet True _ j :: IntSet
j) = IntSet -> CharSet
pos (IntSet -> IntSet -> IntSet
I.difference IntSet
i IntSet
j)
difference (CharSet True _ i :: IntSet
i) (CharSet False _ j :: IntSet
j) = IntSet -> CharSet
pos (IntSet -> IntSet -> IntSet
I.intersection IntSet
i IntSet
j)
difference (CharSet False _ i :: IntSet
i) (CharSet True _ j :: IntSet
j) = IntSet -> CharSet
neg (IntSet -> IntSet -> IntSet
I.union IntSet
i IntSet
j)
difference (CharSet False _ i :: IntSet
i) (CharSet False _ j :: IntSet
j) = IntSet -> CharSet
pos (IntSet -> IntSet -> IntSet
I.difference IntSet
j IntSet
i)
{-# INLINE difference #-}

member :: Char -> CharSet -> Bool
member :: Char -> CharSet -> Bool
member c :: Char
c (CharSet True b :: ByteSet
b i :: IntSet
i)
  | Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Int -> Char
forall a. Enum a => Int -> a
toEnum 0x7f = Word8 -> ByteSet -> Bool
ByteSet.member (Char -> Word8
c2w Char
c) ByteSet
b
  | Bool
otherwise        = Int -> IntSet -> Bool
I.member (Char -> Int
forall a. Enum a => a -> Int
fromEnum Char
c) IntSet
i
member c :: Char
c (CharSet False b :: ByteSet
b i :: IntSet
i)
  | Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Int -> Char
forall a. Enum a => Int -> a
toEnum 0x7f = Bool -> Bool
not (Word8 -> ByteSet -> Bool
ByteSet.member (Char -> Word8
c2w Char
c) ByteSet
b)
  | Bool
otherwise        = Int -> IntSet -> Bool
I.notMember (Char -> Int
forall a. Enum a => a -> Int
fromEnum Char
c) IntSet
i
{-# INLINE member #-}

notMember :: Char -> CharSet -> Bool
notMember :: Char -> CharSet -> Bool
notMember c :: Char
c s :: CharSet
s = Bool -> Bool
not (Char -> CharSet -> Bool
member Char
c CharSet
s)
{-# INLINE notMember #-}

fold :: (Char -> b -> b) -> b -> CharSet -> b
fold :: (Char -> b -> b) -> b -> CharSet -> b
fold f :: Char -> b -> b
f z :: b
z (CharSet True _ i :: IntSet
i) = (Int -> b -> b) -> b -> IntSet -> b
forall b. (Int -> b -> b) -> b -> IntSet -> b
I.fold (Char -> b -> b
f (Char -> b -> b) -> (Int -> Char) -> Int -> b -> b
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> Char
forall a. Enum a => Int -> a
toEnum) b
z IntSet
i
fold f :: Char -> b -> b
f z :: b
z (CharSet False _ i :: IntSet
i) = (Char -> b -> b) -> b -> String -> b
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr Char -> b -> b
f b
z (String -> b) -> String -> b
forall a b. (a -> b) -> a -> b
$ (Char -> Bool) -> String -> String
forall a. (a -> Bool) -> [a] -> [a]
P.filter (\x :: Char
x -> Char -> Int
forall a. Enum a => a -> Int
fromEnum Char
x Int -> IntSet -> Bool
`I.notMember` IntSet
i) [Char
ul..Char
uh]
{-# INLINE fold #-}

filter :: (Char -> Bool) -> CharSet -> CharSet
filter :: (Char -> Bool) -> CharSet -> CharSet
filter p :: Char -> Bool
p (CharSet True _ i :: IntSet
i) = IntSet -> CharSet
pos ((Int -> Bool) -> IntSet -> IntSet
I.filter (Char -> Bool
p (Char -> Bool) -> (Int -> Char) -> Int -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> Char
forall a. Enum a => Int -> a
toEnum) IntSet
i)
filter p :: Char -> Bool
p (CharSet False _ i :: IntSet
i) = IntSet -> CharSet
neg (IntSet -> CharSet) -> IntSet -> CharSet
forall a b. (a -> b) -> a -> b
$ (Int -> IntSet -> IntSet) -> IntSet -> [Int] -> IntSet
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr (Int -> IntSet -> IntSet
I.insert) IntSet
i ([Int] -> IntSet) -> [Int] -> IntSet
forall a b. (a -> b) -> a -> b
$ (Int -> Bool) -> [Int] -> [Int]
forall a. (a -> Bool) -> [a] -> [a]
P.filter (\x :: Int
x -> (Int
x Int -> IntSet -> Bool
`I.notMember` IntSet
i) Bool -> Bool -> Bool
&& Bool -> Bool
not (Char -> Bool
p (Int -> Char
forall a. Enum a => Int -> a
toEnum Int
x))) [Int
ol..Int
oh]
{-# INLINE filter #-}

partition :: (Char -> Bool) -> CharSet -> (CharSet, CharSet)
partition :: (Char -> Bool) -> CharSet -> (CharSet, CharSet)
partition p :: Char -> Bool
p (CharSet True _ i :: IntSet
i) = (IntSet -> CharSet
pos IntSet
l, IntSet -> CharSet
pos IntSet
r)
    where (l :: IntSet
l,r :: IntSet
r) = (Int -> Bool) -> IntSet -> (IntSet, IntSet)
I.partition (Char -> Bool
p (Char -> Bool) -> (Int -> Char) -> Int -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> Char
forall a. Enum a => Int -> a
toEnum) IntSet
i
partition p :: Char -> Bool
p (CharSet False _ i :: IntSet
i) = (IntSet -> CharSet
neg ((Int -> IntSet -> IntSet) -> IntSet -> [Int] -> IntSet
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr Int -> IntSet -> IntSet
I.insert IntSet
i [Int]
l), IntSet -> CharSet
neg ((Int -> IntSet -> IntSet) -> IntSet -> [Int] -> IntSet
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr Int -> IntSet -> IntSet
I.insert IntSet
i [Int]
r))
    where (l :: [Int]
l,r :: [Int]
r) = (Int -> Bool) -> [Int] -> ([Int], [Int])
forall a. (a -> Bool) -> [a] -> ([a], [a])
L.partition (Char -> Bool
p (Char -> Bool) -> (Int -> Char) -> Int -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> Char
forall a. Enum a => Int -> a
toEnum) ([Int] -> ([Int], [Int])) -> [Int] -> ([Int], [Int])
forall a b. (a -> b) -> a -> b
$ (Int -> Bool) -> [Int] -> [Int]
forall a. (a -> Bool) -> [a] -> [a]
P.filter (\x :: Int
x -> Int
x Int -> IntSet -> Bool
`I.notMember` IntSet
i) [Int
ol..Int
oh]
{-# INLINE partition #-}

overlaps :: CharSet -> CharSet -> Bool
overlaps :: CharSet -> CharSet -> Bool
overlaps (CharSet True _ i :: IntSet
i) (CharSet True _ j :: IntSet
j) = Bool -> Bool
not (IntSet -> Bool
I.null (IntSet -> IntSet -> IntSet
I.intersection IntSet
i IntSet
j))
overlaps (CharSet True _ i :: IntSet
i) (CharSet False _ j :: IntSet
j) = Bool -> Bool
not (IntSet -> IntSet -> Bool
I.isSubsetOf IntSet
j IntSet
i)
overlaps (CharSet False _ i :: IntSet
i) (CharSet True _ j :: IntSet
j) = Bool -> Bool
not (IntSet -> IntSet -> Bool
I.isSubsetOf IntSet
i IntSet
j)
overlaps (CharSet False _ i :: IntSet
i) (CharSet False _ j :: IntSet
j) = (Int -> Bool) -> [Int] -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
any (\x :: Int
x -> Int -> IntSet -> Bool
I.notMember Int
x IntSet
i Bool -> Bool -> Bool
&& Int -> IntSet -> Bool
I.notMember Int
x IntSet
j) [Int
ol..Int
oh] -- not likely
{-# INLINE overlaps #-}

isSubsetOf :: CharSet -> CharSet -> Bool
isSubsetOf :: CharSet -> CharSet -> Bool
isSubsetOf (CharSet True _ i :: IntSet
i) (CharSet True _ j :: IntSet
j) = IntSet -> IntSet -> Bool
I.isSubsetOf IntSet
i IntSet
j
isSubsetOf (CharSet True _ i :: IntSet
i) (CharSet False _ j :: IntSet
j) = IntSet -> Bool
I.null (IntSet -> IntSet -> IntSet
I.intersection IntSet
i IntSet
j)
isSubsetOf (CharSet False _ i :: IntSet
i) (CharSet True _ j :: IntSet
j) = (Int -> Bool) -> [Int] -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
all (\x :: Int
x -> Int -> IntSet -> Bool
I.member Int
x IntSet
i Bool -> Bool -> Bool
&& Int -> IntSet -> Bool
I.member Int
x IntSet
j) [Int
ol..Int
oh] -- not bloody likely
isSubsetOf (CharSet False _ i :: IntSet
i) (CharSet False _ j :: IntSet
j) = IntSet -> IntSet -> Bool
I.isSubsetOf IntSet
j IntSet
i
{-# INLINE isSubsetOf #-}

fromList :: String -> CharSet
fromList :: String -> CharSet
fromList = IntSet -> CharSet
pos (IntSet -> CharSet) -> (String -> IntSet) -> String -> CharSet
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Int] -> IntSet
I.fromList ([Int] -> IntSet) -> (String -> [Int]) -> String -> IntSet
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Int) -> String -> [Int]
forall a b. (a -> b) -> [a] -> [b]
P.map Char -> Int
forall a. Enum a => a -> Int
fromEnum
{-# INLINE fromList #-}

fromAscList :: String -> CharSet
fromAscList :: String -> CharSet
fromAscList = IntSet -> CharSet
pos (IntSet -> CharSet) -> (String -> IntSet) -> String -> CharSet
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Int] -> IntSet
I.fromAscList ([Int] -> IntSet) -> (String -> [Int]) -> String -> IntSet
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Int) -> String -> [Int]
forall a b. (a -> b) -> [a] -> [b]
P.map Char -> Int
forall a. Enum a => a -> Int
fromEnum
{-# INLINE fromAscList #-}

fromDistinctAscList :: String -> CharSet
fromDistinctAscList :: String -> CharSet
fromDistinctAscList = IntSet -> CharSet
pos (IntSet -> CharSet) -> (String -> IntSet) -> String -> CharSet
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Int] -> IntSet
I.fromDistinctAscList ([Int] -> IntSet) -> (String -> [Int]) -> String -> IntSet
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Int) -> String -> [Int]
forall a b. (a -> b) -> [a] -> [b]
P.map Char -> Int
forall a. Enum a => a -> Int
fromEnum
{-# INLINE fromDistinctAscList #-}

-- isProperSubsetOf :: CharSet -> CharSet -> Bool
-- isProperSubsetOf (P i) (P j) = I.isProperSubsetOf i j
-- isProperSubsetOf (P i) (N j) = null (I.intersection i j) && ...
-- isProperSubsetOf (N i) (N j) = I.isProperSubsetOf j i

ul, uh :: Char
ul :: Char
ul = Char
forall a. Bounded a => a
minBound
uh :: Char
uh = Char
forall a. Bounded a => a
maxBound
{-# INLINE ul #-}
{-# INLINE uh #-}

ol, oh :: Int
ol :: Int
ol = Char -> Int
forall a. Enum a => a -> Int
fromEnum Char
ul
oh :: Int
oh = Char -> Int
forall a. Enum a => a -> Int
fromEnum Char
uh
{-# INLINE ol #-}
{-# INLINE oh #-}

numChars :: Int
numChars :: Int
numChars = Int
oh Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
ol Int -> Int -> Int
forall a. Num a => a -> a -> a
+ 1
{-# INLINE numChars #-}

instance Data CharSet where
  gfoldl :: (forall d b. Data d => c (d -> b) -> d -> c b)
-> (forall g. g -> c g) -> CharSet -> c CharSet
gfoldl k :: forall d b. Data d => c (d -> b) -> d -> c b
k z :: forall g. g -> c g
z set :: CharSet
set
    | CharSet -> Bool
isComplemented CharSet
set = (CharSet -> CharSet) -> c (CharSet -> CharSet)
forall g. g -> c g
z CharSet -> CharSet
complement c (CharSet -> CharSet) -> CharSet -> c CharSet
forall d b. Data d => c (d -> b) -> d -> c b
`k` CharSet -> CharSet
complement CharSet
set
    | Bool
otherwise          = (String -> CharSet) -> c (String -> CharSet)
forall g. g -> c g
z String -> CharSet
fromList c (String -> CharSet) -> String -> c CharSet
forall d b. Data d => c (d -> b) -> d -> c b
`k` CharSet -> String
toList CharSet
set

  toConstr :: CharSet -> Constr
toConstr set :: CharSet
set
    | CharSet -> Bool
isComplemented CharSet
set = Constr
complementConstr
    | Bool
otherwise = Constr
fromListConstr

  dataTypeOf :: CharSet -> DataType
dataTypeOf _ = DataType
charSetDataType

  gunfold :: (forall b r. Data b => c (b -> r) -> c r)
-> (forall r. r -> c r) -> Constr -> c CharSet
gunfold k :: forall b r. Data b => c (b -> r) -> c r
k z :: forall r. r -> c r
z c :: Constr
c = case Constr -> Int
constrIndex Constr
c of
    1 -> c (String -> CharSet) -> c CharSet
forall b r. Data b => c (b -> r) -> c r
k ((String -> CharSet) -> c (String -> CharSet)
forall r. r -> c r
z String -> CharSet
fromList)
    2 -> c (CharSet -> CharSet) -> c CharSet
forall b r. Data b => c (b -> r) -> c r
k ((CharSet -> CharSet) -> c (CharSet -> CharSet)
forall r. r -> c r
z CharSet -> CharSet
complement)
    _ -> String -> c CharSet
forall a. HasCallStack => String -> a
error "gunfold"

fromListConstr :: Constr
fromListConstr :: Constr
fromListConstr   = DataType -> String -> [String] -> Fixity -> Constr
mkConstr DataType
charSetDataType "fromList" [] Fixity
Prefix
{-# NOINLINE fromListConstr #-}

complementConstr :: Constr
complementConstr :: Constr
complementConstr = DataType -> String -> [String] -> Fixity -> Constr
mkConstr DataType
charSetDataType "complement" [] Fixity
Prefix
{-# NOINLINE complementConstr #-}

charSetDataType :: DataType
charSetDataType :: DataType
charSetDataType  = String -> [Constr] -> DataType
mkDataType "Data.CharSet.CharSet" [Constr
fromListConstr, Constr
complementConstr]
{-# NOINLINE charSetDataType #-}

-- returns an intset and if the charSet is positive
fromCharSet :: CharSet -> (Bool, IntSet)
fromCharSet :: CharSet -> (Bool, IntSet)
fromCharSet (CharSet b :: Bool
b _ i :: IntSet
i) = (Bool
b, IntSet
i)
{-# INLINE fromCharSet #-}

toCharSet :: IntSet -> CharSet
toCharSet :: IntSet -> CharSet
toCharSet = IntSet -> CharSet
pos
{-# INLINE toCharSet #-}

instance Eq CharSet where
  == :: CharSet -> CharSet -> Bool
(==) = String -> String -> Bool
forall a. Eq a => a -> a -> Bool
(==) (String -> String -> Bool)
-> (CharSet -> String) -> CharSet -> CharSet -> Bool
forall b c a. (b -> b -> c) -> (a -> b) -> a -> a -> c
`on` CharSet -> String
toAscList

instance Ord CharSet where
  compare :: CharSet -> CharSet -> Ordering
compare = String -> String -> Ordering
forall a. Ord a => a -> a -> Ordering
compare (String -> String -> Ordering)
-> (CharSet -> String) -> CharSet -> CharSet -> Ordering
forall b c a. (b -> b -> c) -> (a -> b) -> a -> a -> c
`on` CharSet -> String
toAscList

instance Bounded CharSet where
  minBound :: CharSet
minBound = CharSet
empty
  maxBound :: CharSet
maxBound = CharSet
full

-- TODO return a tighter bounded array perhaps starting from the least element present to the last element present?
toArray :: CharSet -> UArray Char Bool
toArray :: CharSet -> UArray Char Bool
toArray set :: CharSet
set = (Char, Char) -> [(Char, Bool)] -> UArray Char Bool
forall (a :: * -> * -> *) e i.
(IArray a e, Ix i) =>
(i, i) -> [(i, e)] -> a i e
array (Char
forall a. Bounded a => a
minBound, Char
forall a. Bounded a => a
maxBound) ([(Char, Bool)] -> UArray Char Bool)
-> [(Char, Bool)] -> UArray Char Bool
forall a b. (a -> b) -> a -> b
$ (Char -> (Char, Bool)) -> String -> [(Char, Bool)]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (\x :: Char
x -> (Char
x, Char
x Char -> CharSet -> Bool
`member` CharSet
set)) [Char
forall a. Bounded a => a
minBound .. Char
forall a. Bounded a => a
maxBound]

instance Show CharSet where
  showsPrec :: Int -> CharSet -> String -> String
showsPrec d :: Int
d i :: CharSet
i
    | CharSet -> Bool
isComplemented CharSet
i = Bool -> (String -> String) -> String -> String
showParen (Int
d Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> 10) ((String -> String) -> String -> String)
-> (String -> String) -> String -> String
forall a b. (a -> b) -> a -> b
$ String -> String -> String
showString "complement " (String -> String) -> (String -> String) -> String -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> CharSet -> String -> String
forall a. Show a => Int -> a -> String -> String
showsPrec 11 (CharSet -> CharSet
complement CharSet
i)
    | Bool
otherwise        = Bool -> (String -> String) -> String -> String
showParen (Int
d Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> 10) ((String -> String) -> String -> String)
-> (String -> String) -> String -> String
forall a b. (a -> b) -> a -> b
$ String -> String -> String
showString "fromDistinctAscList " (String -> String) -> (String -> String) -> String -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> String -> String -> String
forall a. Show a => Int -> a -> String -> String
showsPrec 11 (CharSet -> String
toAscList CharSet
i)

instance Read CharSet where
  readPrec :: ReadPrec CharSet
readPrec = ReadPrec CharSet -> ReadPrec CharSet
forall a. ReadPrec a -> ReadPrec a
parens (ReadPrec CharSet -> ReadPrec CharSet)
-> ReadPrec CharSet -> ReadPrec CharSet
forall a b. (a -> b) -> a -> b
$ ReadPrec CharSet
complemented ReadPrec CharSet -> ReadPrec CharSet -> ReadPrec CharSet
forall a. ReadPrec a -> ReadPrec a -> ReadPrec a
+++ ReadPrec CharSet
normal
    where
      complemented :: ReadPrec CharSet
complemented = Int -> ReadPrec CharSet -> ReadPrec CharSet
forall a. Int -> ReadPrec a -> ReadPrec a
prec 10 (ReadPrec CharSet -> ReadPrec CharSet)
-> ReadPrec CharSet -> ReadPrec CharSet
forall a b. (a -> b) -> a -> b
$ do
        Ident "complement" <- ReadPrec Lexeme
lexP
        CharSet -> CharSet
complement (CharSet -> CharSet) -> ReadPrec CharSet -> ReadPrec CharSet
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
`fmap` ReadPrec CharSet -> ReadPrec CharSet
forall a. ReadPrec a -> ReadPrec a
step ReadPrec CharSet
forall a. Read a => ReadPrec a
readPrec
      normal :: ReadPrec CharSet
normal = Int -> ReadPrec CharSet -> ReadPrec CharSet
forall a. Int -> ReadPrec a -> ReadPrec a
prec 10 (ReadPrec CharSet -> ReadPrec CharSet)
-> ReadPrec CharSet -> ReadPrec CharSet
forall a b. (a -> b) -> a -> b
$ do
        Ident "fromDistinctAscList" <- ReadPrec Lexeme
lexP
        String -> CharSet
fromDistinctAscList (String -> CharSet) -> ReadPrec String -> ReadPrec CharSet
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
`fmap` ReadPrec String -> ReadPrec String
forall a. ReadPrec a -> ReadPrec a
step ReadPrec String
forall a. Read a => ReadPrec a
readPrec

instance Semigroup CharSet where
  <> :: CharSet -> CharSet -> CharSet
(<>) = CharSet -> CharSet -> CharSet
union

instance Monoid CharSet where
  mempty :: CharSet
mempty = CharSet
empty
  mappend :: CharSet -> CharSet -> CharSet
mappend = CharSet -> CharSet -> CharSet
union