{-# LANGUAGE RecordWildCards, OverloadedStrings, FlexibleInstances #-} module NFA where import Data.List (nub, (\\), union, lookup) import Data.String (IsString (..)) data NFA a q = NFA { states :: [q] , alphabet :: [a] , initial :: q , trans :: [(q,a,q)] , finals :: [q] } deriving (Show) data Regex a = Letter [a] -- choice | Union (Regex a) (Regex a) | Intersection (Regex a) (Regex a) | Concat (Regex a) (Regex a) | Compl (Regex a) | Star (Regex a) deriving (Show) ---------------------------------------------------------------------- -- NFA ---------------------------------------------------------------------- -- auxiliary dfs :: Eq n => n -> (n -> [n]) -> [n] dfs n succs = walk [n] [] where walk [] visited = visited walk (m:ms) visited | m `elem` visited = walk ms visited | otherwise = walk (succs m ++ ms) (m:visited) powerset :: [a] -> [[a]] powerset [] = [[]] powerset (a:as) = pps ++ [ a : ps | ps <- pps] where pps = powerset as -- utilities delta :: (Eq a, Eq q) => NFA a q -> q -> a -> [q] delta nfa q a = [p2 | (p1,b,p2) <- trans nfa, p1 == q, a == b ] delta' :: (Eq a, Eq q) => NFA a q -> q -> [a] -> [q] delta' nfa = walk where walk q [] = [q] walk q (a:as) = concatMap (\ q' -> walk q' as) (delta nfa q a) successors :: Eq q => NFA a q -> q -> [q] successors nfa q = [ p2 | (p1,_,p2) <- trans nfa, p1 == q ] isFinal :: Eq q => NFA a q -> q -> Bool isFinal NFA {..} q = q `elem` finals -- rename is supposed to be injective, for simplicity renameStates :: (q -> q') -> NFA a q -> NFA a q' renameStates rename nfa = NFA { states = rename `map` states nfa , alphabet = alphabet nfa , initial = rename (initial nfa) , trans = map (\ (p,a,q) -> (rename p, a, rename q)) (trans nfa) , finals = rename `map` finals nfa } --- good enough for exercise minimise :: Eq q => NFA a q -> NFA a q minimise = restrictReachable where restrictReachable nfa@NFA {..} = nfa { states = states' , trans = trans' } where states' = dfs initial (successors nfa) trans' = filter (\ (p,_,_) -> p `elem` states') trans determinize :: (Eq q, Eq a) => NFA a q -> NFA a [q] determinize nfa = minimise NFA { states = pps , alphabet = alphabet nfa , initial = [initial nfa] , trans = [(ps,a, nub (concatMap (\ p -> delta nfa p a) ps)) | ps <- pps , a <- alphabet nfa ] , finals = filter (any (isFinal nfa)) pps } where pps = powerset (nub (states nfa)) -- REGEX operators data UnionState q1 q2 = Unique | Q1 q1 | Q2 q2 deriving (Eq, Ord) nfaUnion :: (Eq q1, Eq q2, Eq a) => NFA a q1 -> NFA a q2 -> NFA a (UnionState q1 q2) nfaUnion nfa1 nfa2 = minimise NFA { states = Unique : (states nfa1' ++ states nfa2') , alphabet = alphabet nfa1' ++ alphabet nfa2' , initial = Unique , trans = [ (Unique,a,q) | (p,a,q) <- transU, p == initial nfa1' || p == initial nfa2' ] ++ transU , finals = if isFinal nfa1 (initial nfa1) || isFinal nfa2 (initial nfa2) then Unique : (finals nfa1' ++ finals nfa2') else finals nfa1' ++ finals nfa2' } where nfa1' = renameStates Q1 nfa1 nfa2' = renameStates Q2 nfa2 transU = trans nfa1' ++ trans nfa2' -- a more efficient implementation would traverse gradually the product graph from (initial nfa1, initial nfa2) nfaIntersection :: (Eq q1, Eq q2, Eq a) => NFA a q1 -> NFA a q2 -> NFA a (q1,q2) nfaIntersection nfa1 nfa2 = minimise NFA { states = [(p,q) | p <- states nfa1, q <- states nfa2] , alphabet = alphabet nfa1 `union` alphabet nfa2 , initial = (initial nfa1, initial nfa2) , trans = [((p,q),a,(p',q')) | p <- states nfa1 , q <- states nfa2 , a <- alphabet nfa1 `union` alphabet nfa2 , p' <- delta nfa1 p a , q' <- delta nfa2 q a ] , finals = [(p,q) | p <- finals nfa1, q <- finals nfa2] } nfaConcatenate :: (Eq q1, Eq q2, Eq a) => NFA a q1 -> NFA a q2 -> NFA a (Either q1 q2) nfaConcatenate nfa1 nfa2 = minimise NFA { states = states nfa1' ++ states nfa2' , alphabet = alphabet nfa1 `union` alphabet nfa2 , initial = initial nfa1' , trans = bridge ++ trans nfa1' ++ trans nfa2' , finals = finals nfa2' } where nfa1' = renameStates Left nfa1 nfa2' = renameStates Right nfa2 bridge = [(p,a, initial nfa2') | (p,a,q) <- trans nfa1', isFinal nfa1' q] ++ if isFinal nfa1 (initial nfa1) then [(initial nfa1', a,q) | a <- alphabet nfa2, q <- delta nfa2' (initial nfa2') a] else [] nfaStar :: (Eq a, Eq q) => NFA a q -> NFA a (Maybe q) nfaStar nfa = minimise NFA { states = fresh : states nfa' , alphabet = alphabet nfa , initial = fresh , trans = [(p,a,q) | p <- fresh : finals nfa' , a <- alphabet nfa' , q <- delta nfa' (initial nfa') a] ++ trans nfa' , finals = fresh : finals nfa' } where nfa' = renameStates Just nfa fresh = Nothing nfaComplement :: (Eq a, Eq q) => NFA a q -> NFA a [q] nfaComplement nfa = dfa { finals = states dfa \\ finals dfa } where dfa = determinize nfa ---------------------------------------------------------------------- -- conversion ---------------------------------------------------------------------- toNFA :: Eq a => Regex a -> NFA a Int toNFA regex = case regex of Letter as -> NFA { states = [0,1], alphabet = as, initial = 0, trans = [(0,a,1) | a <- as], finals = [1] } Union r1 r2 -> combineWith nfaUnion r1 r2 Intersection r1 r2 -> combineWith nfaIntersection r1 r2 Concat r1 r2 -> combineWith nfaConcatenate r1 r2 Compl r -> apply nfaComplement r Star r -> apply nfaStar r where combineWith f r1 r2 = rename (toNFA r1 `f` toNFA r2) apply f r = rename (f (toNFA r)) rename nfa = renameStates ren nfa where enum = zip (states nfa) [0..] ren q = case lookup q enum of {Just i -> i; Nothing -> undefined} match :: Eq a => [a] -> Regex a -> Bool match as r = any (isFinal nfa) (delta' nfa (initial nfa) as) where nfa = toNFA r -- examples instance IsString (Regex Char) where fromString [] = Star (Letter []) fromString [c] = Letter [c] fromString (c:cs) = Letter [c] `Concat` fromString cs (.|),(.&),(.*) :: Regex a -> Regex a -> Regex a (.|) = Union (.&) = Intersection (.*) = Concat ex1 :: Regex Char ex1 = Star ("a" .| "b") .* "a" .* Star "ab" -- match "aaa" ex1