{-# LANGUAGE BangPatterns #-} -- various functions that convert lists of digits to an integer -- -- author: Bertram Felgenhauer -- -- changes: -- 2016-10-05: valInteger0 and valInteger (bottom up merging variants) -- 2016-10-06: stack based variants -- 2016-10-07: "bs inlined" stack variant, and original Text.Read.Lex code module FromDigits where import Data.List import Control.Monad -- The basic idea for all code below is to combine digits of base b in -- pairs to obtain digits in base b*b; for example: -- 1 2 3 4 5 6 7 8 9 0 1 2 3 -- --> 12 34 56 78 90 12 3 -- --> 1234 5678 9012 3 -- --> 12345678 90123 -- --> 1234567890123 -- Note that after each pass, all digits have the same base, except possibly -- for the last one. So that remainder needs to be tracked as well. ------------------------------------------------------------------------ -- valInteger0: first prototype; the base of the last digit is tracked -- at the end of the list. valInteger0 :: Integer -> [Integer] -> Integer valInteger0 _ [] = 0 valInteger0 b xs = go b (xs ++ [0,1]) where go :: Integer -> [Integer] -> Integer go _ [x, _] = x go _ [x, y, b'] = x * b' + y go b xs = go (b*b) (combine b xs) combine :: Integer -> [Integer] -> [Integer] combine _ [x, b'] = [x, b'] combine b [x, y, b'] = [x * b' + y, b' * b] combine b (x : y : xs) = x*b + y : combine b xs ------------------------------------------------------------------------ -- valInteger: this is a serious implementation, it features -- a) a custom datatype that stores the information of the last digit cleanly -- b) chunking to accumulate small numbers without much control flow overhead -- Digits' represents a stream of digits where the last digit carries its own -- base. For example, in base 100, we could represent 12345 as either one of -- DCons 12 (DCons 34 (DNil 5 10)) or -- DCons 1 (DCons 23 (DCons 45 (DNil 0 1))) data Digits' = DCons !Integer Digits' | DNil !Integer Integer valInteger :: Int -> [Int] -> Integer valInteger _ [] = 0 valInteger b ds = go b1 (chunks 0 0 ds) where -- chunking pass: collect digits in base b^chunkSize chunkSize :: Int chunkSize = 7 -- b^chunkSize must fit into an Int b1 :: Integer b1 = fromIntegral (b^chunkSize) chunks :: Int -> Int -> [Int] -> Digits' chunks l d ds | l == chunkSize = DCons (fromIntegral d) (chunks 0 0 ds) chunks l d [] = DNil (fromIntegral d) (fromIntegral (b^l)) chunks l d (d' : ds) = chunks (l+1) (d*b + d') ds -- bottom-up combination, squaring the base in each pass go :: Integer -> Digits' -> Integer go _ (DNil d _) = d go _ (DCons d' (DNil d b)) = d'*b + d go b (DCons d' ds) = go (b*b) (combine b d' ds) combine :: Integer -> Integer -> Digits' -> Digits' combine b d (DNil d' b') = DNil (d*b' + d') (b * b') combine b d (DCons d' (ds@DNil{})) = DCons (d*b + d') ds combine b d (DCons d' (DCons d'' ds)) = DCons (d*b + d') (combine b d'' ds) ------------------------------------------------------------------------ -- now come a few stack based variants that explicitly do the merging -- incrementally from left to right (interleaving the various levels in -- the example above), instead of obtaining this effect by lazy evaluation -- Stack is isomorphic to [Maybe Integer], allowing holes for bases in -- a sequence b, b^2, b^4, ... that are currently not occupied. -- -- For 1 2 3 4 5, the following stacks will be produced (- = Skip): -- [] -> [1] -> [-,12] -> [3,12] -> [-,-,1234] -> [5,-,1234] data Stack = SNil | SSkip !Stack | SCons !Integer Stack -- valIntegerS' is an accident: using foldr really defeats the purpose of -- the exercise... I'm keeping it as a curiosity. valIntegerS' :: Integer -> [Integer] -> Integer valIntegerS' b = fromStack bs 0 . foldr (step bs) SNil where bs :: [Integer] bs = iterate (\b -> b*b) b step :: [Integer] -> Integer -> Stack -> Stack step _ d SNil = SCons d SNil step _ d (SSkip s) = SCons d s step (b:bs) d (SCons d' s) = SSkip (step bs (d*b + d') s) fromStack :: [Integer] -> Integer -> Stack -> Integer fromStack _ a SNil = a fromStack (b:bs) a (SSkip s) = fromStack bs a s fromStack (b:bs) a (SCons d s) = fromStack bs (a * b + d) s ------------------------------------------------------------------------ -- valIntegerS is a prototype without chunking valIntegerS :: Integer -> [Integer] -> Integer valIntegerS b = fromStack bs . foldl' (step bs) SNil where bs :: [Integer] bs = iterate (\b -> b*b) b step :: [Integer] -> Stack -> Integer -> Stack step _ SNil d = SCons d SNil step _ (SSkip s) d = SCons d s step (b:bs) (SCons d' s) d = SSkip (step bs s (d + d'*b)) fromStack :: [Integer] -> Stack -> Integer fromStack _ SNil = 0 fromStack (b:bs) (SSkip s) = fromStack bs s fromStack (b:bs) (SCons d s) = (fromStack bs s) * b + d -- data Stack = SNil | SSkip !Stack | SCons !Integer Stack ------------------------------------------------------------------------ -- valInteger' is the serious implementation, featuring -- a) chunking -- b) stack-based explicit merging -- -- it could be incorporated into a parser by adapting the `goChunks` -- function (incidentally the name indicates that it arose from fusing two -- functions, `chunks` as above in `valInteger`, and `go`, folding -- over the resulting list as the `foldl` in `valIntegerS` does.) valInteger' :: Int -> [Int] -> Integer valInteger' b = goChunks SNil 0 0 where chunkSize :: Int chunkSize = 7 -- b^chunkSize must fit into an Int b1 :: Integer b1 = fromIntegral (b^chunkSize) bs :: [Integer] bs = iterate (\b -> b*b) b1 -- combined chunking (in blocks of `chunkSize` digts) and -- merging (into base b1, b1^2, b1^4, etc.) pass. goChunks :: Stack -> Int -> Int -> [Int] -> Integer goChunks !s l d ds | l == chunkSize = goChunks (step bs s (fromIntegral d)) 0 0 ds goChunks s l d [] = fromStack (fromIntegral d) (fromIntegral (b^l)) bs s goChunks s l d (d' : ds) = goChunks s (l+1) (d*b + d') ds -- merge one digit into the stack step :: [Integer] -> Stack -> Integer -> Stack step _ SNil d = SCons d SNil step _ (SSkip s) d = SCons d s step (b:bs) (SCons d' s) d = SSkip (step bs s (d + d'*b)) -- finally concatenate the mixed base digits in the stack fromStack :: Integer -> Integer -> [Integer] -> Stack -> Integer fromStack d' _ _ SNil = d' fromStack d' b' (b:bs) (SSkip s) = fromStack d' b' bs s fromStack d' b' (b:bs) (SCons d s) = fromStack (d*b' + d') (b*b') bs s ------------------------------------------------------------------------ -- same as above but "inlining" the `bs` list into the stack data Stack' = SNil' | SSkip' Integer !Stack' | SCons' Integer !Integer Stack' valInteger'' :: Int -> [Int] -> Integer valInteger'' b = goChunks SNil' 0 0 where chunkSize :: Int chunkSize = 7 -- b^chunkSize must fit into an Int b1 :: Integer b1 = fromIntegral (b^chunkSize) -- combined chunking (in blocks of `chunkSize` digts) and -- merging (into base b1, b1^2, b1^4, etc.) pass. goChunks :: Stack' -> Int -> Int -> [Int] -> Integer goChunks !s l d ds | l == chunkSize = goChunks (step s (fromIntegral d)) 0 0 ds goChunks s l d [] = fromStack (fromIntegral d) (fromIntegral (b^l)) s goChunks s l d (d' : ds) = goChunks s (l+1) (d*b + d') ds -- merge one digit into the stack step :: Stack' -> Integer -> Stack' step SNil' d = SCons' b1 d SNil' step (SSkip' b s) d = SCons' b d s step (SCons' b d' s) d = SSkip' b (step' b s (d + d'*b)) step' :: Integer -> Stack' -> Integer -> Stack' step' b SNil' d = SCons' (b*b) d SNil' step' _ (SSkip' b s) d = SCons' b d s step' _ (SCons' b d' s) d = SSkip' b (step' b s (d + d'*b)) -- finally concatenate the mixed base digits in the stack fromStack :: Integer -> Integer -> Stack' -> Integer fromStack d' _ SNil' = d' fromStack d' b' (SSkip' b s) = fromStack d' b' s fromStack d' b' (SCons' b d s) = fromStack (d*b' + d') (b*b') s ------------------------------------------------------------------------ -- `valIntegerB` is a 'baseline' version of `valInteger`, with no real -- chunking (chunk size is 1) valIntegerB :: Integer -> [Integer] -> Integer valIntegerB _ [] = 0 valIntegerB b ds = go b (chunks ds) where chunks :: [Integer] -> Digits' chunks [] = DNil 0 1 chunks (d : ds) = DCons d (chunks ds) -- bottom-up combination, squaring the base in each pass go :: Integer -> Digits' -> Integer go _ (DNil d _) = d go _ (DCons d' (DNil d b)) = d'*b + d go b (DCons d' ds) = go (b*b) (combine b d' ds) combine :: Integer -> Integer -> Digits' -> Digits' combine b d (DNil d' b') = DNil (d*b' + d') (b * b') combine b d (DCons d' (ds@DNil{})) = DCons (d*b + d') ds combine b d (DCons d' (DCons d'' ds)) = DCons (d*b + d') (combine b d'' ds) ------------------------------------------------------------------------ -- The following is code from Text.Read.Lex -- The following algorithm is only linear for types whose Num operations -- are in constant time. valSimple :: (Num a, Integral d) => a -> [d] -> a valSimple base = go 0 where go r [] = r go r (d : ds) = r' `seq` go r' ds where r' = r * base + fromIntegral d {-# INLINE valSimple #-} -- A sub-quadratic algorithm for Integer. Pairs of adjacent radix b -- digits are combined into a single radix b^2 digit. This process is -- repeated until we are left with a single digit. This algorithm -- performs well only on large inputs, so we use the simple algorithm -- for smaller inputs. valIntegerX :: Integer -> [Int] -> Integer valIntegerX b0 ds0 = go b0 (length ds0) $ map fromIntegral ds0 where go _ _ [] = 0 go _ _ [d] = d go b l ds | l > 40 = b' `seq` go b' l' (combine b ds') | otherwise = valSimple b ds where -- ensure that we have an even number of digits -- before we call combine: ds' = if even l then ds else 0 : ds b' = b * b l' = (l + 1) `quot` 2 combine b (d1 : d2 : ds) = d `seq` (d : combine b ds) where d = d1 * b + d2 combine _ [] = [] ------------------------------------------------------------------------ -- some timing tests -- lots of small integers, e.g., test1 [1,3] test1 :: [Int] -> Integer test1 xs = foldl' (+) 0 $ map (valInteger 10) (replicateM 18 xs) test1' :: [Int] -> Integer test1' xs = foldl' (+) 0 $ map (valInteger' 10) (replicateM 18 xs) test1'' :: [Int] -> Integer test1'' xs = foldl' (+) 0 $ map (valInteger'' 10) (replicateM 18 xs) test1X :: [Int] -> Integer test1X xs = foldl' (+) 0 $ map (valIntegerX 10) (replicateM 18 xs) testS' :: [Integer] -> Integer testS' xs = foldl' (+) 0 $ map (valIntegerS 10) (replicateM 18 xs) testS :: [Integer] -> Integer testS xs = foldl' (+) 0 $ map (valIntegerS 10) (replicateM 18 xs) testB :: [Integer] -> Integer testB xs = foldl' (+) 0 $ map (valIntegerB 10) (replicateM 18 xs) -- one big integer, e.g., test2 1000000 test2 :: Int -> Integer test2 l = valInteger 10 (replicate l 3) `mod` 123456789 test2' :: Int -> Integer test2' l = valInteger' 10 (replicate l 3) `mod` 123456789 test2'' :: Int -> Integer test2'' l = valInteger'' 10 (replicate l 3) `mod` 123456789 test2X :: Int -> Integer test2X l = valIntegerX 10 (replicate l 3) `mod` 123456789 testQ' :: Int -> Integer testQ' l = valIntegerS' 10 (replicate l 3) `mod` 123456789 testQ :: Int -> Integer testQ l = valIntegerS 10 (replicate l 3) `mod` 123456789 testR :: Int -> Integer testR l = valIntegerB 10 (replicate l 3) `mod` 123456789