{-# LANGUAGE DeriveGeneric, DefaultSignatures #-}
{-# OPTIONS_HADDOCK hide #-}

module Analysis.Capstone where

import Parser.ParserX86Instruction
import Base


import Hapstone.Capstone
import qualified Hapstone.Internal.Capstone as Capstone
import Data.List
import Data.List.Split (splitOn)
import Data.Char (toUpper,isSpace)
import Text.ParserCombinators.Parsec
import Data.Word (Word64,Word8)
import Debug.Trace
import qualified Data.IntMap as IM
import Data.Maybe (fromJust,catMaybes)
import X86.Opcode (Opcode(..))
import X86.Prefix (Prefix(..))
import qualified X86.Operand as X86
import qualified X86.Instruction as X86
import Generic.Address (AddressWord64(..))
import Generic.Instruction (GenericInstruction(..))


import Data.IORef

-- This file provides a function 
--      disassemble :: IM.IntMap Word8 -> Int -> IO (Maybe Instr)
-- Given a dump (mapping of addresses to bytes) and an address, it tries to retrieve a single instruction.
--
-- The disassembly is based on Hapstone, see:
--    https://github.com/ibabushkin/hapstone
--
--
-- Disassembly may produce an error, e.g., if an unknown mnemonic is encountered. If so, this is solved by adding the mnemonic to 
--    src/X86_Datastructures.hs
-- Note that in that case the instruction is unsupported, i.e., it must be given semantics in src/SymbolicExecution.hs as well.
-- In case of such an error, or other ones, please contact us.




disasm_config :: [Word8] -> Word64 -> Disassembler ()
disasm_config [Word8]
bytes Word64
a = Disassembler :: forall a.
CsArch
-> [CsMode]
-> [Word8]
-> Word64
-> Int
-> Bool
-> Maybe CsSkipdataStruct
-> (Csh -> CsInsn -> IO a)
-> Disassembler a
Disassembler { 
  -- Options: CsArchArm, CsArchArm64, CsArchMips, CsArchX86, CsArchPpc, CsArchSparc, CsArchSysz, CsArchXcore
  arch :: CsArch
arch = CsArch
Capstone.CsArchX86,
  -- Modes (some may be combined by adding to the list): CsModeLittleEndian, CsModeArm, CsMode16 (16-bit x86), CsMode32 (32-bit x86), CsMode64 (64-bit x86-64/amd64 or PPC), CsModeThumb, CsModeMclass, CsModeV8 (ARMv8 A32), CsModeMicro, CsModeMips3, CsModeMips32r6, CsModeMipsGp64, CsModeV9 (SparcV9 mode), CsModeBigEndian, CsModeMips32, CsModeMips64
  modes :: [CsMode]
modes = [CsMode
Capstone.CsMode64, CsMode
Capstone.CsModeLittleEndian],
  -- buffer to disassemble, as [Word8]
  buffer :: [Word8]
buffer = [Word8]
bytes,
  -- address of first byte in the buffer, as Word64
  addr :: Word64
addr = Word64
a,
  -- number of instructions to disassemble (0 for maximum)
  num :: Int
num = Int
1,
  -- include detailed information? True/False
  detail :: Bool
Hapstone.Capstone.detail = Bool
False,
  -- setup SKIPDATA options, as Maybe CsSkipdataStruct
  skip :: Maybe CsSkipdataStruct
skip = CsSkipdataStruct -> Maybe CsSkipdataStruct
forall a. a -> Maybe a
Just (CsSkipdataStruct
defaultSkipdataStruct),
  -- action to run on each instruction, a function with signature Csh -> CsInsn -> IO a; default is defaultAction
  action :: Csh -> CsInsn -> IO ()
action = Csh -> CsInsn -> IO ()
defaultAction
 }

mk_operands :: a -> [Char] -> [Maybe Operand]
mk_operands a
cs_instr [Char]
cs_ops = (Int -> Maybe Operand) -> [Int] -> [Maybe Operand]
forall a b. (a -> b) -> [a] -> [b]
map Int -> Maybe Operand
mk_operand [Int
0,Int
1,Int
2]
 where
  mk_operand :: Int -> Maybe Operand
mk_operand Int
n = 
    let splits :: [[Char]]
splits = [Char] -> [Char] -> [[Char]]
forall a. Eq a => [a] -> [a] -> [[a]]
splitOn [Char]
"," [Char]
cs_ops in
      if [Char] -> [Char]
trim [Char]
cs_ops [Char] -> [Char] -> Bool
forall a. Eq a => a -> a -> Bool
== [Char]
"" then
        Maybe Operand
forall a. Maybe a
Nothing
      else if [[Char]] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [[Char]]
splits Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
n then
        case [Char] -> Either ParseError Operand
parse_operand ([Char] -> Either ParseError Operand)
-> [Char] -> Either ParseError Operand
forall a b. (a -> b) -> a -> b
$ [[Char]]
splits [[Char]] -> Int -> [Char]
forall a. [a] -> Int -> a
!! Int
n of
          Left ParseError
err -> [Char] -> Maybe Operand
forall a. HasCallStack => [Char] -> a
error ([Char]
"Could not parse operand " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ Int -> [Char]
forall a. Show a => a -> [Char]
show Int
n [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
": \"" [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
cs_ops [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
"\" of \"" [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ a -> [Char]
forall a. Show a => a -> [Char]
show a
cs_instr [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
"\"")
          Right Operand
op -> Operand -> Maybe Operand
forall a. a -> Maybe a
Just Operand
op
      else
        Maybe Operand
forall a. Maybe a
Nothing
  parse_operand :: String -> Either ParseError X86.Operand
  parse_operand :: [Char] -> Either ParseError Operand
parse_operand = Parsec [Char] () Operand
-> [Char] -> [Char] -> Either ParseError Operand
forall s t a.
Stream s Identity t =>
Parsec s () a -> [Char] -> s -> Either ParseError a
parse Parsec [Char] () Operand
forall u. ParsecT [Char] u Identity Operand
operand [Char]
"" ([Char] -> Either ParseError Operand)
-> ([Char] -> [Char]) -> [Char] -> Either ParseError Operand
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Char] -> [Char]
trim

trim :: [Char] -> [Char]
trim = (Char -> Bool) -> [Char] -> [Char]
forall a. (a -> Bool) -> [a] -> [a]
dropWhileEnd Char -> Bool
isWhiteSpace ([Char] -> [Char]) -> ([Char] -> [Char]) -> [Char] -> [Char]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Bool) -> [Char] -> [Char]
forall a. (a -> Bool) -> [a] -> [a]
dropWhile Char -> Bool
isWhiteSpace

mk_instr :: Capstone.CsInsn -> Word64 -> Maybe X86.Instruction
mk_instr :: CsInsn -> Word64 -> Maybe Instruction
mk_instr CsInsn
cs_instr Word64
a =
  let addr :: AddressWord64
addr          = Word64 -> AddressWord64
AddressWord64 Word64
a
      ops :: [Maybe Operand]
ops           = CsInsn -> [Char] -> [Maybe Operand]
forall a. Show a => a -> [Char] -> [Maybe Operand]
mk_operands CsInsn
cs_instr ([Char] -> [Maybe Operand]) -> [Char] -> [Maybe Operand]
forall a b. (a -> b) -> a -> b
$ CsInsn -> [Char]
Capstone.opStr CsInsn
cs_instr
      (Maybe Prefix
prefix,Opcode
m)    = [Char] -> (Maybe Prefix, Opcode)
parseMnemonicAndPrefix ([Char] -> (Maybe Prefix, Opcode))
-> [Char] -> (Maybe Prefix, Opcode)
forall a b. (a -> b) -> a -> b
$ CsInsn -> [Char]
Capstone.mnemonic CsInsn
cs_instr
      size :: Int
size          = [Word8] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length ([Word8] -> Int) -> [Word8] -> Int
forall a b. (a -> b) -> a -> b
$ CsInsn -> [Word8]
Capstone.bytes CsInsn
cs_instr
      i :: Instruction
i             = AddressWord64
-> Maybe Prefix
-> Opcode
-> Maybe Operand
-> [Operand]
-> Maybe Int
-> Instruction
forall label storage prefix opcode annotation.
label
-> Maybe prefix
-> opcode
-> Maybe (GenericOperand storage)
-> [GenericOperand storage]
-> Maybe annotation
-> GenericInstruction label storage prefix opcode annotation
Instruction AddressWord64
addr Maybe Prefix
prefix Opcode
m Maybe Operand
forall a. Maybe a
Nothing ([Maybe Operand] -> [Operand]
forall a. [Maybe a] -> [a]
catMaybes [Maybe Operand]
ops) (Int -> Maybe Int
forall a. a -> Maybe a
Just Int
size) in
    if Opcode
m Opcode -> Opcode -> Bool
forall a. Eq a => a -> a -> Bool
== Opcode
InvalidOpcode Bool -> Bool -> Bool
|| Maybe Prefix
prefix Maybe Prefix -> Maybe Prefix -> Bool
forall a. Eq a => a -> a -> Bool
== Prefix -> Maybe Prefix
forall a. a -> Maybe a
Just Prefix
InvalidPrefix then
      -- trace ("Error during disassembling (translation of Capstone to datastructure) @0x" ++ showHex a ++ ": " ++ show cs_instr  ++ ": " ++ show i) 
      Maybe Instruction
forall a. Maybe a
Nothing
    else
       Instruction -> Maybe Instruction
forall a. a -> Maybe a
Just Instruction
i
 where
  parseMnemonicAndPrefix :: [Char] -> (Maybe Prefix, Opcode)
parseMnemonicAndPrefix [Char]
str =
    case [Char] -> [[Char]]
words [Char]
str of
      [[Char]
m]   -> (Maybe Prefix
forall a. Maybe a
Nothing,[Char] -> Opcode
read_opcode ([Char] -> Opcode) -> [Char] -> Opcode
forall a b. (a -> b) -> a -> b
$ (Char -> Char) -> [Char] -> [Char]
forall a b. (a -> b) -> [a] -> [b]
map Char -> Char
toUpper [Char]
m)
      [[Char]
p,[Char]
m] -> (Prefix -> Maybe Prefix
forall a. a -> Maybe a
Just (Prefix -> Maybe Prefix) -> Prefix -> Maybe Prefix
forall a b. (a -> b) -> a -> b
$ [Char] -> Prefix
parsePrefix [Char]
p,[Char] -> Opcode
read_opcode ([Char] -> Opcode) -> [Char] -> Opcode
forall a b. (a -> b) -> a -> b
$ (Char -> Char) -> [Char] -> [Char]
forall a b. (a -> b) -> [a] -> [b]
map Char -> Char
toUpper [Char]
m)



-- disassemble an instruction from a bytestream
-- the bytestream "buffer" is assumed to be located at virtual address "a"
-- we use memoization: each instruction is disassembled at most once
disassemble :: IORef (IM.IntMap X86.Instruction) -> [Word8] -> Word64 -> IO (Maybe X86.Instruction)
disassemble :: IORef (IntMap Instruction)
-> [Word8] -> Word64 -> IO (Maybe Instruction)
disassemble IORef (IntMap Instruction)
ioref [Word8]
buffer Word64
a = do
  IntMap Instruction
instructions <- IORef (IntMap Instruction) -> IO (IntMap Instruction)
forall a. IORef a -> IO a
readIORef IORef (IntMap Instruction)
ioref
  case Int -> IntMap Instruction -> Maybe Instruction
forall a. Int -> IntMap a -> Maybe a
IM.lookup (Word64 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word64
a) IntMap Instruction
instructions of
    Just Instruction
i -> Maybe Instruction -> IO (Maybe Instruction)
forall (m :: * -> *) a. Monad m => a -> m a
return (Maybe Instruction -> IO (Maybe Instruction))
-> Maybe Instruction -> IO (Maybe Instruction)
forall a b. (a -> b) -> a -> b
$ Instruction -> Maybe Instruction
forall a. a -> Maybe a
Just Instruction
i -- memoized
    Maybe Instruction
Nothing -> do
      let config :: Disassembler ()
config = [Word8] -> Word64 -> Disassembler ()
disasm_config [Word8]
buffer Word64
a
      Either CsErr [(CsInsn, ())]
result <- Disassembler () -> IO (Either CsErr [(CsInsn, ())])
forall a. Disassembler a -> IO (Either CsErr [(CsInsn, a)])
disasmIO Disassembler ()
config
      case Either CsErr [(CsInsn, ())]
result of
        Left CsErr
err      -> [Char] -> IO (Maybe Instruction)
forall a. HasCallStack => [Char] -> a
error ([Char]
"Error during disassembling of address " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ Word64 -> [Char]
forall a. (Integral a, Show a) => a -> [Char]
showHex Word64
a [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
": " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ CsErr -> [Char]
forall a. Show a => a -> [Char]
show CsErr
err)
        Right [(CsInsn
i,()
_)] -> Maybe Instruction -> IO (Maybe Instruction)
memoize_instr (Maybe Instruction -> IO (Maybe Instruction))
-> Maybe Instruction -> IO (Maybe Instruction)
forall a b. (a -> b) -> a -> b
$ CsInsn -> Word64 -> Maybe Instruction
mk_instr CsInsn
i Word64
a
        Right [(CsInsn, ())]
x       -> [Char] -> IO (Maybe Instruction)
forall a. HasCallStack => [Char] -> a
error ([Char] -> IO (Maybe Instruction))
-> [Char] -> IO (Maybe Instruction)
forall a b. (a -> b) -> a -> b
$ [Char]
"Error during disassembling of address " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ Word64 -> [Char]
forall a. (Integral a, Show a) => a -> [Char]
showHex Word64
a [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
": parsing result Capstone == " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [(CsInsn, ())] -> [Char]
forall a. Show a => a -> [Char]
show [(CsInsn, ())]
x [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
" @ address " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ Word64 -> [Char]
forall a. (Integral a, Show a) => a -> [Char]
showHex Word64
a [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
" buffer == " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Word8] -> [Char]
forall a. (Integral a, Show a) => [a] -> [Char]
showHex_list [Word8]
buffer
 where
  memoize_instr :: Maybe Instruction -> IO (Maybe Instruction)
memoize_instr Maybe Instruction
Nothing = Maybe Instruction -> IO (Maybe Instruction)
forall (m :: * -> *) a. Monad m => a -> m a
return Maybe Instruction
forall a. Maybe a
Nothing
  memoize_instr (Just Instruction
instr) = do
    IORef (IntMap Instruction)
-> (IntMap Instruction -> IntMap Instruction) -> IO ()
forall a. IORef a -> (a -> a) -> IO ()
modifyIORef' IORef (IntMap Instruction)
ioref (Int -> Instruction -> IntMap Instruction -> IntMap Instruction
forall a. Int -> a -> IntMap a -> IntMap a
IM.insert (Word64 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word64
a) Instruction
instr)
    Maybe Instruction -> IO (Maybe Instruction)
forall (m :: * -> *) a. Monad m => a -> m a
return (Maybe Instruction -> IO (Maybe Instruction))
-> Maybe Instruction -> IO (Maybe Instruction)
forall a b. (a -> b) -> a -> b
$ Instruction -> Maybe Instruction
forall a. a -> Maybe a
Just Instruction
instr