{-# LANGUAGE PartialTypeSignatures , FlexibleContexts, Strict, DeriveGeneric#-}
{-# OPTIONS_HADDOCK prune  #-}


{-|
Module      : JSON_Taxonomy
Description : Provides a taxonomy for the generated output.

Provides a taxonomy for output generated by FoxDec. The output is a JSON object, whose schema is defined by the `JSON` datatype below. That datatype provides its fields, such as `instructions` and `control_flow`.
This is an overview of some of the information retrievable from FoxDec.
For example, we have omitted control flow graphs and per-instruction invariants to keep the size of the JSON object manageable.
However, if more information is required, or if there is a request for information in a different form, then let us know.

On the right hand side, unfold the __synopsis__ for an overview.
-}

module Data.JSON_Taxonomy where


import Base

import Data.SValue
import Data.SymbolicExpression

import Generic.Binary
import Generic.SymbolicConstituents

import Analysis.Context

import qualified Generic.Address as GA
import qualified Generic.Operand as GO
import qualified Generic.Instruction as GI
import qualified X86.Instruction as X86
import X86.Opcode
import X86.Prefix
import X86.Register
import Generic.HasSize (HasSize(sizeof))


import qualified Data.Map as M
import qualified Data.IntMap as IM
import qualified Data.IntSet as IS
import qualified Data.Set as S
import qualified Data.Set.NonEmpty as NES


import Data.Maybe (fromJust,catMaybes,mapMaybe)
import Data.List 
import Data.Foldable
import Data.Word
import Data.Aeson
import GHC.Generics





-- * Taxonomy

-- | The top-level JSON datastructure consists of the following fields:
--
--     * `instructions`:  All instructions with their addresses and sizes.
--     * `control_flow`:  For each instruction address, this provides the set of successor instruction addresses. This includes indirections, if they were resolved.
--     * `function_boundaries`:  For each function entry address, this provides an overview of all instruction addresses belonging to that function.
--     * `function_summaries`:  For each function entry address, this provides a pre- and postcondition.
--     * `invariants`: For each each instruction address, for each function address that contains the instruction, this provides an invariant.
--     * `pointer_domains`: For each each instruction address, for each function address that contains the instruction, this provides a symbolic expression for the pointer of each memory operand.
--
-- Example of the entire JSON output:
--
-- > { "instructions": [...], "control_flow": [...], ... }

data JSON = JSON {
  JSON -> [Instruction]
instructions        :: [Instruction],                    -- ^ A list of `Instruction`s.
  JSON -> ControlFlow
control_flow        :: ControlFlow,                      -- ^ The __`ControlFlow`__ essentially is graph with as nodes instruction addresses.
  JSON -> [(Word64, FunctionBoundary)]
function_boundaries :: [(Word64,FunctionBoundary)],      -- ^ A mapping from function entry addresses to pretty-printed  __`FunctionBoundary`__s.
  JSON -> [(Word64, FunctionSummary)]
function_summaries  :: [(Word64,FunctionSummary)],       -- ^ A mapping from function entry addresses to __`FunctionSummary`__s.
  JSON -> [(Word64, [Invariant])]
invariants          :: [(Word64,[Invariant])],           -- ^ A mapping from instruction addresses to __`Invariant`__s.
  JSON -> [(Word64, Word64, [Maybe SValue])]
pointer_domains     :: [(Word64, Word64,[Maybe SValue])] -- ^ Per instruction address, per function entry, the __`PointerDomain`__ for each memory operand.

  }
  deriving (forall x. JSON -> Rep JSON x)
-> (forall x. Rep JSON x -> JSON) -> Generic JSON
forall x. Rep JSON x -> JSON
forall x. JSON -> Rep JSON x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cto :: forall x. Rep JSON x -> JSON
$cfrom :: forall x. JSON -> Rep JSON x
Generic


-- | The __ControlFlow__ is provided as a mapping from instruction addresses to lists of instruction addresses.
--
-- Example of JSON output:
-- 
--  > [5907,[5909,5929]]
--
-- This states the instruction at address 5907 has two possible successor instruction addresses (5909 and 5929).
type ControlFlow = [(Word64, [Word64])]


-- | The __FunctionBoundary__ is provided as a pretty-printed representation of all instruction addresses covered by the function.
--
-- Example of JSON output:
-- 
--  > "1200-->1238 ; 1280-->1284"
--
-- This __Function Boundary__ states that the function spans two non-consecutive ranges of instruction addresses.

type FunctionBoundary = String


-- | A __FunctionSummary__ contains:
--
--   * a preconditon: a __`Predicate`__ that holds whenever the function is called within the binary.
--   * a postconditon: a __`Postcondition`__ that holds whenever the function is returning, if the function returns normally.
--
--
-- Example of JSON output:
--
-- > {"precondition" : [...], "postcondition" : [...]}
data FunctionSummary = FunctionSummary {
    FunctionSummary -> FunctionBoundary
precondition :: String, -- TODO
    FunctionSummary -> Postcondition
postcondition :: Postcondition
  }
  deriving (forall x. FunctionSummary -> Rep FunctionSummary x)
-> (forall x. Rep FunctionSummary x -> FunctionSummary)
-> Generic FunctionSummary
forall x. Rep FunctionSummary x -> FunctionSummary
forall x. FunctionSummary -> Rep FunctionSummary x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cto :: forall x. Rep FunctionSummary x -> FunctionSummary
$cfrom :: forall x. FunctionSummary -> Rep FunctionSummary x
Generic

-- | An __Invariant__ contains:
--
--   * the entry address of the function in which the instruction occurs. Note that (rarely) the same instruction can occur in multiple functions.
--     Therefore, we provide an invariant per entry.
--   * an invariant: a `Predicate`.
--
-- Example of JSON output:
--
-- >  [4320,[[{"SP_Reg":"RIP"},{"SE_Immediate":4324}],[{"SP_Reg":"RAX"},...]]]
--
-- The instruction occurs in the function with entry address @4320@. Register @RIP@ is set to @4324@. Register @RAX@ is always equal to iths initial value.
type Invariant = (Word64,Data.JSON_Taxonomy.Predicate) -- TODO





-- | A __Postcondition__ provides information on the return status of a function.
-- 
-- Example of JSON output:
--
-- > {"Terminating" : [] }
data Postcondition =
    Terminating              -- ^ The function does never return
  | ReturningWith Data.JSON_Taxonomy.Predicate  -- ^ The function returns in a state satisfying the __`Predicate`__ TODO
  | UnknownRetBehavior       -- ^  It is unknown whether the function returns or not
 deriving (forall x. Postcondition -> Rep Postcondition x)
-> (forall x. Rep Postcondition x -> Postcondition)
-> Generic Postcondition
forall x. Rep Postcondition x -> Postcondition
forall x. Postcondition -> Rep Postcondition x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cto :: forall x. Rep Postcondition x -> Postcondition
$cfrom :: forall x. Postcondition -> Rep Postcondition x
Generic


-- | A __Predicate__ is a mapping from state parts (registers, memory, flags) to symbolic expressions. TODO
-- 
-- Example of JSON output:
--
--  > [
--  >   [{"SP_Reg":"RDI"},{"Bottom":{"FromPointerBases":[{"Malloc":[4420,""]}]}}],
--  >   [{"SP_Reg":"RSI"},{"SE_Immediate":8217}]
--  > ]
--
--  This predicate states that register @RDI@ is a pointer with as base the return value of @malloc@, called at address @4420@.
--  Register @RSI@ contains an immediate value.
type Predicate = M.Map StatePart SValue



-- | An __Instruction__ has an address, a prefix (which may also be @null@), a mnemonic/opcode, a list of operands (possibly empty) and a size in bytes.
--
-- Example of JSON output:
--
-- > {"size":7,"prefix":"BND","addr":4420,"opcode":"JMP","operands":[{"Memory":[{"AddressImm":11869},8]}]}
data Instruction = Instruction {
  Instruction -> Word64
addr     :: Word64,            -- ^ address
  Instruction -> Maybe Prefix
prefix   :: Maybe Prefix,      -- ^ prefix, e.g., lock or repz
  Instruction -> Opcode
opcode   :: Opcode,            -- ^ opcode/mnemonic
  Instruction -> [Operand]
operands :: [Operand],         -- ^ possibly empty list of operands 
  Instruction -> Int
size     :: Int                -- ^ size of instruction
 }
 deriving (forall x. Instruction -> Rep Instruction x)
-> (forall x. Rep Instruction x -> Instruction)
-> Generic Instruction
forall x. Rep Instruction x -> Instruction
forall x. Instruction -> Rep Instruction x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cto :: forall x. Rep Instruction x -> Instruction
$cfrom :: forall x. Instruction -> Rep Instruction x
Generic


-- | An __Operand__ is either a memory operand, an effective address (in case of @LEA@), a register or an immediate value.
--
-- Example of JSON output:
--
-- > {"Memory":[{"AddressPlus":[{"AddressRegister":"RIP"},{"AddressImm":11869}]},8]}
data Operand =
    Memory Address Int           -- ^ A region in memory, with an `Address` and a size in bytes
  | EffectiveAddress Address     -- ^ An address itself, but not the value stored at the address.
  | Register Register            -- ^ A storage location such as a register or a variable
  | Immediate Word64             -- ^ An immediate value
  deriving (forall x. Operand -> Rep Operand x)
-> (forall x. Rep Operand x -> Operand) -> Generic Operand
forall x. Rep Operand x -> Operand
forall x. Operand -> Rep Operand x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cto :: forall x. Rep Operand x -> Operand
$cfrom :: forall x. Operand -> Rep Operand x
Generic



-- | An __Address__ is the unresolved address computation in the memory operand of an instruction.
--
-- Example of JSON output:
--
-- > {"AddressPlus":[{"AddressRegister":"RIP"},{"AddressImm":11869}]}
data Address =
    AddressRegister Register      -- ^ Reading a pointer from a storage
  | AddressImm Word64             -- ^ Immediate value 
  | AddressPlus Address Address   -- ^ Plus
  | AddressMinus Address Address  -- ^ Minus
  | AddressTimes Address Address  -- ^ Times
  deriving (forall x. Address -> Rep Address x)
-> (forall x. Rep Address x -> Address) -> Generic Address
forall x. Rep Address x -> Address
forall x. Address -> Rep Address x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cto :: forall x. Rep Address x -> Address
$cfrom :: forall x. Address -> Rep Address x
Generic




mk_json_address :: GenericAddress Register -> Address
mk_json_address (GA.AddressStorage Register
r) = Register -> Address
AddressRegister Register
r
mk_json_address (GA.AddressImm Word64
i) = Word64 -> Address
AddressImm Word64
i
mk_json_address (GA.AddressPlus GenericAddress Register
a0 GenericAddress Register
a1) = Address -> Address -> Address
AddressPlus (GenericAddress Register -> Address
mk_json_address GenericAddress Register
a0) (GenericAddress Register -> Address
mk_json_address GenericAddress Register
a1)
mk_json_address (GA.AddressMinus GenericAddress Register
a0 GenericAddress Register
a1) = Address -> Address -> Address
AddressMinus (GenericAddress Register -> Address
mk_json_address GenericAddress Register
a0) (GenericAddress Register -> Address
mk_json_address GenericAddress Register
a1)
mk_json_address (GA.AddressTimes GenericAddress Register
a0 GenericAddress Register
a1) = Address -> Address -> Address
AddressTimes (GenericAddress Register -> Address
mk_json_address GenericAddress Register
a0) (GenericAddress Register -> Address
mk_json_address GenericAddress Register
a1)

mk_json_operand :: GenericOperand Register -> Operand
mk_json_operand (GO.Memory GenericAddress Register
a Int
si) = Address -> Int -> Operand
Memory (GenericAddress Register -> Address
mk_json_address GenericAddress Register
a) Int
si
mk_json_operand (GO.EffectiveAddress GenericAddress Register
a) = Address -> Operand
EffectiveAddress (GenericAddress Register -> Address
mk_json_address GenericAddress Register
a)
mk_json_operand (GO.Storage Register
r) = Register -> Operand
Register Register
r
mk_json_operand (GO.Immediate Word64
i) = Word64 -> Operand
Immediate Word64
i

mk_json_instruction :: GenericInstruction AddressWord64 Register Prefix Opcode annotation
-> Instruction
mk_json_instruction i :: GenericInstruction AddressWord64 Register Prefix Opcode annotation
i@(GI.Instruction (GA.AddressWord64 Word64
a) Maybe Prefix
p Opcode
op Maybe (GenericOperand Register)
Nothing [GenericOperand Register]
srcs Maybe annotation
_) = Word64 -> Maybe Prefix -> Opcode -> [Operand] -> Int -> Instruction
Instruction Word64
a Maybe Prefix
p Opcode
op ((GenericOperand Register -> Operand)
-> [GenericOperand Register] -> [Operand]
forall a b. (a -> b) -> [a] -> [b]
map GenericOperand Register -> Operand
mk_json_operand [GenericOperand Register]
srcs) (GenericInstruction AddressWord64 Register Prefix Opcode annotation
-> Int
forall a. HasSize a => a -> Int
sizeof GenericInstruction AddressWord64 Register Prefix Opcode annotation
i)

mk_json_predicate :: Analysis.Context.Predicate -> Data.JSON_Taxonomy.Predicate
mk_json_predicate :: Predicate -> Predicate
mk_json_predicate (Sstate Map Register SValue
regs Map (SPointer, RegionSize) SValue
mem FlagStatus
flg) = (Register -> StatePart) -> Map Register SValue -> Predicate
forall k2 k1 a. Ord k2 => (k1 -> k2) -> Map k1 a -> Map k2 a
M.mapKeys Register -> StatePart
mk_reg Map Register SValue
regs
 where
  mk_reg :: Register -> StatePart
mk_reg Register
r = Register -> StatePart
SP_Reg Register
r

mk_json_post :: Maybe FReturnBehavior -> Postcondition
mk_json_post Maybe FReturnBehavior
Nothing                                    = Postcondition
Data.JSON_Taxonomy.UnknownRetBehavior 
mk_json_post (Just FReturnBehavior
Analysis.Context.UnknownRetBehavior) = Postcondition
Data.JSON_Taxonomy.UnknownRetBehavior
mk_json_post (Just FReturnBehavior
Analysis.Context.Terminating)        = Postcondition
Data.JSON_Taxonomy.Terminating
mk_json_post (Just (Analysis.Context.ReturningWith Predicate
q))  = Predicate -> Postcondition
Data.JSON_Taxonomy.ReturningWith (Predicate -> Postcondition) -> Predicate -> Postcondition
forall a b. (a -> b) -> a -> b
$ Predicate -> Predicate
mk_json_predicate Predicate
q