GetProcAddress by Hash Implementation MASM 64bit











up vote
0
down vote

favorite












I have been working on a function called "funcCallFunctionByHash", which accepts a 64bit DJB2 hash of the function name and either the base address or name of the DLL that exports it. A third parameter is used to define which mode the function operates in. The function preserves the four registers used in Micorosoft's x64 calling convention (rcx, rdx, r8 and r9) by pushing them to stack at the start of the function and popping them before calling the desired function.



xor r12, r12
lea r11, lpLibFileName
mov r10, 00BF82C4B790C612CEh
mov rcx, 10h
call funcCallFunctionByHash
lpLibFileName db "kernel32.dll", 0


I use the above code to call the function. In this case I am passing the hash for ExitProcess in r10(I will add my hashing code at the bottom of this post). I am also passing lpLibFileName using r11 and clearing r12 (qwLookupMode) so the function operates in LOOKUP_BY_NAME mode. rcx is initialised to 16 as this will be the exit code of the process if it works.



I am using ExitProcess because it is forwarded to "NTDLL.RtlExitUserProcess. It allows me to test all parts of the function.



funcCallFunctionByHash proc

;save function parameters
push rcx
push rdx
push r8
push r9

push r14
push r15
push rbp

;not in use yet
cmp r12, LOOKUP_BY_NAME
cmove rdi, r11
je _LookupLoadLibrary

_ParseDllHeader:
mov eax, dword ptr [r11+3ch] ;IMAGE_DOS_HEADER->e_lfanew
lea rax, qword ptr [r11+rax+88h];ImageBaseAddress + e_lfanew + 88h
;88h = IMAGE_NT_HEADERS64 (size is 18h)
;IMAGE_OPTIONAL_HEADER (size is 70h - including IMAGE_DATA_DIRECTORY[16])
mov r14d, dword ptr[rax+04h] ;IMAGE_DATA_DIRECTORY[0]->Size
mov eax, dword ptr[rax] ;IMAGE_DATA_DIRECTORY[0]->VirtualAddress
mov r15d, eax ;save VirtualAddress for forwarded function testing
add rax, r11 ;ImageBaseAddress + IMAGE_DATA_DIRECTORY[0]->VirtualAddress

mov ecx, dword ptr[rax+18h] ;IMAGE_EXPORT_DIRECTORY->NumberOfNames
mov r8d, dword ptr[rax+20h] ;IMAGE_EXPORT_DIRECTORY->AddressOfNames
add r8, r11 ;ImageBaseAddress + AddressOfNames

_ForEachName:

dec ecx
jz _Failed ;could not find function in exports
;AddressOfNames is a RVA to the list of exported names
;it points to an array of NumberOfNames 32-bit values
;each being a RVA to the exported symbol name.
lea r9, [r8+04*rcx] ;AddressOfNames[NumberOfNames*04h]
mov r9d, dword ptr[r9] ;follow pointer to get second RVA
add r9, r11 ;ImageBaseAddress + AddressOfNames (second RVA)
;r9 now point to function name

mov rdx, 5381d ;could use edx here if it is shorter
_HashLoop:
mov rbx, rdx
shl rdx, 5
add rdx, rbx
xor dl, byte ptr[r9] ;xor with each character
inc r9
cmp byte ptr[r9], 00h ;check for null termination
jne _HashLoop
cmp rdx, r10
jne _ForEachName

xor rbx, rbx
mov r8d, dword ptr[rax+24h] ;IMAGE_EXPORT_DIRECTORY->AddressOfNameOrdinals
add r8, r11 ;ImageBaseAddress + AddressOfNameOrdinals
mov bx, word ptr[r8+rcx*02h] ;AddressOfNameOrdinals[AddressOfNamesIndex[rcx]*02]

mov r8d, dword ptr[rax+1ch] ;IMAGE_EXPORT_DIRECTORY->AddressOfFunctions
add r8, r11 ;ImageBaseAddress + AddressOfFunctions
mov eax, dword ptr[r8+rbx*04h] ;AddressOfFunctions[FunctionOrdinal*04h]

;check if forwarded
;(r8d > IMAGE_DATA_DIRECTORY[0]->VirtualAddress AND r8d < VirtualAddress + Size)
cmp rax, r15
jb _NotForwarded
add r14, r15
cmp rax, r14
jb _Forwarded

_NotForwarded:
add rax, r11 ;ImageBaseAddress + AddressOfFunctions[FunctionOrdinal*04h]
cmp r12, LOOKUP_BY_NAME ;check for LookUpByNameFlag == LOOKUP_BY_NAME (0)
cmove rcx, rdi ;conditional move dll name into lpLibFileName for LoadLibrary
je _CallLoadLibraryDirect
cmp r13, 00h ;check for the LoadLibraryFlag, is "test r13, r13" better?
jne _CallLoadLibrary
pop rbp ;pop registers used by function
pop r15
pop r14

pop r9 ;pop parameters to looked up function
pop r8
pop rdx
pop rcx
;may need to "sub rsp, 20h" to make shadow space for function
call rax
ret

_Forwarded:

;rsi is only set here which means that after returning from ParseDllHeader to CallLoadLibrary
;rax = LoadLibrary
;rsi = forwarded function name
add rax, r11 ;ImageBaseAddress + AddressOfFunctions[FunctionOrdinal*04h]
mov rsi, rax ;rsi is used ScanFor2E
_LookupLoadLibrary:
;TODO: check if requested dll is already present in _PED_LDR_DATA (need to write a UNICODE_STRING hasher)
mov r9, qword ptr gs:[60h] ;PEB
mov r9, qword ptr[r9+18h] ;PEB->Ldr (_PEB_LDR_DATA)
lea r9, qword ptr[r9+10h] ;_PEB_LDR_DATA->InLoadOrderModuleList (LDR_DATA_TABLE_ENTRY)
mov r9, qword ptr[r9] ;_LIST_ENTRY->Flink (_LIST_ENTRY)
mov r9, qword ptr[r9] ;_LIST_ENTRY->Flink (_LIST_ENTRY)
mov r9, qword ptr[r9] ;_LIST_ENTRY->Flink (_LIST_ENTRY)

;save the base address (kernel32)
mov r11, [r9+30h]

;qwLoadLibraryHash
mov r13, 00b9a3b50901ed9addh;set qwHashComparison
xchg r10, r13 ;this is required because in the case of qwLookupMode == 0 ie LOOKUP_BY_NAME
;this saves the requested qwHash to r13 for use later
jmp _ParseDllHeader

_CallLoadLibrary:

;if CallLoadLibrary is jumped to it means that a forwarded function was requested and the code has ALREADY
; 1. looked up the address of LoadLibrary - which is now in rax
; 2. initialised rsi to point to the forwarder string
xor rcx, rcx
_ScanFor2E:
cmp byte ptr[rsi+rcx], 2Eh
lea rcx, qword ptr[rcx+01h] ;lea does not effect flags, putting the increment here means
jne _ScanFor2E ;[rsi+rcx] points to "2Eh"

mov r12, rcx
add r12, 0Ch ;0Ch = 04h ("dll") + 07h (rounding up to multiple of 08h)
and r12, 0fffffff8h ;see StackOverflow answer

sub rsp, r12 ;reserve stack space for dll name
mov rdi, rsp

;can i optimise this to avoid having to push and pop rax?
; 1. move it to a different register before this point?
; 2. implement my own string instructions
push rax
_CopyForwarderString:
lodsb
stosb
loop _CopyForwarderString
pop rax

;write "dll"
mov dword ptr[rdi], 006c6c64h

;hash from rsi to /0
mov rdx, 5381d ;could use edx here if it is shorter
_HashLoop2:
mov rbx, rdx
shl rdx, 5
add rdx, rbx
xor dl, byte ptr[rsi] ;xor with each character
inc rsi
cmp byte ptr[rsi], 00h ;check for null termination
jne _HashLoop2
mov r13, rdx

;current situation:
;r10 is the hash of the forwarded function
;r11 is kernel32.dll ImageBaseAddress
;r12 is the size of stack space reserved by dll name

mov rcx, rsp ;lpLibFileName
_CallLoadLibraryDirect:
;if CallLoadLibraryDirect is jumped to the following conditions must be true:
; 1. rcx points to either:
; a. parsed forwarder string
; b. passed lpLibFileName when qwLookupMode == 0
;2. r12 needs to be 0 to preserve the stack (if we arrived at CallLoadLibraryDirect from CallLoadLibrary then r12
; will be initialised with the number of bytes reserved on the stack

;annoyingly r10 gets destroyed by LoadLibrary
;we need to hash calculated in CallLoadLibrary to survive this
;so after LoadLibrary is called we can pass the hash and ImageBaseAddress to ParseDllHeader
sub rsp, 20h ;reserve shadow space for four registers (20h = 08h * 04h)
call rax ;LoadLibraryA
add rsp, 20h ;restore stack
add rsp, r12 ;remove lpLibFileName / dll name from stack

;we have now called LoadLibrary so we need to do the following:
;after calling LoadLibrary for when qwLookupMode == 0
;if the function is also forwarded then it will jump to CallLoadLibrary
;if the function is not forwarded then it will be called
;
;when qwLookupMode == 1
;if we are calling LoadLibrary the function is forwarded
;
;this means that if RIP is here the function was forwarded or we are calling directly.
;if we are calling directly we need to make r12 == 0 when its LOOKUP_BY_NAME
mov r11, rax ;rax is the return value of LoadLibrary -> ImageBaseAddress
mov r10, r13 ;LookupLoadLibrary exchanges r10 and r13, put the original qwHash back into r10
mov r12, r13 ;make qwLookupMode != 0 so that we dont attempt to LoadLibrary again
xor r13, r13 ;clear qwLoadLibraryFlag so we dont attempt to call LoadLibrary again
jmp _ParseDllHeader

_Failed:
xor r10, r10
pop rbp ;pop registers used by function
pop r15
pop r14

pop r9 ;pop parameters to looked up function
pop r8
pop rdx
pop rcx
ret
funcCallFunctionByHash endp


Hash Function



utilHash proc
;rcx - null terminated function name
push rcx
push rdx

mov rax, 5381d
hl:
mov rdx, rax
shl rax, 5
add rax, rdx
xor al, [rcx]
inc rcx
;check for null termination
cmp byte ptr[rcx], 00h
jne short hl

pop rdx
pop rcx
ret

utilHash endp


I am interested in increasing performance, reducing memory usage, lowering the number of registers used (r14 and r15 are prime examples).



Any improvements I can make to my coding style or other suggestions would be appreciated.



Thanks










share|improve this question


























    up vote
    0
    down vote

    favorite












    I have been working on a function called "funcCallFunctionByHash", which accepts a 64bit DJB2 hash of the function name and either the base address or name of the DLL that exports it. A third parameter is used to define which mode the function operates in. The function preserves the four registers used in Micorosoft's x64 calling convention (rcx, rdx, r8 and r9) by pushing them to stack at the start of the function and popping them before calling the desired function.



    xor r12, r12
    lea r11, lpLibFileName
    mov r10, 00BF82C4B790C612CEh
    mov rcx, 10h
    call funcCallFunctionByHash
    lpLibFileName db "kernel32.dll", 0


    I use the above code to call the function. In this case I am passing the hash for ExitProcess in r10(I will add my hashing code at the bottom of this post). I am also passing lpLibFileName using r11 and clearing r12 (qwLookupMode) so the function operates in LOOKUP_BY_NAME mode. rcx is initialised to 16 as this will be the exit code of the process if it works.



    I am using ExitProcess because it is forwarded to "NTDLL.RtlExitUserProcess. It allows me to test all parts of the function.



    funcCallFunctionByHash proc

    ;save function parameters
    push rcx
    push rdx
    push r8
    push r9

    push r14
    push r15
    push rbp

    ;not in use yet
    cmp r12, LOOKUP_BY_NAME
    cmove rdi, r11
    je _LookupLoadLibrary

    _ParseDllHeader:
    mov eax, dword ptr [r11+3ch] ;IMAGE_DOS_HEADER->e_lfanew
    lea rax, qword ptr [r11+rax+88h];ImageBaseAddress + e_lfanew + 88h
    ;88h = IMAGE_NT_HEADERS64 (size is 18h)
    ;IMAGE_OPTIONAL_HEADER (size is 70h - including IMAGE_DATA_DIRECTORY[16])
    mov r14d, dword ptr[rax+04h] ;IMAGE_DATA_DIRECTORY[0]->Size
    mov eax, dword ptr[rax] ;IMAGE_DATA_DIRECTORY[0]->VirtualAddress
    mov r15d, eax ;save VirtualAddress for forwarded function testing
    add rax, r11 ;ImageBaseAddress + IMAGE_DATA_DIRECTORY[0]->VirtualAddress

    mov ecx, dword ptr[rax+18h] ;IMAGE_EXPORT_DIRECTORY->NumberOfNames
    mov r8d, dword ptr[rax+20h] ;IMAGE_EXPORT_DIRECTORY->AddressOfNames
    add r8, r11 ;ImageBaseAddress + AddressOfNames

    _ForEachName:

    dec ecx
    jz _Failed ;could not find function in exports
    ;AddressOfNames is a RVA to the list of exported names
    ;it points to an array of NumberOfNames 32-bit values
    ;each being a RVA to the exported symbol name.
    lea r9, [r8+04*rcx] ;AddressOfNames[NumberOfNames*04h]
    mov r9d, dword ptr[r9] ;follow pointer to get second RVA
    add r9, r11 ;ImageBaseAddress + AddressOfNames (second RVA)
    ;r9 now point to function name

    mov rdx, 5381d ;could use edx here if it is shorter
    _HashLoop:
    mov rbx, rdx
    shl rdx, 5
    add rdx, rbx
    xor dl, byte ptr[r9] ;xor with each character
    inc r9
    cmp byte ptr[r9], 00h ;check for null termination
    jne _HashLoop
    cmp rdx, r10
    jne _ForEachName

    xor rbx, rbx
    mov r8d, dword ptr[rax+24h] ;IMAGE_EXPORT_DIRECTORY->AddressOfNameOrdinals
    add r8, r11 ;ImageBaseAddress + AddressOfNameOrdinals
    mov bx, word ptr[r8+rcx*02h] ;AddressOfNameOrdinals[AddressOfNamesIndex[rcx]*02]

    mov r8d, dword ptr[rax+1ch] ;IMAGE_EXPORT_DIRECTORY->AddressOfFunctions
    add r8, r11 ;ImageBaseAddress + AddressOfFunctions
    mov eax, dword ptr[r8+rbx*04h] ;AddressOfFunctions[FunctionOrdinal*04h]

    ;check if forwarded
    ;(r8d > IMAGE_DATA_DIRECTORY[0]->VirtualAddress AND r8d < VirtualAddress + Size)
    cmp rax, r15
    jb _NotForwarded
    add r14, r15
    cmp rax, r14
    jb _Forwarded

    _NotForwarded:
    add rax, r11 ;ImageBaseAddress + AddressOfFunctions[FunctionOrdinal*04h]
    cmp r12, LOOKUP_BY_NAME ;check for LookUpByNameFlag == LOOKUP_BY_NAME (0)
    cmove rcx, rdi ;conditional move dll name into lpLibFileName for LoadLibrary
    je _CallLoadLibraryDirect
    cmp r13, 00h ;check for the LoadLibraryFlag, is "test r13, r13" better?
    jne _CallLoadLibrary
    pop rbp ;pop registers used by function
    pop r15
    pop r14

    pop r9 ;pop parameters to looked up function
    pop r8
    pop rdx
    pop rcx
    ;may need to "sub rsp, 20h" to make shadow space for function
    call rax
    ret

    _Forwarded:

    ;rsi is only set here which means that after returning from ParseDllHeader to CallLoadLibrary
    ;rax = LoadLibrary
    ;rsi = forwarded function name
    add rax, r11 ;ImageBaseAddress + AddressOfFunctions[FunctionOrdinal*04h]
    mov rsi, rax ;rsi is used ScanFor2E
    _LookupLoadLibrary:
    ;TODO: check if requested dll is already present in _PED_LDR_DATA (need to write a UNICODE_STRING hasher)
    mov r9, qword ptr gs:[60h] ;PEB
    mov r9, qword ptr[r9+18h] ;PEB->Ldr (_PEB_LDR_DATA)
    lea r9, qword ptr[r9+10h] ;_PEB_LDR_DATA->InLoadOrderModuleList (LDR_DATA_TABLE_ENTRY)
    mov r9, qword ptr[r9] ;_LIST_ENTRY->Flink (_LIST_ENTRY)
    mov r9, qword ptr[r9] ;_LIST_ENTRY->Flink (_LIST_ENTRY)
    mov r9, qword ptr[r9] ;_LIST_ENTRY->Flink (_LIST_ENTRY)

    ;save the base address (kernel32)
    mov r11, [r9+30h]

    ;qwLoadLibraryHash
    mov r13, 00b9a3b50901ed9addh;set qwHashComparison
    xchg r10, r13 ;this is required because in the case of qwLookupMode == 0 ie LOOKUP_BY_NAME
    ;this saves the requested qwHash to r13 for use later
    jmp _ParseDllHeader

    _CallLoadLibrary:

    ;if CallLoadLibrary is jumped to it means that a forwarded function was requested and the code has ALREADY
    ; 1. looked up the address of LoadLibrary - which is now in rax
    ; 2. initialised rsi to point to the forwarder string
    xor rcx, rcx
    _ScanFor2E:
    cmp byte ptr[rsi+rcx], 2Eh
    lea rcx, qword ptr[rcx+01h] ;lea does not effect flags, putting the increment here means
    jne _ScanFor2E ;[rsi+rcx] points to "2Eh"

    mov r12, rcx
    add r12, 0Ch ;0Ch = 04h ("dll") + 07h (rounding up to multiple of 08h)
    and r12, 0fffffff8h ;see StackOverflow answer

    sub rsp, r12 ;reserve stack space for dll name
    mov rdi, rsp

    ;can i optimise this to avoid having to push and pop rax?
    ; 1. move it to a different register before this point?
    ; 2. implement my own string instructions
    push rax
    _CopyForwarderString:
    lodsb
    stosb
    loop _CopyForwarderString
    pop rax

    ;write "dll"
    mov dword ptr[rdi], 006c6c64h

    ;hash from rsi to /0
    mov rdx, 5381d ;could use edx here if it is shorter
    _HashLoop2:
    mov rbx, rdx
    shl rdx, 5
    add rdx, rbx
    xor dl, byte ptr[rsi] ;xor with each character
    inc rsi
    cmp byte ptr[rsi], 00h ;check for null termination
    jne _HashLoop2
    mov r13, rdx

    ;current situation:
    ;r10 is the hash of the forwarded function
    ;r11 is kernel32.dll ImageBaseAddress
    ;r12 is the size of stack space reserved by dll name

    mov rcx, rsp ;lpLibFileName
    _CallLoadLibraryDirect:
    ;if CallLoadLibraryDirect is jumped to the following conditions must be true:
    ; 1. rcx points to either:
    ; a. parsed forwarder string
    ; b. passed lpLibFileName when qwLookupMode == 0
    ;2. r12 needs to be 0 to preserve the stack (if we arrived at CallLoadLibraryDirect from CallLoadLibrary then r12
    ; will be initialised with the number of bytes reserved on the stack

    ;annoyingly r10 gets destroyed by LoadLibrary
    ;we need to hash calculated in CallLoadLibrary to survive this
    ;so after LoadLibrary is called we can pass the hash and ImageBaseAddress to ParseDllHeader
    sub rsp, 20h ;reserve shadow space for four registers (20h = 08h * 04h)
    call rax ;LoadLibraryA
    add rsp, 20h ;restore stack
    add rsp, r12 ;remove lpLibFileName / dll name from stack

    ;we have now called LoadLibrary so we need to do the following:
    ;after calling LoadLibrary for when qwLookupMode == 0
    ;if the function is also forwarded then it will jump to CallLoadLibrary
    ;if the function is not forwarded then it will be called
    ;
    ;when qwLookupMode == 1
    ;if we are calling LoadLibrary the function is forwarded
    ;
    ;this means that if RIP is here the function was forwarded or we are calling directly.
    ;if we are calling directly we need to make r12 == 0 when its LOOKUP_BY_NAME
    mov r11, rax ;rax is the return value of LoadLibrary -> ImageBaseAddress
    mov r10, r13 ;LookupLoadLibrary exchanges r10 and r13, put the original qwHash back into r10
    mov r12, r13 ;make qwLookupMode != 0 so that we dont attempt to LoadLibrary again
    xor r13, r13 ;clear qwLoadLibraryFlag so we dont attempt to call LoadLibrary again
    jmp _ParseDllHeader

    _Failed:
    xor r10, r10
    pop rbp ;pop registers used by function
    pop r15
    pop r14

    pop r9 ;pop parameters to looked up function
    pop r8
    pop rdx
    pop rcx
    ret
    funcCallFunctionByHash endp


    Hash Function



    utilHash proc
    ;rcx - null terminated function name
    push rcx
    push rdx

    mov rax, 5381d
    hl:
    mov rdx, rax
    shl rax, 5
    add rax, rdx
    xor al, [rcx]
    inc rcx
    ;check for null termination
    cmp byte ptr[rcx], 00h
    jne short hl

    pop rdx
    pop rcx
    ret

    utilHash endp


    I am interested in increasing performance, reducing memory usage, lowering the number of registers used (r14 and r15 are prime examples).



    Any improvements I can make to my coding style or other suggestions would be appreciated.



    Thanks










    share|improve this question
























      up vote
      0
      down vote

      favorite









      up vote
      0
      down vote

      favorite











      I have been working on a function called "funcCallFunctionByHash", which accepts a 64bit DJB2 hash of the function name and either the base address or name of the DLL that exports it. A third parameter is used to define which mode the function operates in. The function preserves the four registers used in Micorosoft's x64 calling convention (rcx, rdx, r8 and r9) by pushing them to stack at the start of the function and popping them before calling the desired function.



      xor r12, r12
      lea r11, lpLibFileName
      mov r10, 00BF82C4B790C612CEh
      mov rcx, 10h
      call funcCallFunctionByHash
      lpLibFileName db "kernel32.dll", 0


      I use the above code to call the function. In this case I am passing the hash for ExitProcess in r10(I will add my hashing code at the bottom of this post). I am also passing lpLibFileName using r11 and clearing r12 (qwLookupMode) so the function operates in LOOKUP_BY_NAME mode. rcx is initialised to 16 as this will be the exit code of the process if it works.



      I am using ExitProcess because it is forwarded to "NTDLL.RtlExitUserProcess. It allows me to test all parts of the function.



      funcCallFunctionByHash proc

      ;save function parameters
      push rcx
      push rdx
      push r8
      push r9

      push r14
      push r15
      push rbp

      ;not in use yet
      cmp r12, LOOKUP_BY_NAME
      cmove rdi, r11
      je _LookupLoadLibrary

      _ParseDllHeader:
      mov eax, dword ptr [r11+3ch] ;IMAGE_DOS_HEADER->e_lfanew
      lea rax, qword ptr [r11+rax+88h];ImageBaseAddress + e_lfanew + 88h
      ;88h = IMAGE_NT_HEADERS64 (size is 18h)
      ;IMAGE_OPTIONAL_HEADER (size is 70h - including IMAGE_DATA_DIRECTORY[16])
      mov r14d, dword ptr[rax+04h] ;IMAGE_DATA_DIRECTORY[0]->Size
      mov eax, dword ptr[rax] ;IMAGE_DATA_DIRECTORY[0]->VirtualAddress
      mov r15d, eax ;save VirtualAddress for forwarded function testing
      add rax, r11 ;ImageBaseAddress + IMAGE_DATA_DIRECTORY[0]->VirtualAddress

      mov ecx, dword ptr[rax+18h] ;IMAGE_EXPORT_DIRECTORY->NumberOfNames
      mov r8d, dword ptr[rax+20h] ;IMAGE_EXPORT_DIRECTORY->AddressOfNames
      add r8, r11 ;ImageBaseAddress + AddressOfNames

      _ForEachName:

      dec ecx
      jz _Failed ;could not find function in exports
      ;AddressOfNames is a RVA to the list of exported names
      ;it points to an array of NumberOfNames 32-bit values
      ;each being a RVA to the exported symbol name.
      lea r9, [r8+04*rcx] ;AddressOfNames[NumberOfNames*04h]
      mov r9d, dword ptr[r9] ;follow pointer to get second RVA
      add r9, r11 ;ImageBaseAddress + AddressOfNames (second RVA)
      ;r9 now point to function name

      mov rdx, 5381d ;could use edx here if it is shorter
      _HashLoop:
      mov rbx, rdx
      shl rdx, 5
      add rdx, rbx
      xor dl, byte ptr[r9] ;xor with each character
      inc r9
      cmp byte ptr[r9], 00h ;check for null termination
      jne _HashLoop
      cmp rdx, r10
      jne _ForEachName

      xor rbx, rbx
      mov r8d, dword ptr[rax+24h] ;IMAGE_EXPORT_DIRECTORY->AddressOfNameOrdinals
      add r8, r11 ;ImageBaseAddress + AddressOfNameOrdinals
      mov bx, word ptr[r8+rcx*02h] ;AddressOfNameOrdinals[AddressOfNamesIndex[rcx]*02]

      mov r8d, dword ptr[rax+1ch] ;IMAGE_EXPORT_DIRECTORY->AddressOfFunctions
      add r8, r11 ;ImageBaseAddress + AddressOfFunctions
      mov eax, dword ptr[r8+rbx*04h] ;AddressOfFunctions[FunctionOrdinal*04h]

      ;check if forwarded
      ;(r8d > IMAGE_DATA_DIRECTORY[0]->VirtualAddress AND r8d < VirtualAddress + Size)
      cmp rax, r15
      jb _NotForwarded
      add r14, r15
      cmp rax, r14
      jb _Forwarded

      _NotForwarded:
      add rax, r11 ;ImageBaseAddress + AddressOfFunctions[FunctionOrdinal*04h]
      cmp r12, LOOKUP_BY_NAME ;check for LookUpByNameFlag == LOOKUP_BY_NAME (0)
      cmove rcx, rdi ;conditional move dll name into lpLibFileName for LoadLibrary
      je _CallLoadLibraryDirect
      cmp r13, 00h ;check for the LoadLibraryFlag, is "test r13, r13" better?
      jne _CallLoadLibrary
      pop rbp ;pop registers used by function
      pop r15
      pop r14

      pop r9 ;pop parameters to looked up function
      pop r8
      pop rdx
      pop rcx
      ;may need to "sub rsp, 20h" to make shadow space for function
      call rax
      ret

      _Forwarded:

      ;rsi is only set here which means that after returning from ParseDllHeader to CallLoadLibrary
      ;rax = LoadLibrary
      ;rsi = forwarded function name
      add rax, r11 ;ImageBaseAddress + AddressOfFunctions[FunctionOrdinal*04h]
      mov rsi, rax ;rsi is used ScanFor2E
      _LookupLoadLibrary:
      ;TODO: check if requested dll is already present in _PED_LDR_DATA (need to write a UNICODE_STRING hasher)
      mov r9, qword ptr gs:[60h] ;PEB
      mov r9, qword ptr[r9+18h] ;PEB->Ldr (_PEB_LDR_DATA)
      lea r9, qword ptr[r9+10h] ;_PEB_LDR_DATA->InLoadOrderModuleList (LDR_DATA_TABLE_ENTRY)
      mov r9, qword ptr[r9] ;_LIST_ENTRY->Flink (_LIST_ENTRY)
      mov r9, qword ptr[r9] ;_LIST_ENTRY->Flink (_LIST_ENTRY)
      mov r9, qword ptr[r9] ;_LIST_ENTRY->Flink (_LIST_ENTRY)

      ;save the base address (kernel32)
      mov r11, [r9+30h]

      ;qwLoadLibraryHash
      mov r13, 00b9a3b50901ed9addh;set qwHashComparison
      xchg r10, r13 ;this is required because in the case of qwLookupMode == 0 ie LOOKUP_BY_NAME
      ;this saves the requested qwHash to r13 for use later
      jmp _ParseDllHeader

      _CallLoadLibrary:

      ;if CallLoadLibrary is jumped to it means that a forwarded function was requested and the code has ALREADY
      ; 1. looked up the address of LoadLibrary - which is now in rax
      ; 2. initialised rsi to point to the forwarder string
      xor rcx, rcx
      _ScanFor2E:
      cmp byte ptr[rsi+rcx], 2Eh
      lea rcx, qword ptr[rcx+01h] ;lea does not effect flags, putting the increment here means
      jne _ScanFor2E ;[rsi+rcx] points to "2Eh"

      mov r12, rcx
      add r12, 0Ch ;0Ch = 04h ("dll") + 07h (rounding up to multiple of 08h)
      and r12, 0fffffff8h ;see StackOverflow answer

      sub rsp, r12 ;reserve stack space for dll name
      mov rdi, rsp

      ;can i optimise this to avoid having to push and pop rax?
      ; 1. move it to a different register before this point?
      ; 2. implement my own string instructions
      push rax
      _CopyForwarderString:
      lodsb
      stosb
      loop _CopyForwarderString
      pop rax

      ;write "dll"
      mov dword ptr[rdi], 006c6c64h

      ;hash from rsi to /0
      mov rdx, 5381d ;could use edx here if it is shorter
      _HashLoop2:
      mov rbx, rdx
      shl rdx, 5
      add rdx, rbx
      xor dl, byte ptr[rsi] ;xor with each character
      inc rsi
      cmp byte ptr[rsi], 00h ;check for null termination
      jne _HashLoop2
      mov r13, rdx

      ;current situation:
      ;r10 is the hash of the forwarded function
      ;r11 is kernel32.dll ImageBaseAddress
      ;r12 is the size of stack space reserved by dll name

      mov rcx, rsp ;lpLibFileName
      _CallLoadLibraryDirect:
      ;if CallLoadLibraryDirect is jumped to the following conditions must be true:
      ; 1. rcx points to either:
      ; a. parsed forwarder string
      ; b. passed lpLibFileName when qwLookupMode == 0
      ;2. r12 needs to be 0 to preserve the stack (if we arrived at CallLoadLibraryDirect from CallLoadLibrary then r12
      ; will be initialised with the number of bytes reserved on the stack

      ;annoyingly r10 gets destroyed by LoadLibrary
      ;we need to hash calculated in CallLoadLibrary to survive this
      ;so after LoadLibrary is called we can pass the hash and ImageBaseAddress to ParseDllHeader
      sub rsp, 20h ;reserve shadow space for four registers (20h = 08h * 04h)
      call rax ;LoadLibraryA
      add rsp, 20h ;restore stack
      add rsp, r12 ;remove lpLibFileName / dll name from stack

      ;we have now called LoadLibrary so we need to do the following:
      ;after calling LoadLibrary for when qwLookupMode == 0
      ;if the function is also forwarded then it will jump to CallLoadLibrary
      ;if the function is not forwarded then it will be called
      ;
      ;when qwLookupMode == 1
      ;if we are calling LoadLibrary the function is forwarded
      ;
      ;this means that if RIP is here the function was forwarded or we are calling directly.
      ;if we are calling directly we need to make r12 == 0 when its LOOKUP_BY_NAME
      mov r11, rax ;rax is the return value of LoadLibrary -> ImageBaseAddress
      mov r10, r13 ;LookupLoadLibrary exchanges r10 and r13, put the original qwHash back into r10
      mov r12, r13 ;make qwLookupMode != 0 so that we dont attempt to LoadLibrary again
      xor r13, r13 ;clear qwLoadLibraryFlag so we dont attempt to call LoadLibrary again
      jmp _ParseDllHeader

      _Failed:
      xor r10, r10
      pop rbp ;pop registers used by function
      pop r15
      pop r14

      pop r9 ;pop parameters to looked up function
      pop r8
      pop rdx
      pop rcx
      ret
      funcCallFunctionByHash endp


      Hash Function



      utilHash proc
      ;rcx - null terminated function name
      push rcx
      push rdx

      mov rax, 5381d
      hl:
      mov rdx, rax
      shl rax, 5
      add rax, rdx
      xor al, [rcx]
      inc rcx
      ;check for null termination
      cmp byte ptr[rcx], 00h
      jne short hl

      pop rdx
      pop rcx
      ret

      utilHash endp


      I am interested in increasing performance, reducing memory usage, lowering the number of registers used (r14 and r15 are prime examples).



      Any improvements I can make to my coding style or other suggestions would be appreciated.



      Thanks










      share|improve this question













      I have been working on a function called "funcCallFunctionByHash", which accepts a 64bit DJB2 hash of the function name and either the base address or name of the DLL that exports it. A third parameter is used to define which mode the function operates in. The function preserves the four registers used in Micorosoft's x64 calling convention (rcx, rdx, r8 and r9) by pushing them to stack at the start of the function and popping them before calling the desired function.



      xor r12, r12
      lea r11, lpLibFileName
      mov r10, 00BF82C4B790C612CEh
      mov rcx, 10h
      call funcCallFunctionByHash
      lpLibFileName db "kernel32.dll", 0


      I use the above code to call the function. In this case I am passing the hash for ExitProcess in r10(I will add my hashing code at the bottom of this post). I am also passing lpLibFileName using r11 and clearing r12 (qwLookupMode) so the function operates in LOOKUP_BY_NAME mode. rcx is initialised to 16 as this will be the exit code of the process if it works.



      I am using ExitProcess because it is forwarded to "NTDLL.RtlExitUserProcess. It allows me to test all parts of the function.



      funcCallFunctionByHash proc

      ;save function parameters
      push rcx
      push rdx
      push r8
      push r9

      push r14
      push r15
      push rbp

      ;not in use yet
      cmp r12, LOOKUP_BY_NAME
      cmove rdi, r11
      je _LookupLoadLibrary

      _ParseDllHeader:
      mov eax, dword ptr [r11+3ch] ;IMAGE_DOS_HEADER->e_lfanew
      lea rax, qword ptr [r11+rax+88h];ImageBaseAddress + e_lfanew + 88h
      ;88h = IMAGE_NT_HEADERS64 (size is 18h)
      ;IMAGE_OPTIONAL_HEADER (size is 70h - including IMAGE_DATA_DIRECTORY[16])
      mov r14d, dword ptr[rax+04h] ;IMAGE_DATA_DIRECTORY[0]->Size
      mov eax, dword ptr[rax] ;IMAGE_DATA_DIRECTORY[0]->VirtualAddress
      mov r15d, eax ;save VirtualAddress for forwarded function testing
      add rax, r11 ;ImageBaseAddress + IMAGE_DATA_DIRECTORY[0]->VirtualAddress

      mov ecx, dword ptr[rax+18h] ;IMAGE_EXPORT_DIRECTORY->NumberOfNames
      mov r8d, dword ptr[rax+20h] ;IMAGE_EXPORT_DIRECTORY->AddressOfNames
      add r8, r11 ;ImageBaseAddress + AddressOfNames

      _ForEachName:

      dec ecx
      jz _Failed ;could not find function in exports
      ;AddressOfNames is a RVA to the list of exported names
      ;it points to an array of NumberOfNames 32-bit values
      ;each being a RVA to the exported symbol name.
      lea r9, [r8+04*rcx] ;AddressOfNames[NumberOfNames*04h]
      mov r9d, dword ptr[r9] ;follow pointer to get second RVA
      add r9, r11 ;ImageBaseAddress + AddressOfNames (second RVA)
      ;r9 now point to function name

      mov rdx, 5381d ;could use edx here if it is shorter
      _HashLoop:
      mov rbx, rdx
      shl rdx, 5
      add rdx, rbx
      xor dl, byte ptr[r9] ;xor with each character
      inc r9
      cmp byte ptr[r9], 00h ;check for null termination
      jne _HashLoop
      cmp rdx, r10
      jne _ForEachName

      xor rbx, rbx
      mov r8d, dword ptr[rax+24h] ;IMAGE_EXPORT_DIRECTORY->AddressOfNameOrdinals
      add r8, r11 ;ImageBaseAddress + AddressOfNameOrdinals
      mov bx, word ptr[r8+rcx*02h] ;AddressOfNameOrdinals[AddressOfNamesIndex[rcx]*02]

      mov r8d, dword ptr[rax+1ch] ;IMAGE_EXPORT_DIRECTORY->AddressOfFunctions
      add r8, r11 ;ImageBaseAddress + AddressOfFunctions
      mov eax, dword ptr[r8+rbx*04h] ;AddressOfFunctions[FunctionOrdinal*04h]

      ;check if forwarded
      ;(r8d > IMAGE_DATA_DIRECTORY[0]->VirtualAddress AND r8d < VirtualAddress + Size)
      cmp rax, r15
      jb _NotForwarded
      add r14, r15
      cmp rax, r14
      jb _Forwarded

      _NotForwarded:
      add rax, r11 ;ImageBaseAddress + AddressOfFunctions[FunctionOrdinal*04h]
      cmp r12, LOOKUP_BY_NAME ;check for LookUpByNameFlag == LOOKUP_BY_NAME (0)
      cmove rcx, rdi ;conditional move dll name into lpLibFileName for LoadLibrary
      je _CallLoadLibraryDirect
      cmp r13, 00h ;check for the LoadLibraryFlag, is "test r13, r13" better?
      jne _CallLoadLibrary
      pop rbp ;pop registers used by function
      pop r15
      pop r14

      pop r9 ;pop parameters to looked up function
      pop r8
      pop rdx
      pop rcx
      ;may need to "sub rsp, 20h" to make shadow space for function
      call rax
      ret

      _Forwarded:

      ;rsi is only set here which means that after returning from ParseDllHeader to CallLoadLibrary
      ;rax = LoadLibrary
      ;rsi = forwarded function name
      add rax, r11 ;ImageBaseAddress + AddressOfFunctions[FunctionOrdinal*04h]
      mov rsi, rax ;rsi is used ScanFor2E
      _LookupLoadLibrary:
      ;TODO: check if requested dll is already present in _PED_LDR_DATA (need to write a UNICODE_STRING hasher)
      mov r9, qword ptr gs:[60h] ;PEB
      mov r9, qword ptr[r9+18h] ;PEB->Ldr (_PEB_LDR_DATA)
      lea r9, qword ptr[r9+10h] ;_PEB_LDR_DATA->InLoadOrderModuleList (LDR_DATA_TABLE_ENTRY)
      mov r9, qword ptr[r9] ;_LIST_ENTRY->Flink (_LIST_ENTRY)
      mov r9, qword ptr[r9] ;_LIST_ENTRY->Flink (_LIST_ENTRY)
      mov r9, qword ptr[r9] ;_LIST_ENTRY->Flink (_LIST_ENTRY)

      ;save the base address (kernel32)
      mov r11, [r9+30h]

      ;qwLoadLibraryHash
      mov r13, 00b9a3b50901ed9addh;set qwHashComparison
      xchg r10, r13 ;this is required because in the case of qwLookupMode == 0 ie LOOKUP_BY_NAME
      ;this saves the requested qwHash to r13 for use later
      jmp _ParseDllHeader

      _CallLoadLibrary:

      ;if CallLoadLibrary is jumped to it means that a forwarded function was requested and the code has ALREADY
      ; 1. looked up the address of LoadLibrary - which is now in rax
      ; 2. initialised rsi to point to the forwarder string
      xor rcx, rcx
      _ScanFor2E:
      cmp byte ptr[rsi+rcx], 2Eh
      lea rcx, qword ptr[rcx+01h] ;lea does not effect flags, putting the increment here means
      jne _ScanFor2E ;[rsi+rcx] points to "2Eh"

      mov r12, rcx
      add r12, 0Ch ;0Ch = 04h ("dll") + 07h (rounding up to multiple of 08h)
      and r12, 0fffffff8h ;see StackOverflow answer

      sub rsp, r12 ;reserve stack space for dll name
      mov rdi, rsp

      ;can i optimise this to avoid having to push and pop rax?
      ; 1. move it to a different register before this point?
      ; 2. implement my own string instructions
      push rax
      _CopyForwarderString:
      lodsb
      stosb
      loop _CopyForwarderString
      pop rax

      ;write "dll"
      mov dword ptr[rdi], 006c6c64h

      ;hash from rsi to /0
      mov rdx, 5381d ;could use edx here if it is shorter
      _HashLoop2:
      mov rbx, rdx
      shl rdx, 5
      add rdx, rbx
      xor dl, byte ptr[rsi] ;xor with each character
      inc rsi
      cmp byte ptr[rsi], 00h ;check for null termination
      jne _HashLoop2
      mov r13, rdx

      ;current situation:
      ;r10 is the hash of the forwarded function
      ;r11 is kernel32.dll ImageBaseAddress
      ;r12 is the size of stack space reserved by dll name

      mov rcx, rsp ;lpLibFileName
      _CallLoadLibraryDirect:
      ;if CallLoadLibraryDirect is jumped to the following conditions must be true:
      ; 1. rcx points to either:
      ; a. parsed forwarder string
      ; b. passed lpLibFileName when qwLookupMode == 0
      ;2. r12 needs to be 0 to preserve the stack (if we arrived at CallLoadLibraryDirect from CallLoadLibrary then r12
      ; will be initialised with the number of bytes reserved on the stack

      ;annoyingly r10 gets destroyed by LoadLibrary
      ;we need to hash calculated in CallLoadLibrary to survive this
      ;so after LoadLibrary is called we can pass the hash and ImageBaseAddress to ParseDllHeader
      sub rsp, 20h ;reserve shadow space for four registers (20h = 08h * 04h)
      call rax ;LoadLibraryA
      add rsp, 20h ;restore stack
      add rsp, r12 ;remove lpLibFileName / dll name from stack

      ;we have now called LoadLibrary so we need to do the following:
      ;after calling LoadLibrary for when qwLookupMode == 0
      ;if the function is also forwarded then it will jump to CallLoadLibrary
      ;if the function is not forwarded then it will be called
      ;
      ;when qwLookupMode == 1
      ;if we are calling LoadLibrary the function is forwarded
      ;
      ;this means that if RIP is here the function was forwarded or we are calling directly.
      ;if we are calling directly we need to make r12 == 0 when its LOOKUP_BY_NAME
      mov r11, rax ;rax is the return value of LoadLibrary -> ImageBaseAddress
      mov r10, r13 ;LookupLoadLibrary exchanges r10 and r13, put the original qwHash back into r10
      mov r12, r13 ;make qwLookupMode != 0 so that we dont attempt to LoadLibrary again
      xor r13, r13 ;clear qwLoadLibraryFlag so we dont attempt to call LoadLibrary again
      jmp _ParseDllHeader

      _Failed:
      xor r10, r10
      pop rbp ;pop registers used by function
      pop r15
      pop r14

      pop r9 ;pop parameters to looked up function
      pop r8
      pop rdx
      pop rcx
      ret
      funcCallFunctionByHash endp


      Hash Function



      utilHash proc
      ;rcx - null terminated function name
      push rcx
      push rdx

      mov rax, 5381d
      hl:
      mov rdx, rax
      shl rax, 5
      add rax, rdx
      xor al, [rcx]
      inc rcx
      ;check for null termination
      cmp byte ptr[rcx], 00h
      jne short hl

      pop rdx
      pop rcx
      ret

      utilHash endp


      I am interested in increasing performance, reducing memory usage, lowering the number of registers used (r14 and r15 are prime examples).



      Any improvements I can make to my coding style or other suggestions would be appreciated.



      Thanks







      assembly x86






      share|improve this question













      share|improve this question











      share|improve this question




      share|improve this question










      asked Nov 19 at 2:54









      Will

      1215




      1215



























          active

          oldest

          votes











          Your Answer





          StackExchange.ifUsing("editor", function () {
          return StackExchange.using("mathjaxEditing", function () {
          StackExchange.MarkdownEditor.creationCallbacks.add(function (editor, postfix) {
          StackExchange.mathjaxEditing.prepareWmdForMathJax(editor, postfix, [["\$", "\$"]]);
          });
          });
          }, "mathjax-editing");

          StackExchange.ifUsing("editor", function () {
          StackExchange.using("externalEditor", function () {
          StackExchange.using("snippets", function () {
          StackExchange.snippets.init();
          });
          });
          }, "code-snippets");

          StackExchange.ready(function() {
          var channelOptions = {
          tags: "".split(" "),
          id: "196"
          };
          initTagRenderer("".split(" "), "".split(" "), channelOptions);

          StackExchange.using("externalEditor", function() {
          // Have to fire editor after snippets, if snippets enabled
          if (StackExchange.settings.snippets.snippetsEnabled) {
          StackExchange.using("snippets", function() {
          createEditor();
          });
          }
          else {
          createEditor();
          }
          });

          function createEditor() {
          StackExchange.prepareEditor({
          heartbeatType: 'answer',
          convertImagesToLinks: false,
          noModals: true,
          showLowRepImageUploadWarning: true,
          reputationToPostImages: null,
          bindNavPrevention: true,
          postfix: "",
          imageUploader: {
          brandingHtml: "Powered by u003ca class="icon-imgur-white" href="https://imgur.com/"u003eu003c/au003e",
          contentPolicyHtml: "User contributions licensed under u003ca href="https://creativecommons.org/licenses/by-sa/3.0/"u003ecc by-sa 3.0 with attribution requiredu003c/au003e u003ca href="https://stackoverflow.com/legal/content-policy"u003e(content policy)u003c/au003e",
          allowUrls: true
          },
          onDemand: true,
          discardSelector: ".discard-answer"
          ,immediatelyShowMarkdownHelp:true
          });


          }
          });














           

          draft saved


          draft discarded


















          StackExchange.ready(
          function () {
          StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fcodereview.stackexchange.com%2fquestions%2f207950%2fgetprocaddress-by-hash-implementation-masm-64bit%23new-answer', 'question_page');
          }
          );

          Post as a guest















          Required, but never shown






























          active

          oldest

          votes













          active

          oldest

          votes









          active

          oldest

          votes






          active

          oldest

          votes
















           

          draft saved


          draft discarded



















































           


          draft saved


          draft discarded














          StackExchange.ready(
          function () {
          StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fcodereview.stackexchange.com%2fquestions%2f207950%2fgetprocaddress-by-hash-implementation-masm-64bit%23new-answer', 'question_page');
          }
          );

          Post as a guest















          Required, but never shown





















































          Required, but never shown














          Required, but never shown












          Required, but never shown







          Required, but never shown

































          Required, but never shown














          Required, but never shown












          Required, but never shown







          Required, but never shown







          Popular posts from this blog

          Сан-Квентин

          8-я гвардейская общевойсковая армия

          Алькесар