PROWAREtech
x86-64 Assembly: Tutorial - A Quick Guide to the Changes in 64-bit Assembly - Page 3
Copy Memory Using 64-bit Registers, Copy Memory Using 128-bit Registers.
Copy Memory Using 64-bit Registers
This x64 assembly code copies memory from one place to another using 8-byte registers, then falls back to 1-byte registers for the remainder. For performance reasons, the quotient and remainder are computed without any division instructions. Notice that the stack is never touched here.
_TEXT SEGMENT
;-----------------------------------------------------------------------
; char *memcopyx64(char *dest, const char *src, unsigned long long length)
; Syntax: MASM (ml64)          ABI: Microsoft x64
; In:     rcx = dest, rdx = src, r8 = length in bytes
; Out:    rax = dest + length (one past the last byte written)
; Clobb:  rcx, rdx, r8, r9, flags (all volatile); the stack is not touched
;
; Copies forward, 8 bytes at a time, then copies the 0-7 leftover bytes
; one at a time. No division instruction is used: the quotient comes from
; a shift and the remainder from a mask.
;-----------------------------------------------------------------------
memcopyx64 PROC
        mov     rax, rcx                ; rax = write cursor, starts at dest
        mov     rcx, r8
        shr     rcx, 3                  ; rcx = length / 8 = qwords to copy
        and     r8, 7                   ; r8  = length % 8 = trailing bytes
        test    rcx, rcx
        jz      copy_tail               ; fewer than 8 bytes: skip the qword loop

copy_qword:                             ; invariant: rcx = qwords remaining, > 0
        mov     r9, QWORD PTR [rdx]
        mov     QWORD PTR [rax], r9
        add     rax, 8
        add     rdx, 8
        dec     rcx                     ; the count alone decides termination;
        jnz     copy_qword              ; loopnz would also test ZF left by the add

copy_tail:
        test    r8, r8
        jz      done                    ; length was a multiple of 8

copy_byte:                              ; invariant: r8 = bytes remaining, > 0
        movzx   r9d, BYTE PTR [rdx]     ; zero-extending load avoids a partial-register write
        mov     BYTE PTR [rax], r9b
        inc     rax
        inc     rdx
        dec     r8
        jnz     copy_byte

done:
        ret                             ; rax = dest + length
memcopyx64 ENDP
_TEXT ENDS
END
The driver source code:
extern "C" char * memcopyx64(char *dest, const char *src, unsigned long long length);
// Driver: writes the sample string into dest three times back to back,
// producing one NUL-terminated string.
int main()
{
    const char* src = "abcdefghijklmnopqrstuvwxyz!ABCDEFGHIJKLMNOPQRSTUVWXYZ!";
    char dest[256];

    // Count the characters by hand; starting at 1 includes the terminating
    // '\0' in the number of bytes to copy.
    unsigned int len = 1;
    for (const char* p = src; *p != '\0'; ++p)
        ++len;

    // memcopyx64 returns a pointer just past the last byte it wrote, so
    // stepping back by one lands on the copied '\0'; the next copy then
    // overwrites it and the pieces join into one string.
    char* end = dest;
    int remaining = 3;
    while (remaining-- > 0)
        end = memcopyx64(end, src, len) - 1;

    return 0;
}
Copy Memory Using 128-bit Registers
This x64 assembly code copies memory from one place to another using 16-byte registers, then falls back to 2-byte registers for the remainder, except for a final odd byte, which is copied on its own. As above, no division operations are performed for performance reasons, and the stack is never touched. This code is a port of the x86 version.
_TEXT SEGMENT
;-----------------------------------------------------------------------
; char *memcopy128(char *dest, const char *src, unsigned long long length)
; Syntax: MASM (ml64)          ABI: Microsoft x64
; In:     rcx = dest, rdx = src, r8 = length in bytes
; Out:    rax = dest + length (one past the last byte written)
; Clobb:  rcx, rdx, r8, r9, xmm0, flags (all volatile); the stack is not touched
;
; Copies forward, 16 bytes at a time with unaligned SSE2 moves, then
; copies the 0-15 leftover bytes two at a time, then a final odd byte if
; there is one. No division instruction is used: quotients come from
; shifts and the remainder from a mask.
;-----------------------------------------------------------------------
memcopy128 PROC
        mov     rax, rcx                ; rax = write cursor, starts at dest
        mov     rcx, r8
        shr     rcx, 4                  ; rcx = length / 16 = xmmwords to copy
        and     r8, 15                  ; r8  = length % 16 = trailing bytes
        test    rcx, rcx
        jz      copy_tail               ; fewer than 16 bytes: skip the SSE loop

copy_xmmword:                           ; invariant: rcx = xmmwords remaining, > 0
        movdqu  xmm0, XMMWORD PTR [rdx] ; movdqu: neither pointer need be aligned
        movdqu  XMMWORD PTR [rax], xmm0
        add     rax, 16
        add     rdx, 16
        dec     rcx                     ; the count alone decides termination;
        jnz     copy_xmmword            ; loopnz would also test ZF left by the add

copy_tail:
        mov     rcx, r8
        shr     rcx, 1                  ; rcx = whole 2-byte words in the tail
        test    rcx, rcx
        jz      copy_odd_byte           ; tail is 0 or 1 byte

copy_word:                              ; invariant: rcx = words remaining, > 0
        movzx   r9d, WORD PTR [rdx]     ; zero-extending load avoids a partial-register write
        mov     WORD PTR [rax], r9w
        add     rax, 2
        add     rdx, 2
        dec     rcx
        jnz     copy_word

copy_odd_byte:
        test    r8, 1                   ; bit 0 of the remainder = one byte left over
        jz      done
        movzx   r9d, BYTE PTR [rdx]
        mov     BYTE PTR [rax], r9b
        inc     rax

done:
        ret                             ; rax = dest + length
memcopy128 ENDP
_TEXT ENDS
END
The driver source code:
extern "C" char * memcopy128(char *dest, const char *src, unsigned long long length);
// Driver: writes the sample string into dest three times back to back,
// producing one NUL-terminated string.
int main()
{
    const char* src = "abcdefghijklmnopqrstuvwxyz!ABCDEFGHIJKLMNOPQRSTUVWXYZ!";
    char dest[256];

    // Count the characters by hand; starting at 1 includes the terminating
    // '\0' in the number of bytes to copy.
    unsigned int len = 1;
    for (const char* p = src; *p != '\0'; ++p)
        ++len;

    // memcopy128 returns a pointer just past the last byte it wrote, so
    // stepping back by one lands on the copied '\0'; the next copy then
    // overwrites it and the pieces join into one string.
    char* end = dest;
    int remaining = 3;
    while (remaining-- > 0)
        end = memcopy128(end, src, len) - 1;

    return 0;
}
Comment