PROWAREtech
x86 Assembly: wcstok Procedure
Find tokens in wide-char string.
Use MASM for Visual C++ Express Edition 2005 to compile this procedure.
The procedure wcstok_asm
considers string to consist of a sequence of text tokens
separated by one or more delimiting characters. Subsequent calls will work through the input string until no
tokens remain. The input string will be modified. When no tokens remain, a NULL pointer is returned. This
procedure is safe in multithreaded applications so long as string is not global or static.
Parameters:
wchar_t **string - string to tokenize; this is a pointer to a pointer to wchar_t
const wchar_t *delimiters - characters to use as delimiters
Returns a pointer to the next token in string. Returns NULL when no more tokens remain. string is modified.
TITLE 'extern "C" wchar_t *wcstok_asm(wchar_t **string, const wchar_t *delimiters);'
;-----------------------------------------------------------------------
; wchar_t *wcstok_asm(wchar_t **string, const wchar_t *delimiters)
;
; Syntax: MASM (Intel operand order), 32-bit flat model.
; ABI:    arguments on the stack, caller removes them (ret 0).
; In:     [esp+4] = string     - address of the caller's cursor into the text
;         [esp+8] = delimiters - zero-terminated set of separator characters
; Out:    eax     = start of the next token, or NULL when no token remains
;         *string = position after the token; when a delimiter ended the
;                   token it is overwritten with 0 and the cursor is moved
;                   past it
; Clobb:  eax, ecx, edx, flags (ebx, esi, edi are saved and restored)
;
; A NULL string, NULL *string or NULL delimiters returns NULL and leaves
; everything untouched.
;
; Register roles inside the procedure:
;   ecx = string (where the cursor is written back)
;   esi = cursor, kept in a register and stored once on exit
;   edi = delimiters
;   edx = walking pointer into the delimiter set
;   ebx = start of the token being returned
;   ax  = character under the cursor
;-----------------------------------------------------------------------
.386P
.model FLAT
PUBLIC _wcstok_asm

WCHAR_SIZE EQU 2                                ; bytes per 16-bit wide character

_TEXT SEGMENT
_wcstok_asm PROC NEAR
        ; ---- argument validation: nothing is pushed yet, so bail is a bare ret
        mov     ecx, DWORD PTR [esp+4]          ; ecx = string
        xor     eax, eax                        ; eax = NULL for every early exit
        test    ecx, ecx
        jz      wt_bail                         ; string == NULL
        mov     edx, DWORD PTR [esp+8]          ; edx = delimiters
        test    edx, edx
        jz      wt_bail                         ; delimiters == NULL
        cmp     DWORD PTR [ecx], 0
        je      wt_bail                         ; *string == NULL

        push    ebx
        push    esi
        push    edi
        mov     edi, edx                        ; edi = delimiters
        mov     esi, DWORD PTR [ecx]            ; esi = cursor = *string

        ; ---- phase 1: skip over leading delimiters
wt_skip:
        movzx   eax, WORD PTR [esi]             ; eax = character under the cursor
        test    eax, eax
        jz      wt_store                        ; text exhausted; eax is already NULL
        mov     edx, edi                        ; restart at the first delimiter
wt_skip_scan:
        cmp     WORD PTR [edx], 0
        je      wt_token                        ; matched no delimiter: token starts here
        cmp     ax, WORD PTR [edx]
        je      wt_skip_next
        add     edx, WCHAR_SIZE
        jmp     wt_skip_scan
wt_skip_next:
        add     esi, WCHAR_SIZE                 ; step over the delimiter
        jmp     wt_skip

        ; ---- phase 2: find the end of the token
        ; Invariant: [esi] is a non-zero, non-delimiter character, so the
        ; token holds at least one character and the scan may advance first.
wt_token:
        mov     ebx, esi                        ; ebx = token start
wt_body:
        add     esi, WCHAR_SIZE
        movzx   eax, WORD PTR [esi]
        test    eax, eax
        jz      wt_found                        ; token runs to the end of the text
        mov     edx, edi
wt_body_scan:
        cmp     WORD PTR [edx], 0
        je      wt_body                         ; ordinary character: keep going
        cmp     ax, WORD PTR [edx]
        je      wt_cut
        add     edx, WCHAR_SIZE
        jmp     wt_body_scan
wt_cut:
        mov     WORD PTR [esi], 0               ; terminate the token in place
        add     esi, WCHAR_SIZE                 ; resume after the terminator next call
wt_found:
        mov     eax, ebx                        ; result = token start

        ; ---- common exit: publish the cursor, restore saved registers
wt_store:
        mov     DWORD PTR [ecx], esi
        pop     edi
        pop     esi
        pop     ebx
wt_bail:
        ret     0
_wcstok_asm ENDP
_TEXT ENDS
END
Example usage: parse tab delimited data
#include <stdio.h>
#include <wchar.h>

extern "C" wchar_t *wcstok_asm(wchar_t **string, const wchar_t *delimiters);

// Splits tab-delimited text into rows (separated by CR/LF) and then into
// fields (separated by tabs), printing every field on its own line.
int main()
{
    wchar_t s[100] = L"812\t7022\t1477478\t9038\r\nabc\txyc\tlmn\tedf\r\n";
    wchar_t *row, *sp;

    // %ls always denotes a wide string; a plain %s in wprintf is treated as
    // wide only by Microsoft's runtime and as narrow by standard C.
    wprintf(L"parsing:\r\n%ls\r\n", s);

    sp = s;
    // outer loop: one row per call; inner loop: one field per call
    while ((row = wcstok_asm(&sp, L"\r\n")) != NULL) {
        wchar_t *tok;
        while ((tok = wcstok_asm(&row, L"\t")) != NULL)
            wprintf(L"%ls\r\n", tok);
        wprintf(L"\r\n");
    }
    // s is modified after using wcstok_asm
    return 0;
}
Another example would be to use this function in a programming language parser.
Comment