Why does this simple assembly work in AT&T syntax but not Intel syntax?
What's wrong with this code (running on x86_64 Linux)?
# Listing from the question: Intel-syntax attempt to write the 14-byte string
# at msg to fd 1 and then exit with status 0 (the strace output further down
# shows the two syscalls as write(1, ..., 14) and exit(0)).
# NOTE: this directive is given without the "noprefix" keyword; the working
# listing in the answer below uses ".intel_syntax noprefix".
.intel_syntax
.text
.globl _start
_start:
# rax = 1 / rdi = 1: syscall number and first argument of the write call.
mov rax, 1
mov rdi, 1
# Meant to place the ADDRESS of msg in rsi. As assembled here it is a memory
# load instead (disassembled below as "mov rsi,QWORD PTR ds:0x600117"), so rsi
# ends up holding the first 8 bytes of the string and write fails with EFAULT.
mov rsi, msg
# rdx = 14: byte count, the length of "Hello, world!\n".
mov rdx, 14
syscall
# rax = 60 / rdi = 0: reported by strace as exit(0).
mov rax, 60
mov rdi, 0
syscall
.data
# The message bytes; .ascii emits no terminating NUL, hence the explicit length above.
msg:
.ascii "Hello, world!\n"
When I ran it:
$ clang -o hello_intel hello_intel.s -nostdlib && ./hello_intel
No output. Let's strace it:
$ strace ./hello_intel
execve("./hello_intel", ["./hello_intel"], [/* 96 vars */]) = 0
write(1, 0x77202c6f6c6c6548, 14) = -1 EFAULT (Bad address)
exit(0) = ?
+++ exited with 0 +++
This is dereferencing msg
instead of using its location. Why?
If I use AT&T syntax instead ...
# AT&T-syntax version of the same sequence: operands are written source first,
# registers carry a % prefix and immediates a $ prefix.
.text
.globl _start
_start:
# rax = 1 / rdi = 1: syscall number and first argument.
mov $1, %rax
mov $1, %rdi
# "$msg" is an immediate operand: the address of msg itself, not its contents
# (disassembled below as "mov rsi,0x600116").
mov $msg, %rsi
# rdx = 14: byte count, the length of "Hello, world!\n".
mov $14, %rdx
syscall
# rax = 60 / rdi = 0: second syscall, terminating the program with status 0.
mov $60, %rax
mov $0, %rdi
syscall
.data
# The message bytes; .ascii emits no terminating NUL, hence the explicit length above.
msg:
.ascii "Hello, world!\n"
... it works great:
$ clang -o hello_att hello_att.s -nostdlib && ./hello_att
Hello, world!
What's the difference between the two?
Here's the working one:
$ objdump -d hello_att -s -M intel
hello_att: file format elf64-x86-64
Contents of section .text:
4000e8 48c7c001 00000048 c7c70100 000048c7 H......H......H.
4000f8 c6160160 0048c7c2 0e000000 0f0548c7 ...`.H........H.
400108 c03c0000 0048c7c7 00000000 0f05 .<...H........
Contents of section .data:
600116 48656c6c 6f2c2077 6f726c64 210a Hello, world!.
Disassembly of section .text:
00000000004000e8 <_start>:
4000e8: 48 c7 c0 01 00 00 00 mov rax,0x1
4000ef: 48 c7 c7 01 00 00 00 mov rdi,0x1
4000f6: 48 c7 c6 16 01 60 00 mov rsi,0x600116
4000fd: 48 c7 c2 0e 00 00 00 mov rdx,0xe
400104: 0f 05 syscall
400106: 48 c7 c0 3c 00 00 00 mov rax,0x3c
40010d: 48 c7 c7 00 00 00 00 mov rdi,0x0
400114: 0f 05 syscall
And here's the broken one:
$ objdump -d hello_intel -s -M intel
hello_intel: file format elf64-x86-64
Contents of section .text:
4000e8 48c7c001 00000048 c7c70100 0000488b H......H......H.
4000f8 34251701 600048c7 c20e0000 000f0548 4%..`.H........H
400108 c7c03c00 000048c7 c7000000 000f05 ..<...H........
Contents of section .data:
600117 48656c6c 6f2c2077 6f726c64 210a Hello, world!.
Disassembly of section .text:
00000000004000e8 <_start>:
4000e8: 48 c7 c0 01 00 00 00 mov rax,0x1
4000ef: 48 c7 c7 01 00 00 00 mov rdi,0x1
4000f6: 48 8b 34 25 17 01 60 mov rsi,QWORD PTR ds:0x600117
4000fd: 00
4000fe: 48 c7 c2 0e 00 00 00 mov rdx,0xe
400105: 0f 05 syscall
400107: 48 c7 c0 3c 00 00 00 mov rax,0x3c
40010e: 48 c7 c7 00 00 00 00 mov rdi,0x0
400115: 0f 05 syscall
So the important difference here is 0x600116
vs QWORD PTR ds:0x600117
which really looks like the difference between a pointer and a pointer dereference.
So how do you not dereference a pointer in Intel syntax?
source to share
Here's the code that works in GCC:
# Listing from the answer: Intel-syntax version reported to work when built
# with GCC (the text below notes that clang rejects the "offset" operand).
# "noprefix" selects Intel syntax with registers written without a % prefix.
.intel_syntax noprefix
.text
.globl _start
_start:
# rax = 1 / rdi = 1: syscall number and first argument.
mov rax, 1
mov rdi, 1
# "offset msg" asks for the address of msg as an immediate value, rather than
# a load from the memory at msg.
mov rsi, offset msg
# rdx = 14: byte count, the length of "Hello, world!\n".
mov rdx, 14
syscall
# rax = 60 / rdi = 0: second syscall, terminating the program with status 0.
mov rax, 60
mov rdi, 0
syscall
.data
# The message bytes; .ascii emits no terminating NUL, hence the explicit length above.
msg:
.ascii "Hello, world!\n"
You need to add the noprefix and offset keywords. Unfortunately, this doesn't work with clang:
hello_intel.s:8:24: error: unknown token in expression
mov rsi, offset msg
^
However, you can work around the problem by using lea instead of mov:
# lea stores the effective address of its memory operand rather than loading
# from it, so rsi receives the address of msg without needing "offset".
lea rsi, msg
This works in both clang and gcc.
source to share