exercise session 5 - systems group · sparse switch example – not practical to use jump table •...
TRANSCRIPT
Exercise Session 5 Computer Architecture and
Systems Programming
Herbstsemester 2016
© Systems Group | Department of Computer Science | ETH Zürich
Agenda
• Common mistakes in assignment 3
• Compiling C
• x86-64 Calling Conventions
• Alignment
• Outlook on assignment 5
Assignment 3 Mistakes
• Not freeing all allocated memory struct complex_set *alloc(…) { first malloc succeeded second malloc if (ptr==NULL) { return NULL; }; }
Assignment 3 Mistakes
• Not freeing all allocated memory struct complex_set *alloc(…) { first malloc succeeded second malloc if (ptr==NULL) { free(set); return NULL; }; }
Assignment 3 Mistakes
• Not considering all whitespace characters
int main() { … if (c == ' ') { … }; }
Conditional branch example #include <stdio.h> int putmax(int x, int y) { int result; if (x > y) { result = printf("%d\n", x); } else { result = printf("%d\n", y); } return result; }
putmax: subq $8, %rsp cmpl %esi, %edi jle .L2 movl %edi, %edx movl $.LC0, %esi movl $1, %edi movl $0, %eax call __printf_chk jmp .L3 .L2: movl %esi, %edx movl $.LC0, %esi movl $1, %edi movl $0, %eax call __printf_chk .L3: addq $8, %rsp ret
Setup
AS 2016 Compiling C Control Flow
Test
Body 1
Body 2
Finish
7
Conditional branch example #include <stdio.h> int putmax(int x, int y) { int result; if (x <= y) { goto Else; } result = printf("%d\n", x); goto Done; Else: result = printf("%d\n", y); Done: return result; }
putmax: subq $8, %rsp cmpl %esi, %edi jle .L2 movl %edi, %edx movl $.LC0, %esi movl $1, %edi movl $0, %eax call __printf_chk jmp .L3 .L2: movl %esi, %edx movl $.LC0, %esi movl $1, %edi movl $0, %eax call __printf_chk .L3: addq $8, %rsp ret
Setup
AS 2016 Compiling C Control Flow
Test
Body 1
Body 2
Finish
8
Loops
• Do-While loop
• While-Do loop
C Code do Body while (Test);
Goto version loop: Body if (Test) goto loop
While version while (Test) Body
Do-While version if (!Test) goto done; do Body while(Test); done:
Goto version if (!Test) goto done; loop: Body if (Test) goto loop; done:
goto middle; loop: Body middle: if (Test) goto loop;
or
AS 2016 Compiling C Control Flow
“For”→ “While”→ “Do-While”
for (Init; Test; Update )
Body
Init; while (Test ) { Body Update ; }
Goto version Init; if (!Test) goto done; loop: Body Update ; if (Test) goto loop; done:
While version
For version
Do-While version Init; if (!Test) goto done; do { Body Update ; } while (Test) done:
“For”→ “While” (jump-to-middle)
for (Init; Test; Update )
Body
Init; while (Test ) { Body Update ; }
Init; goto middle; loop: Body Update ; middle: if (Test) goto loop; done:
While version
For version
Goto version
AS 2016 Compiling C Control Flow 11
Switch statement
Setup:
switch_eg: movq %rdx, %rcx cmpq $6, %rdi # x : 6? ja .L8 # if > goto default jmp *.L4(,%rdi,8) # goto Jtab[x]
long switch_eg(long x, long y, long z) { long w = 1; switch(x) { . . . } return w; }
Indirect jump
Jump table:
.section .rodata .align 8 .align 4 .L4: .quad .L8 # x=0 .quad .L3 # x=1 .quad .L5 # x=2 .quad .L9 # x=3 .quad .L8 # x=4 .quad .L7 # x=5 .quad .L7 # x=6
AS 2016 Compiling C Control Flow 12
Assembly setup explanation
• Table Structure – Each target requires 8 bytes – Base address at .L4
• Jumping
Direct: jmp .L8 – Jump target is denoted by label .L8 Indirect: jmp *.L4(,%rdi,8) – Must scale by factor of 8 (labels are 64-bit = 8 Bytes on x86_64) – Fetch target from effective Address .L4 + rdi*8
• Only for 0 ≤ x ≤ 6
Jump table
AS 2016 Compiling C Control Flow
.section .rodata .align 8 .align 4 .L4: .quad .L8 # x=0 .quad .L3 # x=1 .quad .L5 # x=2 .quad .L9 # x=3 .quad .L8 # x=4 .quad .L7 # x=5 .quad .L7 # x=6
13
switch(x) { case 1: // .L3 w = y*z; break; case 2: // .L5 w = y/z; /* Fall Through */ case 3: // .L9 w += z; break; case 5: case 6: // .L7 w -= z; break; default: // .L8 w = 2; }
.section .rodata .align 8 .align 4 .L4: .quad .L8 # x=0 .quad .L3 # x=1 .quad .L5 # x=2 .quad .L9 # x=3 .quad .L8 # x=4 .quad .L7 # x=5 .quad .L7 # x=6
Jump table Jump table
AS 2016 14 Compiling C Control Flow
Sparse switch example
– Not practical to use jump table
• Would require 1000 entries
– Obvious translation into if-then-else would have max. of 9 tests
/* Return x/111 if x is multiple && <= 999. -1 otherwise */ int div111(int x) { switch(x) { case 0: return 0; case 111: return 1; case 222: return 2; case 333: return 3; case 444: return 4; case 555: return 5; case 666: return 6; case 777: return 7; case 888: return 8; case 999: return 9; default: return -1; } } 15
Sparse switch code – Compares x to possible
case values – Jumps different places
depending on outcomes
div111: cmpl $444, %edi je .L3 jle .L28 cmpl $777, %edi je .L10 jg .L11 cmpl $555, %edi movl $5, %eax je .L5 cmpl $666, %edi movb $6, %al jne .L2 .L5: rep ret
AS 2016
.L28: cmpl $111, %edi movl $1, %eax je .L5 jle .L29 cmpl $222, %edi movl $2, %eax je .L5 cmpl $333, %edi movb $3, %al je .L5 .L2: movl $-1, %eax ret
x86-64 integer registers
%rax – Return value, # varargs
%rbx – Callee saved; base ptr
%rcx – Argument #4
%rdx – Argument #3 (& 2nd return)
%rsi – Argument #2
%rdi – Argument #1
%rsp – Stack pointer
%rbp – Callee saved; frame ptr
%r8 – Argument #5
%r9 – Argument #6
%r10 – Static chain ptr
%r11 – Used for linking
%r12 – Callee saved
%r13 – Callee saved
%r14 – Callee saved
%r15 – Callee saved
AS 2016 Compiling C Control Flow 18
x86-64 integer registers
• Caller-saved registers (rax, rcx, rdx, rsi, rdi, r8-r11) – Caller stores registers he needs on his stack – Used to hold temporary quantities that need not be preserved across
calls • Callee-saved (rbx, rbp, r12-r15)
– Callee stores registers on his stack before using them – Restores them before returning – Used to hold long-lived values that should be preserved across calls
• X86-64: May pass up to 6 arguments through registers
– Depends on the type of the argument – Uses caller saved registers: rdi, rsi, rdx, rcx, r8, r9 – More arguments passed on the stack
Alignment
• Memory accessed by (aligned) chunks of 4 or 8 bytes – System dependent
– Inefficient to load or store datum that spans quad word boundaries
– Virtual memory very tricky when datum spans 2 pages
• Compiler inserts gaps in structure to ensure correct alignment of fields
AS 2016 Compiling C data structures 21
Specific cases of alignment (x86-64)
• 1 byte: char, … – no restrictions on address
• 2 bytes: short, … – lowest 1 bit of address must be 0₂
• 4 bytes: int, float, … – lowest 2 bits of address must be 00₂
• 8 bytes: double, char *, … – Windows & Linux:
• lowest 3 bits of address must be 000₂
• 16 bytes: long double – Linux:
• lowest 3 bits of address must be 000₂ • i.e., treated the same as an 8-byte primitive data type
AS 2016 Compiling C data structures 22
Specific cases of alignment (ia32)
• 1 byte: char, … – no restrictions on address
• 2 bytes: short, … – lowest 1 bit of address must be 0₂
• 4 bytes: int, float, char *, … – lowest 2 bits of address must be 00₂
• 8 bytes: double, … – Windows (and most other OS’s & instruction sets):
• lowest 3 bits of address must be 000₂
– Linux: • lowest 2 bits of address must be 00₂ • i.e., treated the same as a 4-byte primitive data type
• 12 bytes: long double – Windows, Linux:
• lowest 2 bits of address must be 00₂ • i.e., treated the same as a 4-byte primitive data type
AS 2016 Compiling C data structures 23
struct S1 { char c; int i[2]; double v; } *p;
Satisfying alignment with structures
• Within structure: – Must satisfy element’s alignment requirement
• Overall structure placement – Each structure has alignment requirement K
• K = Largest alignment of any element – Initial address & structure length must be multiples of K
• Example (under Windows or x86-64): – K = 8, due to double element
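Layout: c | 3 bytes padding | i[0] | i[1] | 4 bytes padding | v
Offsets: c at p+0, i[0] at p+4, i[1] at p+8, v at p+16, end of struct at p+24
p+4: Multiple of 4; p+0: Multiple of 8
p+16: Multiple of 8; p+24: Multiple of 8
AS 2016 Compiling C data structures 24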
Assignment 5
• Pen & Paper
• Translating C to Assembly and vice versa
• Alignment
• Passing Parameters to functions in Assembly
Quiz: Alignment
struct S1 { char c; double d[2]; int v; } *p;
struct S2 { double d[2]; int v; char c; } *q;
struct S3 { int id; struct S1 st; char c; } *r;
struct S4 { int i[2]; struct S3 st; char name[5]; } *s;
What is the size and the alignment of these structs?
Quiz: Alignment
struct S1 { char c; double d[2]; int v; } *p;
struct S2 { double d[2]; int v; char c; } *q;
Size 32 bytes Alignment 8 bytes
Size 24 bytes Alignment 8 bytes
Quiz: Alignment
struct S3 { int id; struct S1 st; char c; } *r;
struct S4 { int i[2]; struct S3 st; char name[5]; } *s;
Size 48 bytes Alignment 8 bytes
Size 64 bytes Alignment 8 bytes
Quiz: Parameter passing
uint64_t subtract( uint64_t a, uint64_t b) { return a - b; }
int main() { return subtract(3,2); }
Quiz: Parameter passing
uint64_t subtract( uint64_t a, uint64_t b) { return a - b; }
int main() { return subtract(3,2); }
subtract: subq %rsi, %rdi movq %rdi, %rax ret main: movl $3, %edi movl $2, %esi jmp subtract
Quiz: Parameter passing
uint64_t subtract( uint64_t* a, uint64_t* b) { return *a - *b; } int main() { uint64_t a = 3; uint64_t b = 2; return subtract(&a,&b); }
Quiz: Parameter passing
uint64_t subtract( uint64_t* a, uint64_t* b) { return *a - *b; } int main() { uint64_t a = 3; uint64_t b = 2; return subtract(&a,&b); }
subtract: movq (%rdi), %rax subq (%rsi), %rax ret main: pushq %rbp movq %rsp, %rbp subq $16, %rsp movq $3, -8(%rbp) movq $2, -16(%rbp) leaq -8(%rbp), %rdi leaq -16(%rbp), %rsi callq subtract addq $16, %rsp popq %rbp ret
Quiz: function call
foo: pushq %rbp movq %rsp, %rbp movl %edi, -4(%rbp) movl -4(%rbp), %eax imull -4(%rbp), %eax popq %rbp ret main: pushq %rbp movq %rsp, %rbp subq $32, %rsp movl %edi, -20(%rbp) movq %rsi, -32(%rbp) movl $4, -8(%rbp) movl -8(%rbp), %eax movl %eax, %edi call foo movl %eax, -4(%rbp) leave ret
What is happening here?
Quiz: function call
foo: pushq %rbp movq %rsp, %rbp movl %edi, -4(%rbp) movl -4(%rbp), %eax imull -4(%rbp), %eax popq %rbp ret main: pushq %rbp movq %rsp, %rbp subq $32, %rsp movl %edi, -20(%rbp) movq %rsi, -32(%rbp) movl $4, -8(%rbp) movl -8(%rbp), %eax movl %eax, %edi call foo movl %eax, -4(%rbp) leave ret
int foo(int x) { return( x * x); } int main(int argc, char *argv[]) { int a = 4, b; b = foo(a); }
$ cc -S -O0 hello.c