exercise session 5 - systems group · sparse switch example – not practical to use jump table •...

35
Exercise Session 5 Computer Architecture and Systems Programming Herbstsemester 2016 © Systems Group | Department of Computer Science | ETH Zürich

Upload: ngonga

Post on 04-Apr-2018

227 views

Category:

Documents


5 download

TRANSCRIPT

Exercise Session 5 Computer Architecture and

Systems Programming

Herbstsemester 2016

© Systems Group | Department of Computer Science | ETH Zürich

Agenda

• Common mistakes in assignment 3

• Compiling C

• x86-64 Calling Conventions

• Alignment

• Outlook on assignment 5

Assignment 3 Mistakes

• Not freeing all allocated memory: struct complex_set *alloc(…) { first malloc succeeded second malloc if (ptr==NULL) { return NULL; }; }

Assignment 3 Mistakes

• Not freeing all allocated memory (fixed): struct complex_set *alloc(…) { first malloc succeeded second malloc if (ptr==NULL) { free(set); return NULL; }; }

Assignment 3 Mistakes

• Not considering all whitespace characters

int main() { … if (c == ' ') { … }; }

Compiling C

AS 2016 Compiling C Control Flow 6

Conditional branch example #include <stdio.h> int putmax(int x, int y) { int result; if (x > y) { result = printf("%d\n", x); } else { result = printf("%d\n", y); } return result; }

putmax: subq $8, %rsp cmpl %esi, %edi jle .L2 movl %edi, %edx movl $.LC0, %esi movl $1, %edi movl $0, %eax call __printf_chk jmp .L3 .L2: movl %esi, %edx movl $.LC0, %esi movl $1, %edi movl $0, %eax call __printf_chk .L3: addq $8, %rsp ret

Setup

AS 2016 Compiling C Control Flow

Test

Body 1

Body 2

Finish

7

Conditional branch example #include <stdio.h> int putmax(int x, int y) { int result; if (x <= y) { goto Else; } result = printf("%d\n", x); goto Done; Else: result = printf("%d\n", y); Done: return result; }

putmax: subq $8, %rsp cmpl %esi, %edi jle .L2 movl %edi, %edx movl $.LC0, %esi movl $1, %edi movl $0, %eax call __printf_chk jmp .L3 .L2: movl %esi, %edx movl $.LC0, %esi movl $1, %edi movl $0, %eax call __printf_chk .L3: addq $8, %rsp ret

Setup

AS 2016 Compiling C Control Flow

Test

Body 1

Body 2

Finish

8

Loops

• Do-While loop

• While-Do loop

C Code do Body while (Test);

Goto version loop: Body if (Test) goto loop;

While version while (Test) Body

Do-While version if (!Test) goto done; do Body while(Test); done:

Goto version if (!Test) goto done; loop: Body if (Test) goto loop; done:

goto middle; loop: Body middle: if (Test) goto loop;

or

AS 2016 Compiling C Control Flow

“For”→ “While”→ “Do-While”

for (Init; Test; Update )

Body

Init; while (Test ) { Body Update ; }

Goto version Init; if (!Test) goto done; loop: Body Update ; if (Test) goto loop; done:

While version

For version

Do-While version Init; if (!Test) goto done; do { Body Update ; } while (Test); done:

“For”→ “While” (jump-to-middle)

for (Init; Test; Update )

Body

Init; while (Test ) { Body Update ; }

Init; goto middle; loop: Body Update ; middle: if (Test) goto loop; done:

While version

For version

Goto version

AS 2016 Compiling C Control Flow 11

Switch statement

Setup:

switch_eg: movq %rdx, %rcx cmpq $6, %rdi # x : 6? ja .L8 # if > goto default jmp *.L4(,%rdi,8) # goto Jtab[x]

long switch_eg(long x, long y, long z) { long w = 1; switch(x) { . . . } return w; }

Indirect jump

Jump table:

.section .rodata .align 8 .align 4 .L4: .quad .L8 # x=0 .quad .L3 # x=1 .quad .L5 # x=2 .quad .L9 # x=3 .quad .L8 # x=4 .quad .L7 # x=5 .quad .L7 # x=6

AS 2016 Compiling C Control Flow 12

Assembly setup explanation

• Table Structure – Each target requires 8 bytes – Base address at .L4

• Jumping

Direct: jmp .L8 – Jump target is denoted by label .L8 Indirect: jmp *.L4(,%rdi,8) – Must scale by factor of 8 (labels are 64-bit = 8 bytes on x86_64) – Fetch target from effective address .L4 + rdi*8

• Only for 0 ≤ x ≤ 6

Jump table

AS 2016 Compiling C Control Flow

.section .rodata .align 8 .align 4 .L4: .quad .L8 # x=0 .quad .L3 # x=1 .quad .L5 # x=2 .quad .L9 # x=3 .quad .L8 # x=4 .quad .L7 # x=5 .quad .L7 # x=6

13

switch(x) { case 1: // .L3 w = y*z; break; case 2: // .L5 w = y/z; /* Fall Through */ case 3: // .L9 w += z; break; case 5: case 6: // .L7 w -= z; break; default: // .L8 w = 2; }

.section .rodata .align 8 .align 4 .L4: .quad .L8 # x=0 .quad .L3 # x=1 .quad .L5 # x=2 .quad .L9 # x=3 .quad .L8 # x=4 .quad .L7 # x=5 .quad .L7 # x=6

Jump table Jump table

AS 2016 14 Compiling C Control Flow

Sparse switch example

– Not practical to use jump table

• Would require 1000 entries

– Obvious translation into if-then-else would have max. of 9 tests

/* Return x/111 if x is multiple && <= 999. -1 otherwise */ int div111(int x) { switch(x) { case 0: return 0; case 111: return 1; case 222: return 2; case 333: return 3; case 444: return 4; case 555: return 5; case 666: return 6; case 777: return 7; case 888: return 8; case 999: return 9; default: return -1; } } 15

Sparse switch code – Compares x to possible

case values – Jumps different places

depending on outcomes

div111: cmpl $444, %edi je .L3 jle .L28 cmpl $777, %edi je .L10 jg .L11 cmpl $555, %edi movl $5, %eax je .L5 cmpl $666, %edi movb $6, %al jne .L2 .L5: rep ret

AS 2016

.L28: cmpl $111, %edi movl $1, %eax je .L5 jle .L29 cmpl $222, %edi movl $2, %eax je .L5 cmpl $333, %edi movb $3, %al je .L5 .L2: movl $-1, %eax ret

x86_64 calling conventions

AS 2016 Compiling C Control Flow 17

x86-64 integer registers

%rax – Return value, # varargs
%rbx – Callee saved; base ptr
%rcx – Argument #4
%rdx – Argument #3 (& 2nd return)
%rsi – Argument #2
%rdi – Argument #1
%rsp – Stack pointer
%rbp – Callee saved; frame ptr
%r8 – Argument #5
%r9 – Argument #6
%r10 – Static chain ptr
%r11 – Used for linking
%r12 – Callee saved
%r13 – Callee saved
%r14 – Callee saved
%r15 – Callee saved

AS 2016 Compiling C Control Flow 18

x86-64 integer registers

• Caller-saved registers (rax, rcx, rdx, rsi, rdi, r8-r11) – Caller stores the registers it needs on its stack – Used to hold temporary quantities that need not be preserved across calls

• Callee-saved registers (rbx, rbp, r12-r15)

– Callee stores registers on his stack before using them – Restores them before returning – Used to hold long-lived values that should be preserved across calls

• X86-64: May pass up to 6 arguments through registers

– Depends on the type of the argument – Uses caller saved registers: rdi, rsi, rdx, rcx, r8, r9 – More arguments passed on the stack

Alignment

AS 2016 Compiling C Control Flow 20

Alignment

• Memory accessed by (aligned) chunks of 4 or 8 bytes – System dependent

– Inefficient to load or store datum that spans quad word boundaries

– Virtual memory very tricky when datum spans 2 pages

• Compiler inserts gaps in structure to ensure correct alignment of fields

AS 2016 Compiling C data structures 21

Specific cases of alignment (x86-64)

• 1 byte: char, … – no restrictions on address

• 2 bytes: short, … – lowest 1 bit of address must be 0₂

• 4 bytes: int, float, … – lowest 2 bits of address must be 00₂

• 8 bytes: double, char *, … – Windows & Linux:

• lowest 3 bits of address must be 000₂

• 16 bytes: long double – Linux:

• lowest 3 bits of address must be 000₂ • i.e., treated the same as an 8-byte primitive data type

AS 2016 Compiling C data structures 22

Specific cases of alignment (ia32)

• 1 byte: char, … – no restrictions on address

• 2 bytes: short, … – lowest 1 bit of address must be 0₂

• 4 bytes: int, float, char *, … – lowest 2 bits of address must be 00₂

• 8 bytes: double, … – Windows (and most other OS’s & instruction sets):

• lowest 3 bits of address must be 000₂

– Linux: • lowest 2 bits of address must be 00₂ • i.e., treated the same as a 4-byte primitive data type

• 12 bytes: long double – Windows, Linux:

• lowest 2 bits of address must be 00₂ • i.e., treated the same as a 4-byte primitive data type

AS 2016 Compiling C data structures 23

struct S1 { char c; int i[2]; double v; } *p;

Satisfying alignment with structures

• Within structure: – Must satisfy element’s alignment requirement

• Overall structure placement – Each structure has alignment requirement K

• K = Largest alignment of any element – Initial address & structure length must be multiples of K

• Example (under Windows or x86-64): – K = 8, due to double element

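Layout: c (1 byte) at p+0 | 3 bytes padding | i[0] at p+4 | i[1] at p+8 | 4 bytes padding | v at p+16 | end of struct at p+24

p+4 is a multiple of 4 (alignment of int); p+0, p+16 and p+24 are multiples of 8 (alignment of double and of the whole struct)

AS 2016 Compiling C data structures 24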

Assignment 5

• Pen & Paper

• Translating C to Assembly and vice versa

• Alignment

• Passing Parameters to functions in Assembly

Quiz: Alignment

struct S1 { char c; double d[2]; int v; } *p;

struct S2 { double d[2]; int v; char c; } *q;

struct S3 { int id; struct S1 st; char c; } *r;

struct S4 { int i[2]; struct S3 st; char name[5]; } *s;

What is the size and the alignment of these structs?

Quiz: Alignment

struct S1 { char c; double d[2]; int v; } *p;

struct S2 { double d[2]; int v; char c; } *q;

S1: Size 32 bytes, alignment 8 bytes

S2: Size 24 bytes, alignment 8 bytes

Quiz: Alignment

struct S3 { int id; struct S1 st; char c; } *r;

struct S4 { int i[2]; struct S3 st; char name[5]; } *s;

S3: Size 48 bytes, alignment 8 bytes

S4: Size 64 bytes, alignment 8 bytes

Quiz: Parameter passing

uint64_t subtract( uint64_t a, uint64_t b) { return a - b; }

int main() { return subtract(3,2); }

Quiz: Parameter passing

uint64_t subtract( uint64_t a, uint64_t b) { return a - b; }

int main() { return subtract(3,2); }

subtract: subq %rsi, %rdi movq %rdi, %rax ret main: movl $3, %edi movl $2, %esi jmp subtract

Quiz: Parameter passing

uint64_t subtract( uint64_t* a, uint64_t* b) { return *a - *b; } int main() { uint64_t a = 3; uint64_t b = 2; return subtract(&a,&b); }

Quiz: Parameter passing

uint64_t subtract( uint64_t* a, uint64_t* b) { return *a - *b; } int main() { uint64_t a = 3; uint64_t b = 2; return subtract(&a,&b); }

subtract: movq (%rdi), %rax subq (%rsi), %rax ret main: pushq %rbp movq %rsp, %rbp subq $16, %rsp movq $3, -8(%rbp) movq $2, -16(%rbp) leaq -8(%rbp), %rdi leaq -16(%rbp), %rsi callq subtract addq $16, %rsp popq %rbp ret

Quiz: function call

foo: pushq %rbp movq %rsp, %rbp movl %edi, -4(%rbp) movl -4(%rbp), %eax imull -4(%rbp), %eax popq %rbp ret main: pushq %rbp movq %rsp, %rbp subq $32, %rsp movl %edi, -20(%rbp) movq %rsi, -32(%rbp) movl $4, -8(%rbp) movl -8(%rbp), %eax movl %eax, %edi call foo movl %eax, -4(%rbp) leave ret

What is happening here?

Quiz: function call

foo: pushq %rbp movq %rsp, %rbp movl %edi, -4(%rbp) movl -4(%rbp), %eax imull -4(%rbp), %eax popq %rbp ret main: pushq %rbp movq %rsp, %rbp subq $32, %rsp movl %edi, -20(%rbp) movq %rsi, -32(%rbp) movl $4, -8(%rbp) movl -8(%rbp), %eax movl %eax, %edi call foo movl %eax, -4(%rbp) leave ret

int foo(int x) { return( x * x); } int main(int argc, char *argv[]) { int a = 4, b; b = foo(a); }

$ cc -S -O0 hello.c

Quiz: Power of “-O2”!

foo: movl %edi, %eax imull %edi, %eax ret main: rep ret

int foo(int x) { return( x * x); } int main(int argc, char *argv[]) { int a = 4, b; b = foo(a); }

$ cc -S -O2 hello.c

What is going on here?