faculty of computer science © 2006 cmput 229 memory hierarchy part 2 refreshing memory
Post on 21-Dec-2015
219 views
TRANSCRIPT
© 2006
Department of Computing Science
CMPUT 229
Writing Cache-Conscious Programs
Problem: Write C code for a function that computes the sum of the elements of a two dimensional array, a[M][N], of integers.
int SumArray(int a[][], int M, int N)
1 int SumArrayRows(int a[][], int M, int N) 2 { 3 int i, j; 4 int sum = 0; 5 6 for (i=0 ; i<M ; i++) 7 for (j=0 ; j<N ; j++) 8 sum += a[i][j]; 8 return sum; 9 }
1 int SumArrayCols(int a[][], int M, int N) 2 { 3 int i, j; 4 int sum = 0; 5 6 for (j=0 ; j<N ; j++) 7 for (i=0 ; i<M ; i++) 8 sum += a[i][j]; 8 return sum; 9 }
Bryant/O’Hallaron, p. 508
© 2006
Department of Computing Science
CMPUT 229
SumArrayRows Data Access Order
a[1][2]a[1][3]a[1][4]a[1][5]a[2][0]a[2][1]a[2][2]a[2][3]a[2][4]a[2][5]a[3][0]a[3][1]a[3][2]a[3][3]a[3][4]
•••
a[0][0]a[0][1]a[0][2]a[0][3]a[0][4]a[0][5]a[1][0]a[1][1]
0x8000 4000
0x8000 4004
0x8000 4010
0x8000 4024
0x8000 4008
0x8000 4014
0x8000 4028
0x8000 403C
0x8000 400C
0x8000 4018
0x8000 402C
0x8000 4040
0x8000 401C
0x8000 4030
0x8000 4044
0x8000 4050
0x8000 4020
0x8000 4034
0x8000 4048
0x8000 4054
0x8000 4038
0x8000 404C
0x8000 4058
•••
1 int SumArrayRows(int a[][], int M, int N) 2 { 3 int i, j; 4 int sum = 0; 5 6 for (i=0 ; i<M ; i++) 7 for (j=0 ; j<N ; j++) 8 sum += a[i][j]; 8 return sum; 9 }
Bryant/O’Hallaron, p. 508
••• Cache
Memory
© 2006
Department of Computing Science
CMPUT 229
SumArrayRows Data Access Order
a[1][2]a[1][3]a[1][4]a[1][5]a[2][0]a[2][1]a[2][2]a[2][3]a[2][4]a[2][5]a[3][0]a[3][1]a[3][2]a[3][3]a[3][4]
•••
a[0][0]a[0][1]a[0][2]a[0][3]a[0][4]a[0][5]a[1][0]a[1][1]
0x8000 4000
0x8000 4004
0x8000 4010
0x8000 4024
0x8000 4008
0x8000 4014
0x8000 4028
0x8000 403C
0x8000 400C
0x8000 4018
0x8000 402C
0x8000 4040
0x8000 401C
0x8000 4030
0x8000 4044
0x8000 4050
0x8000 4020
0x8000 4034
0x8000 4048
0x8000 4054
0x8000 4038
0x8000 404C
0x8000 4058
•••
1 int SumArrayRows(int a[][], int M, int N) 2 { 3 int i, j; 4 int sum = 0; 5 6 for (i=0 ; i<M ; i++) 7 for (j=0 ; j<N ; j++) 8 sum += a[i][j]; 8 return sum; 9 }
Bryant/O’Hallaron, p. 508
a[0][0] a[0][1] a[0][2] a[0][3]
••• Cache
Memory
© 2006
Department of Computing Science
CMPUT 229
SumArrayRows Data Access Order
a[1][2]a[1][3]a[1][4]a[1][5]a[2][0]a[2][1]a[2][2]a[2][3]a[2][4]a[2][5]a[3][0]a[3][1]a[3][2]a[3][3]a[3][4]
•••
a[0][0]
a[0][2]a[0][3]a[0][4]a[0][5]a[1][0]a[1][1]
0x8000 4000
0x8000 4004
0x8000 4010
0x8000 4024
0x8000 4008
0x8000 4014
0x8000 4028
0x8000 403C
0x8000 400C
0x8000 4018
0x8000 402C
0x8000 4040
0x8000 401C
0x8000 4030
0x8000 4044
0x8000 4050
0x8000 4020
0x8000 4034
0x8000 4048
0x8000 4054
0x8000 4038
0x8000 404C
0x8000 4058
•••
1 int SumArrayRows(int a[][], int M, int N) 2 { 3 int i, j; 4 int sum = 0; 5 6 for (i=0 ; i<M ; i++) 7 for (j=0 ; j<N ; j++) 8 sum += a[i][j]; 8 return sum; 9 }
a[0][0] a[0][1] a[0][2] a[0][3]
•••
a[0][1]
Cache
Memory Bryant/O’Hallaron, p. 508
© 2006
Department of Computing Science
CMPUT 229
SumArrayRows Data Access Order
a[1][2]a[1][3]a[1][4]a[1][5]a[2][0]a[2][1]a[2][2]a[2][3]a[2][4]a[2][5]a[3][0]a[3][1]a[3][2]a[3][3]a[3][4]
•••
a[0][0]
a[0][2]a[0][3]a[0][4]a[0][5]a[1][0]a[1][1]
0x8000 4000
0x8000 4004
0x8000 4010
0x8000 4024
0x8000 4008
0x8000 4014
0x8000 4028
0x8000 403C
0x8000 400C
0x8000 4018
0x8000 402C
0x8000 4040
0x8000 401C
0x8000 4030
0x8000 4044
0x8000 4050
0x8000 4020
0x8000 4034
0x8000 4048
0x8000 4054
0x8000 4038
0x8000 404C
0x8000 4058
•••
1 int SumArrayRows(int a[][], int M, int N) 2 { 3 int i, j; 4 int sum = 0; 5 6 for (i=0 ; i<M ; i++) 7 for (j=0 ; j<N ; j++) 8 sum += a[i][j]; 8 return sum; 9 }
a[0][0] a[0][1] a[0][2] a[0][3]
•••
a[0][1]
Cache
Memory Bryant/O’Hallaron, p. 508
© 2006
Department of Computing Science
CMPUT 229
SumArrayRows Data Access Order
a[1][2]a[1][3]a[1][4]a[1][5]a[2][0]a[2][1]a[2][2]a[2][3]a[2][4]a[2][5]a[3][0]a[3][1]a[3][2]a[3][3]a[3][4]
•••
a[0][0]
a[0][2]a[0][3]a[0][4]a[0][5]a[1][0]a[1][1]
0x8000 4000
0x8000 4004
0x8000 4010
0x8000 4024
0x8000 4008
0x8000 4014
0x8000 4028
0x8000 403C
0x8000 400C
0x8000 4018
0x8000 402C
0x8000 4040
0x8000 401C
0x8000 4030
0x8000 4044
0x8000 4050
0x8000 4020
0x8000 4034
0x8000 4048
0x8000 4054
0x8000 4038
0x8000 404C
0x8000 4058
•••
1 int SumArrayRows(int a[][], int M, int N) 2 { 3 int i, j; 4 int sum = 0; 5 6 for (i=0 ; i<M ; i++) 7 for (j=0 ; j<N ; j++) 8 sum += a[i][j]; 8 return sum; 9 }
a[0][0] a[0][1] a[0][2] a[0][3]
•••
a[0][1]
Cache
Memory Bryant/O’Hallaron, p. 508
© 2006
Department of Computing Science
CMPUT 229
SumArrayRows Data Access Order
a[1][2]a[1][3]a[1][4]a[1][5]a[2][0]a[2][1]a[2][2]a[2][3]a[2][4]a[2][5]a[3][0]a[3][1]a[3][2]a[3][3]a[3][4]
•••
a[0][0]
a[0][2]a[0][3]a[0][4]a[0][5]a[1][0]a[1][1]
0x8000 4000
0x8000 4004
0x8000 4010
0x8000 4024
0x8000 4008
0x8000 4014
0x8000 4028
0x8000 403C
0x8000 400C
0x8000 4018
0x8000 402C
0x8000 4040
0x8000 401C
0x8000 4030
0x8000 4044
0x8000 4050
0x8000 4020
0x8000 4034
0x8000 4048
0x8000 4054
0x8000 4038
0x8000 404C
0x8000 4058
•••
1 int SumArrayRows(int a[][], int M, int N) 2 { 3 int i, j; 4 int sum = 0; 5 6 for (i=0 ; i<M ; i++) 7 for (j=0 ; j<N ; j++) 8 sum += a[i][j]; 8 return sum; 9 }
a[0][0] a[0][1] a[0][2] a[0][3]
•••
a[0][5] a[1][0] a[1][1]a[0][4]
a[0][1]
Cache
Memory Bryant/O’Hallaron, p. 508
© 2006
Department of Computing Science
CMPUT 229
SumArrayRows Data Access Order
a[1][2]a[1][3]a[1][4]a[1][5]a[2][0]a[2][1]a[2][2]a[2][3]a[2][4]a[2][5]a[3][0]a[3][1]a[3][2]a[3][3]a[3][4]
•••
a[0][0]
a[0][2]a[0][3]a[0][4]a[0][5]a[1][0]a[1][1]
0x8000 4000
0x8000 4004
0x8000 4010
0x8000 4024
0x8000 4008
0x8000 4014
0x8000 4028
0x8000 403C
0x8000 400C
0x8000 4018
0x8000 402C
0x8000 4040
0x8000 401C
0x8000 4030
0x8000 4044
0x8000 4050
0x8000 4020
0x8000 4034
0x8000 4048
0x8000 4054
0x8000 4038
0x8000 404C
0x8000 4058
•••
1 int SumArrayRows(int a[][], int M, int N) 2 { 3 int i, j; 4 int sum = 0; 5 6 for (i=0 ; i<M ; i++) 7 for (j=0 ; j<N ; j++) 8 sum += a[i][j]; 8 return sum; 9 }
a[0][0] a[0][1] a[0][2] a[0][3]
•••
a[0][5] a[1][0] a[1][1]a[0][4]
a[0][1]
Cache
Memory Bryant/O’Hallaron, p. 508
© 2006
Department of Computing Science
CMPUT 229
SumArrayRows Data Access Order
a[1][2]a[1][3]a[1][4]a[1][5]a[2][0]a[2][1]a[2][2]a[2][3]a[2][4]a[2][5]a[3][0]a[3][1]a[3][2]a[3][3]a[3][4]
•••
a[0][0]
a[0][2]a[0][3]a[0][4]a[0][5]a[1][0]a[1][1]
0x8000 4000
0x8000 4004
0x8000 4010
0x8000 4024
0x8000 4008
0x8000 4014
0x8000 4028
0x8000 403C
0x8000 400C
0x8000 4018
0x8000 402C
0x8000 4040
0x8000 401C
0x8000 4030
0x8000 4044
0x8000 4050
0x8000 4020
0x8000 4034
0x8000 4048
0x8000 4054
0x8000 4038
0x8000 404C
0x8000 4058
•••
1 int SumArrayRows(int a[][], int M, int N) 2 { 3 int i, j; 4 int sum = 0; 5 6 for (i=0 ; i<M ; i++) 7 for (j=0 ; j<N ; j++) 8 sum += a[i][j]; 8 return sum; 9 }
a[0][0] a[0][1] a[0][2] a[0][3]
•••
a[0][5] a[1][0] a[1][1]a[0][4]
a[0][1]
Cache
Memory Bryant/O’Hallaron, p. 508
© 2006
Department of Computing Science
CMPUT 229
SumArrayCols Data Access Order
a[1][2]a[1][3]a[1][4]a[1][5]a[2][0]a[2][1]a[2][2]a[2][3]a[2][4]a[2][5]a[3][0]a[3][1]a[3][2]a[3][3]a[3][4]
•••
a[0][0]a[0][1]a[0][2]a[0][3]a[0][4]a[0][5]a[1][0]a[1][1]
0x8000 4000
0x8000 4004
0x8000 4010
0x8000 4024
0x8000 4008
0x8000 4014
0x8000 4028
0x8000 403C
0x8000 400C
0x8000 4018
0x8000 402C
0x8000 4040
0x8000 401C
0x8000 4030
0x8000 4044
0x8000 4050
0x8000 4020
0x8000 4034
0x8000 4048
0x8000 4054
0x8000 4038
0x8000 404C
0x8000 4058
•••
1 int SumArrayCols(int a[][], int M, int N) 2 { 3 int i, j; 4 int sum = 0; 5 6 for (j=0 ; j<N ; j++) 7 for (i=0 ; i<M ; i++) 8 sum += a[i][j]; 8 return sum; 9 }
a[0][0] a[0][1] a[0][2] a[0][3]
••• Cache
Memory Bryant/O’Hallaron, p. 508
© 2006
Department of Computing Science
CMPUT 229
SumArrayCols Data Access Order
a[1][2]a[1][3]a[1][4]a[1][5]a[2][0]a[2][1]a[2][2]a[2][3]a[2][4]a[2][5]a[3][0]a[3][1]a[3][2]a[3][3]a[3][4]
•••
a[0][0]a[0][1]a[0][2]a[0][3]a[0][4]a[0][5]a[1][0]a[1][1]
0x8000 4000
0x8000 4004
0x8000 4010
0x8000 4024
0x8000 4008
0x8000 4014
0x8000 4028
0x8000 403C
0x8000 400C
0x8000 4018
0x8000 402C
0x8000 4040
0x8000 401C
0x8000 4030
0x8000 4044
0x8000 4050
0x8000 4020
0x8000 4034
0x8000 4048
0x8000 4054
0x8000 4038
0x8000 404C
0x8000 4058
•••
1 int SumArrayCols(int a[][], int M, int N) 2 { 3 int i, j; 4 int sum = 0; 5 6 for (j=0 ; j<N ; j++) 7 for (i=0 ; i<M ; i++) 8 sum += a[i][j]; 8 return sum; 9 }
a[0][0] a[0][1] a[0][2] a[0][3]
•••
a[0][5] a[1][0] a[1][1]a[0][4]
Cache
Bryant/O’Hallaron, p. 508
© 2006
Department of Computing Science
CMPUT 229
SumArrayCols Data Access Order
a[1][2]a[1][3]a[1][4]a[1][5]a[2][0]a[2][1]a[2][2]a[2][3]a[2][4]a[2][5]a[3][0]a[3][1]a[3][2]a[3][3]a[3][4]
•••
a[0][0]a[0][1]a[0][2]a[0][3]a[0][4]a[0][5]a[1][0]a[1][1]
0x8000 4000
0x8000 4004
0x8000 4010
0x8000 4024
0x8000 4008
0x8000 4014
0x8000 4028
0x8000 403C
0x8000 400C
0x8000 4018
0x8000 402C
0x8000 4040
0x8000 401C
0x8000 4030
0x8000 4044
0x8000 4050
0x8000 4020
0x8000 4034
0x8000 4048
0x8000 4054
0x8000 4038
0x8000 404C
0x8000 4058
•••
1 int SumArrayCols(int a[][], int M, int N) 2 { 3 int i, j; 4 int sum = 0; 5 6 for (j=0 ; j<N ; j++) 7 for (i=0 ; i<M ; i++) 8 sum += a[i][j]; 8 return sum; 9 }
a[0][0] a[0][1] a[0][2] a[0][3]
a[2][1] a[2][2] a[2][3]
•••
a[2][0]a[0][5] a[1][0] a[1][1]a[0][4]
Cache
Bryant/O’Hallaron, p. 508
© 2006
Department of Computing Science
CMPUT 229
The Cost of Programming Productivity
Easy-to-read and easy-to-maintain code often results
in lower runtime performance.
StudentClass
University
© 2006
Department of Computing Science
CMPUT 229
The Cost of Programming Productivity
Abstraction
Inheritance
StudentProfessor Support Staff
Person
© 2006
Department of Computing Science
CMPUT 229
The Cost of Programming Productivity
Data Encapsulation
Person
Date of BirthGender
AddressCitizenship
Name
Driver Lic.
Student
FacultyDate of Adm
DepartmentProgram
Univ. ID
Classes Enr.Grades
© 2006
Department of Computing Science
CMPUT 229
Data Locality Primer: Cache Organization
POWER5 Cache Organization
– L1 Data Cache: 32 Kbytes, 128-byte cache lines
– L2 Cache: 1.44 Mbytes, 128-byte cache lines
– L3 Cache: 32 Mbytes, 512-byte cache lines
© 2006
Department of Computing Science
CMPUT 229
Data Locality Primer: Cache Organization
Bytes
FacultyDate of Adm
DepartmentProgram
Univ. ID
Classes Enr.Grades
Student:
1 byte4 bytes
1 byte2 bytes
4 bytes
4 bytes4 bytes4 bytes
Date of BirthGender
AddressCitizenship
Name
Driver Lic.
Person:
4 bytes 1 byte
32 bytes16 bytes
32 bytes
4 bytes
0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 ••• 127
0
2
•••
255
Cache Lines
© 2006
Department of Computing Science
CMPUT 229
Data Locality Primer: Data in Memory
Memory Address
Bytes
0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 ••• 127
0
128
256
384
Univ. ID Date of Adm. Fa. De Progr. •••Classes Enr. Grades
••• Fa. De Progr. Classes Enr. Grades Univ. ID
Univ. ID Date of Adm. Fa. De Progr. •••Classes Enr. Grades
••• Fa. De Progr. Classes Enr. Grades Univ. ID
FacultyDate of Adm
DepartmentProgram
Univ. ID
Classes Enr.Grades
Student:
1 byte4 bytes
1 byte2 bytes
4 bytes
4 bytes4 bytes4 bytes
© 2006
Department of Computing Science
CMPUT 229
0 ••• 30 31 32 33 ••• 36 37 ••• 47 48 ••• 51 52 ••• 69 ••• 84 85 ••• 89
768
1024
1152
1280
Memory Address
Data Locality Primer: Data in Memory
Name DofB Ge Citizens. Address Dr. Lic.
Namedress Ge Citizens. Dr. Lic. DofB
Name DofB Ge Citizens. Address Dr. Lic.
Namedress Ge Citizens. Dr. Lic. DofB
Date of BirthGender
AddressCitizenship
Name
Driver Lic.
Person:
4 bytes 1 byte
32 bytes16 bytes
32 bytes
4 bytes
© 2006
Department of Computing Science
CMPUT 229
0 ••• 30 31 32 33 ••• 36 37 ••• 47 48 ••• 51 52 ••• 69 ••• 84 85 ••• 89
768
1024
1152
1280
Memory Address
Data Locality Primer: Data in Memory
Memory Address
Bytes
Name DofB Ge Citizens. Address Dr. Lic.
Namedress Ge Citizens. Dr. Lic. DofB
Name DofB Ge Citizens. Address Dr. Lic.
Namedress Ge Citizens. Dr. Lic. DofB
0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 ••• 127
0
128
256
384
Univ. ID Date of Adm. Fa. De Progr. •••Classes Enr. Grades
••• Fa. De Progr. Classes Enr. Grades Univ. ID
Univ. ID Date of Adm. Fa. De Progr. •••Classes Enr. Grades
••• Fa. De Progr. Classes Enr. Grades Univ. ID
© 2006
Department of Computing Science
CMPUT 229
Example: A search through the data structures
How many Computing Science students are
younger than 23 years old?
Bytes
0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 ••• 127
0
2
•••
255
Univ. ID Date of Adm. Fa. De Progr. •••Classes Enr. Grades
Cache Lines
© 2006
Department of Computing Science
CMPUT 229
Example: A search through the data structures
How many Computing Science students are younger than 23 years old?
Loads 128 bytes and uses 5 bytes!
Bytes
0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 ••• 127
0
2
•••
255
Univ. ID Date of Adm. Fa. De Progr. •••Classes Enr. Grades
Cache Lines
© 2006
Department of Computing Science
CMPUT 229
Example: A search through the data structures
How many Computing Science students are younger than 23 years old?
Loads 128 bytes and uses 5 bytes!
Bytes
0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 ••• 127
0
2
•••
255
Univ. ID Date of Adm. Fa. De Progr. •••Classes Enr. Grades
Name DofB Ge Citizens. Address Dr. Lic.
Cache Lines
© 2006
Department of Computing Science
CMPUT 229
Example: A search through the data structures
How many Computing Science students are younger than 23 years old?
Loads 128 bytes and uses 5.3 bytes!
Loads 128 bytes and uses 5.8 bytes!
Bytes
0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 ••• 127
0
2
•••
255
Univ. ID Date of Adm. Fa. De Progr. •••Classes Enr. Grades
Name DofB Ge Citizens. Address Dr. Lic.
Cache Lines
© 2006
Department of Computing Science
CMPUT 229
Data Reshaping for Arrays of Structures
Student *ListOfStudents;
….
ListOfStudents = (Student*)malloc(….);
Univ. ID Date of Adm. Fa. De Progr. •••Classes Enr. Grades
Univ. ID Date of Adm. Fa. De Progr. •••Classes Enr. Grades
Univ. ID Date of Adm. Fa. De Progr. •••Classes Enr. Grades
Univ. ID
Date of Adm.
Fa.
De
Progr.
Univ. ID
Date of Adm.
Fa.
De
Progr.
Univ. ID
Date of Adm.
Fa.
De
Progr.
••• •••
•••
•••
•••
•••
•••
•••
© 2006
Department of Computing Science
CMPUT 229
Reshaping Linked Data Structures
E.g. A linked list of students
struct student { int age; int studentNumber; int studentProgram;float averageGrade;struct student *next;
};
age num gpaprog age num gpaprog …
© 2006
Department of Computing Science
CMPUT 229
Maximal Structure Splitting
age1 num1 gpa1prog1
age2 num2 gpa2prog2
…
age3 num3 gpa3prog3
age1 age2 age3
num1 num2 num3
prog1 prog2 prog3
gpa1 gpa2 gpa3
next1 next2 next3
© 2006
Department of Computing Science
CMPUT 229
Is it safe to transform a given data structure?
Build alias set
– If a pointer P points to the structure
• Then all the objects in the points-to set of P must have the
same layout.
• The layout of two structures is the same if each field has the
same offset and the same length.
© 2006
Department of Computing Science
CMPUT 229
Pool Allocation
Intercept mallocs and
replace by pool
allocation: each
structure layout gets
its own pool.
If pool is full another
pool can be allocated
© 2006
Department of Computing Science
CMPUT 229
Pool Allocation
age1
num1
prog1
gpa1
next1
Intercept mallocs and
replace by pool
allocation: each
structure layout gets
its own pool.
© 2006
Department of Computing Science
CMPUT 229
Pool Allocation
age1 age2
num1 num2
prog1 prog2
gpa1 gpa2
next1 next2
Intercept mallocs and
replace by pool
allocation: each
structure layout gets
its own pool.
© 2006
Department of Computing Science
CMPUT 229
Pool Allocation
age1 age2 age3
num1 num2 num3
prog1 prog2 prog3
gpa1 gpa2 gpa3
next1 next2 next3
Intercept mallocs and
replace by pool
allocation: each
structure layout gets
its own pool.
© 2006
Department of Computing Science
CMPUT 229
Pool Allocation
age1 age2 age3
num1 num2 num3
prog1 prog2 prog3
gpa1 gpa2 gpa3
next1 next2 next3
age4
num4
prog4
gpa4
next4
Intercept mallocs and
replace by pool
allocation: each
structure layout gets
its own pool.
© 2006
Department of Computing Science
CMPUT 229
Pool Allocation
age1 age2 age3
num1 num2 num3
prog1 prog2 prog3
gpa1 gpa2 gpa3
next1 next2 next3
age4
num4
prog4
gpa4
next4
age5
num5
prog5
gpa5
next5
Intercept mallocs and
replace by pool
allocation: each
structure layout gets
its own pool.
© 2006
Department of Computing Science
CMPUT 229
Pool Allocation
age1 age2 age3
num1 num2 num3
prog1 prog2 prog3
gpa1 gpa2 gpa3
next1 next2 next3
age4
num4
prog4
gpa4
next4
age5
num5
prog5
gpa5
next5
Intercept mallocs and
replace by pool
allocation: each
structure layout gets
its own pool.
If pool is full another
pool can be allocated
© 2006
Department of Computing Science
CMPUT 229
Pool Allocation
age1 age2 age3
num1 num2 num3
prog1 prog2 prog3
gpa1 gpa2 gpa3
next1 next2 next3
age4
num4
prog4
gpa4
next4
age5
num5
prog5
gpa5
next5
age7
num7
prog7
gpa7
next7
Intercept mallocs and
replace by pool
allocation: each
structure layout gets
its own pool.
If pool is full another
pool can be allocated
© 2006
Department of Computing Science
CMPUT 229
Pointer Dereferencing - Before
struct student { int age; int studentNumber; int studentProgram;float averageGrade;struct student *next;
};
struct student *s = malloc (sizeof (struct student));
s->age = 21;
s->averageGrade = 3.8;
s->age == *(s + 0)
s->averageGrade == *(s + 12)
age num gpaprog0 4 8 12 16
age num gpaprog …0 4 8 12 16
s
© 2006
Department of Computing Science
CMPUT 229
Uniform Structure Splitting
Requires that all fields in the structure have the same
number of bytes
– Advantage
• Simpler address computation
– Disadvantage
• Either restrict the application of the technique
• Or wastes memory with padding to create same-length fields
© 2006
Department of Computing Science
CMPUT 229
Uniform Splitting Pointer Transformation
age1 age2 age3
num1 num2 num3
prog1 prog2 prog3
gpa1 gpa2 gpa3
next1 next2 next3
s1->age == *(s1 + 0)
s1->gpa == *(s1 + (3 * pool_field_len))
s1
Pool_field_len is the same for each field
3 * pool_field_len
pool_field_len
© 2006
Department of Computing Science
CMPUT 229
Non-Uniform Structure Splitting
Requires pools to be aligned by the size of the pool.
E.g. If the pools are 4k then they must be aligned on
4k boundaries.
More general
Address calculation is more involved
© 2006
Department of Computing Science
CMPUT 229
Non-Uniform Example
struct example { type_2 a; /* 2 bytes */ type_8 b; /* 8 bytes */ type_4 c; /* 4 bytes */ };
s
How can the compiler
find the address to
access:
s->c
© 2006
Department of Computing Science
CMPUT 229
Non-Uniform Example
struct example { type_2 a; /* 2 bytes */ type_8 b; /* 8 bytes */ type_4 c; /* 4 bytes */ };
s
How can the compiler
find the address to
access:
s->c
pool_base = s & 0xF…F000
index = (s - pool_base) / 2
field_base = (2+8)*num_structs_per_pool
s->c = *(s + field_base + 4*index - index*2)
s->c = *(s + field_base + 4*index - s + pool_base)
s->c = *(field_base + 4*index + pool_base)
© 2006
Department of Computing Science
CMPUT 229
Experiments - Micro Benchmarks (Speedup)
Power 4 Power 5
© 2006
Department of Computing Science
CMPUT 229
Experiments - Micro Benchmarks (Instruction Count)
Power 4 Power 5
© 2006
Department of Computing Science
CMPUT 229
Experiments - Micro Benchmarks (CPI)
Power 4 Power 5
© 2006
Department of Computing Science
CMPUT 229
Experiments - Micro Benchmarks (DTLB Misses)
Power 4 Power 5
© 2006
Department of Computing Science
CMPUT 229
Experiments - Micro Benchmarks (L1D Misses)
Power 4 Power 5
© 2006
Department of Computing Science
CMPUT 229
Experiments - Micro Benchmarks (L2 Misses)
Power 4 Power 5
© 2006
Department of Computing Science
CMPUT 229
Experiments - Micro Benchmarks (L3 Misses)
Power 4 Power 5
© 2006
Department of Computing Science
CMPUT 229
Experiments
Evaluated SPEC 2000, Olden and LLU
Many opportunities in SPEC missed
– Pointer analysis didn’t have enough precision to identify
opportunities in the SPEC 2000 benchmarks
– Could only identify small opportunities
– No impact on performance