real-world performance training - oracle · real-world performance training sql reference ......

23

Upload: dangkhue

Post on 20-May-2018

242 views

Category:

Documents


2 download

TRANSCRIPT

Copyright © 2014, Oracle and/or its affiliates. All rights reserved. |

Real-World Performance Training: SQL Reference

Real-World Performance Team

Copyright © 2014, Oracle and/or its affiliates. All rights reserved. |

Sub-query ( correlated )

SQL

-- Employees who earn more than the average salary of their own department.
-- The inner query is correlated: it is evaluated against the deptno of each
-- outer row.
select emp_outer.empno,
       emp_outer.ename,
       emp_outer.sal
  from emp emp_outer
 where emp_outer.sal > ( select avg(emp_inner.sal)
                           from emp emp_inner
                          where emp_inner.deptno = emp_outer.deptno
                          group by emp_inner.deptno )

Sub-query

Copyright © 2014, Oracle and/or its affiliates. All rights reserved. |

Inline View

SQL

-- Each employee alongside the average salary of their department.
-- The per-department averages are computed in an inline view and joined
-- back to EMP on deptno.
select e.empno,
       e.ename,
       e.sal,
       dept_avg.avg_sal
  from emp e
  join ( select deptno,
                avg(sal) avg_sal
           from emp
          group by deptno ) dept_avg
    on e.deptno = dept_avg.deptno

Inline View

Copyright © 2014, Oracle and/or its affiliates. All rights reserved. |

Common Table Expression (CTE)

SQL

-- Each employee alongside the average salary of their department, with the
-- per-department average factored out into a named common table expression.
with avg_sal as (
    select deptno,
           avg(sal) avg_sal
      from emp
     group by deptno
)
select e.empno,
       e.ename,
       e.sal,
       dept_avg.avg_sal
  from emp e
  join avg_sal dept_avg
    on e.deptno = dept_avg.deptno

Common Table Expression

Copyright © 2014, Oracle and/or its affiliates. All rights reserved. |

Window Function

SQL

-- Rank employees by salary within their department, highest salary first,
-- and list the result department by department.
select d.dname,
       e.ename,
       e.sal,
       rank() over ( partition by e.deptno   -- ranking restarts per department
                     order by e.sal desc     -- ordering used by the function
                   ) sal_rank
  from emp e
  join dept d
    on e.deptno = d.deptno
 order by d.dname,
          sal_rank

analytic function

partition by clause

order by clause for function

Copyright © 2014, Oracle and/or its affiliates. All rights reserved. |

Anatomy of an External Table

Data Loading

-- External table over gzip-compressed flat files, read in parallel.
--
-- "column definition list ..." and "file column mapping list ..." are
-- placeholders for the real column and field definitions.
--
--   * default directory : directory object for the mount point holding the files
--   * preprocessor      : uncompresses each file on the fly via a wrapper script
--   * characterset      : must match the character set of the files
--   * parallel          : should match or be less than the number of files
--
-- Comments are kept outside the ACCESS PARAMETERS clause on purpose: the
-- ORACLE_LOADER driver only accepts comments before the first access parameter.
-- NOTE(review): "records delimited by newline" assumes newline-terminated
-- records; adjust the record format to the actual files.
create table FAST_LOAD
(
  column definition list ...
)
organization external
( type oracle_loader
  default directory SPEEDY_FILESYSTEM
  access parameters
  ( records delimited by newline
    preprocessor exec_file_dir:'zcat.sh'
    characterset 'ZHS16GBK'
    badfile ERROR_DUMP:'FAST_LOAD.bad'
    logfile ERROR_DUMP:'FAST_LOAD.log'
    fields
    (
      file column mapping list ...
    )
  )
  location
  ('file_1.gz', 'file_2.gz', 'file_3.gz', 'file_4.gz')
)
reject limit 1000
parallel 4
/

External Table Definition

Reference the Mount Point

Uncompress the data using a secure wrapper

The Character set must match the Character set of the Files

Note Compressed Files

Parallel should match or be less than the number of Files

Copyright © 2014, Oracle and/or its affiliates. All rights reserved. |

Gathering Statistics

-- Gather optimizer statistics for one table owned by the current user,
-- leaving every other DBMS_STATS option at its default.
begin
  dbms_stats.gather_table_stats(
    ownname => user,
    tabname => 'TABLE_NAME'
  );
end;
/

Gathering Table Statistics (Default)

Copyright © 2014, Oracle and/or its affiliates. All rights reserved. |

Gathering Statistics

• Use the dbms_stats.create_extended_stats function to create extended statistics

-- Create extended statistics on the column combination (MAKE, MODEL) of the
-- current user's CARS table.
select dbms_stats.create_extended_stats(
         ownname   => user,
         tabname   => 'CARS',
         extension => '(MAKE,MODEL)'
       )
  from dual;

-- Create extended statistics on the expression UPPER(EMP_LAST_NAME) of the
-- current user's EMP table.
select dbms_stats.create_extended_stats(
         ownname   => user,
         tabname   => 'EMP',
         extension => '(UPPER(EMP_LAST_NAME))'
       )
  from dual;

Extended Statistics

Copyright © 2014, Oracle and/or its affiliates. All rights reserved. |

Set based processing

Data Processing Techniques

-- Set-based load: copy the department 20 rows of EMP into WEST in a single
-- statement, using the APPEND hint.
insert /*+ append */ into west
select emp.*
  from emp
 where emp.deptno = 20;

commit;

-- Set-based load: copy every EMP row that is NOT in department 20 into EAST.
-- A bare "deptno != 20" evaluates to UNKNOWN for a NULL deptno and would drop
-- those rows; they are included explicitly so that each EMP row lands in
-- exactly one of WEST / EAST.
insert /*+ append */ into east
select *
  from emp
 where deptno != 20
    or deptno is null;

commit;

Copyright © 2014, Oracle and/or its affiliates. All rights reserved. |

Multiple ways to get the same result

Data Processing Techniques

-- Single-statement alternative: one scan of EMP routes each row to WEST or
-- EAST. With INSERT FIRST a row goes to the first WHEN branch it satisfies;
-- rows satisfying none (including a NULL deptno) go to the ELSE branch.
-- No VALUES clause is given, so each target receives the full select list
-- of the subquery.
insert /*+ append */ first
  when deptno = 20 then
    into west
  else
    into east
select *
  from emp;

commit;

Copyright © 2014, Oracle and/or its affiliates. All rights reserved. |

Duplicate Rows

Data Validation SQL

Simply check the data

Obtain one of the ROWIDs of the duplicates to investigate

Query the rows you wish to keep, eliminating duplicates based on the load time

-- Quick check: list every pk value that occurs more than once, with its count.
select pk,
       count(*)
  from DIRTY_DATA
 group by pk
having count(*) > 1;

-- List every duplicated pk value with its count and the highest ROWID among
-- its rows, giving one concrete row per key to investigate.
select pk,
       count(*),
       max(rowid)
  from DIRTY_DATA
 group by pk
having count(*) > 1;

-- Keep exactly one row per pk: the one with the latest load_time.
-- "column_list" is a placeholder for the columns to return.
-- ROWID is added as a final sort key so that rows sharing the same load_time
-- are resolved the same way on every run (ROW_NUMBER is otherwise
-- nondeterministic on ties).
select column_list
  from ( select a.*,
                row_number() over ( partition by pk
                                    order by load_time desc,
                                             a.rowid desc ) rowno
           from DIRTY_DATA a )
 where rowno = 1;

Copyright © 2014, Oracle and/or its affiliates. All rights reserved. |

Orphaned Row Check

Data Validation SQL

Look for Orphans

Look for Parents with no Children

-- Orphan check: CHILD rows whose fk matches no PARENT row.
-- CHILD is the preserved side of the outer join, so unmatched children come
-- back with a NULL P.pk (children whose fk is itself NULL are reported too).
select C.rowid
  from CHILD C
  left outer join PARENT P
    on P.pk = C.fk
 where P.pk is null;

-- Childless-parent check: PARENT rows that no CHILD row references.
-- PARENT is the preserved side of the outer join, so parents without a match
-- come back with a NULL C.fk.
select P.rowid
  from PARENT P
  left join CHILD C
    on C.fk = P.pk
 where C.fk is null;

Copyright © 2014, Oracle and/or its affiliates. All rights reserved. |

Delete

Rewriting DML

-- Conventional approach: remove the unwanted rows in place with parallel DML.
-- The string literal uses plain ASCII single quotes; typographic quotes are
-- not valid SQL string delimiters.
alter session enable parallel dml;

delete from tx_log
 where symbol = 'JAVA';

commit;

-- DELETE rewritten as "copy the rows to keep, then swap the tables".
alter session enable parallel dml;

-- The predicate is the complement of the DELETE's "symbol = 'JAVA'".
-- Rows with a NULL symbol are not deleted by the DELETE, so they must be
-- kept here as well; "symbol != 'JAVA'" alone would silently lose them.
insert /*+ append */ into tx_log_new
select *
  from tx_log
 where symbol != 'JAVA'
    or symbol is null;

-- Swap the tables. The renames are DDL and therefore commit the insert.
alter table tx_log
  rename to tx_log_old;

alter table tx_log_new
  rename to tx_log;

or

-- Alternative to renaming: exchange the segment of table TX_LOG_NEW with
-- partition PART_201409 of TX_LOG.
alter table tx_log
  exchange partition part_201409
  with table tx_log_new;

The predicate is the complement of the DELETE; it selects the rows to keep

Copyright © 2014, Oracle and/or its affiliates. All rights reserved. |

Update

Rewriting DML

-- Conventional approach: change the tax rate in place with parallel DML.
-- The cut-off is written as an ANSI date literal instead of the string
-- '01-Jan-09', so the comparison no longer depends on the session's
-- NLS_DATE_FORMAT / NLS_DATE_LANGUAGE.
-- NOTE(review): assumes sales_date is a DATE and that '01-Jan-09' meant
-- 1 January 2009 - confirm.
alter session enable parallel dml;

update sales_ledger
   set tax_rate = 9.9
 where tax_rate = 9.3
   and sales_date > date '2009-01-01';

commit;

-- UPDATE rewritten as an insert of transformed rows: the UPDATE's predicates
-- move into a CASE expression in the select list, so rows that match get the
-- new rate and all other rows keep their current tax_rate.
-- "<column list>" marks the untouched columns before and after tax_rate.
-- The cut-off date is an ANSI date literal so the comparison does not depend
-- on the session's NLS date settings.
-- NOTE(review): assumes sales_date is a DATE and that '01-Jan-09' meant
-- 1 January 2009 - confirm.
-- NOTE(review): the target is tx_log_new although the rows come from
-- sales_ledger; this looks carried over from the DELETE example - confirm the
-- intended target table.
alter session enable parallel dml;

insert /*+ append */ into tx_log_new
select
  <column list>,
  case
    when sales_date > date '2009-01-01'
     and tax_rate = 9.3
    then 9.9
    else tax_rate
  end,
  <column list>
from sales_ledger;

The UPDATE predicates are moved to the SELECT list in a CASE expression to transform the rows

Copyright © 2014, Oracle and/or its affiliates. All rights reserved. |

• For each row in lineorder, how many rows are returned from customer?

• Without constraints, what if lo_custkey is NULL?

• Even if lo_custkey is NOT NULL, how many rows will join with customer? 0? 1? More than 1?

• NOT NULL constraints are essentially free, no sense not to implement

• Several optimizations depend on this information!

Rules and Framework: NOT NULL Constraints

-- Query fragment (no select list shown): LINEORDER rows are joined to
-- CUSTOMER rows on lo_custkey = c_custkey.
FROM lineorder

JOIN customer ON

lo_custkey = c_custkey

SQL> desc lineorder

Name Null? Type

----------- ------- --------

...

LO_CUSTKEY NOT NULL NUMBER

...

SQL> desc customer

Name Null? Type

--------- --------- --------

C_CUSTKEY NOT NULL NUMBER

....

-- Template: declare column "cname" of table "tname" NOT NULL.
ALTER TABLE tname
  MODIFY cname NOT NULL

Copyright © 2014, Oracle and/or its affiliates. All rights reserved. |

• There must be a primary key on the dimension table

• There must be a foreign key on the fact table

• The state of the constraint depends on trust in the ETL process and volume of data

• Constraints must be in RELY state

• It is not necessary to enforce constraints on the fact table

• You need to tell the optimizer you can trust constraints in the RELY state

With PK/FK constraints, exactly 1 row is returned from dimension table for a fact row

Rules and Framework: Primary Key and Foreign Key Constraints

-- Primary key on the dimension table, declared in the RELY state.
alter table customer
  add constraint customer_pk primary key (c_custkey) rely;

-- Foreign key from the fact table to the CUSTOMER dimension. It is declared
-- RELY but DISABLE NOVALIDATE, i.e. neither enforced on new rows nor checked
-- against existing ones.
-- NOTE(review): the constraint name ends in _pk although this is a foreign
-- key; consider an _fk suffix if nothing depends on the current name.
alter table lineorder
  add constraint lo_customer_pk
      foreign key (lo_custkey)
      references customer (c_custkey)
      rely disable novalidate;

-- Tell the optimizer it can trust constraints that are in the RELY state.
alter system set query_rewrite_integrity = TRUSTED;

Copyright © 2014, Oracle and/or its affiliates. All rights reserved. |

• Example: Interval partitioning

Rules and Framework: Partition the Fact Table on the Time Dimension

-- Fact table range-partitioned on LO_ORDERDATE with interval partitioning:
-- one explicit partition holds dates before 1 Feb 1992, and the interval
-- clause sets a one-month interval for further partitions.
-- "... other columns" is a placeholder for the remaining column definitions
-- (presumably including LO_ORDERDATE, which is not shown).
CREATE TABLE LINEORDER
(
  "LO_ORDERKEY"   NUMBER NOT NULL ENABLE,
  "LO_LINENUMBER" NUMBER
  ... other columns
)
partition by range (LO_ORDERDATE)
interval (numtoyminterval(1, 'MONTH'))
(
  partition R199201
    values less than (to_date('19920201', 'YYYYMMDD'))
);

Copyright © 2014, Oracle and/or its affiliates. All rights reserved. |

Configure Table for In-Memory Column Store

• Use MEMCOMPRESS FOR QUERY for performance

• Use DUPLICATE ALL to ensure extents are loaded in all RAC instances on Exadata

-- SQL*Plus session: configure LINEORDER for the In-Memory Column Store.
-- MEMCOMPRESS FOR QUERY is chosen for performance; DUPLICATE ALL is used so
-- the extents are loaded in all RAC instances (Exadata).
SQL> alter table lineorder

2 inmemory memcompress for query

3 duplicate all;

Table altered.

Copyright © 2014, Oracle and/or its affiliates. All rights reserved. |

Populate the In-Memory Column Store

• Query the table for on-demand population

• Ensure the optimizer is choosing a full scan

-- SQL*Plus session: query the table to trigger on-demand population.
-- The FULL hint must name the table (or its alias); a bare "full" is not a
-- valid hint and is ignored, leaving the access path up to the optimizer.
SQL> select /*+ full(lineorder) */ count(*)
  2  from lineorder;

Copyright © 2014, Oracle and/or its affiliates. All rights reserved. |

Validate Population Status

• Query GV$IM_SEGMENTS

• Look for populate_status='COMPLETED' and bytes_not_populated=0

-- SQL*Plus report: In-Memory population status per instance and segment,
-- read from GV$IM_SEGMENTS.
set lines 150
column name format a30
col pname format a30
column owner format a20
column segment_name format a30
column populate_status format a20
-- The query aliases populate_status to STATUS; SQL*Plus matches COLUMN
-- settings on the alias, so the alias needs its own format.
column status format a20
column bytes_not_populated format 999,999,999,999.99
set echo on

-- Sorted by segment name, then instance, then owner (named explicitly rather
-- than by select-list position).
SELECT v.inst_id,
       v.owner,
       v.segment_name        name,
       v.partition_name      pname,
       v.populate_status     status,
       v.bytes_not_populated
  FROM gv$im_segments v
 ORDER BY v.segment_name,
          v.inst_id,
          v.owner
/

Copyright © 2014, Oracle and/or its affiliates. All rights reserved. |