As little as is possible Exists on Control
As little as is possible!
Exists on Control and Compute nodes
SELECT COUNT_BIG(*) FROM dbo.[FactInternetSales] ; SELECT SUM(*) FROM dbo.[FactInternetSales] ; Control Compute SELECT COUNT_BIG(*) FROM dbo.[FactInternetSales] ;
<?xml version="1.0" encoding="utf-8"?> <dsql_query number_nodes="1" number_distributions="60" number_distributions_per_node="60"> <sql>SELECT COUNT_BIG(*) FROM dbo.[FactInternetSales]</sql> <dsql_operations total_cost="0.00192" total_number_operations="4"> <dsql_operation operation_type="ON"> <location permanent="false" distribution="Control" /> <sql_operations> <sql_operation type="statement">CREATE TABLE [tempdb].[QTables].[QTable_7cb3c9d5271e41bc9a28e583eeb2bd4c] ([col] BIGINT ) WITH(DATA_COMPRESSION=PAGE);</sql_operation> </sql_operations> </dsql_operation> <dsql_operation operation_type="PARTITION_MOVE"> <operation_cost cost="0.00192" accumulative_cost="0.00192" average_rowsize="8" output_rows="1" /> <location distribution="AllDistributions" /> <source_statement>SELECT [T1_1].[col] AS [col] FROM (SELECT COUNT_BIG(CAST ((0) AS INT)) AS [col] FROM (SELECT 0 AS [col] FROM [JRJDW].[dbo].[FactInternetSales] AS T3_1) AS T2_1 GROUP BY [T2_1].[col]) AS T1_1</source_statement> <destination>Control</destination> <destination_table>[QTable_7cb3c9d5271e41bc9a28e583eeb2bd4c]</destination_table> </dsql_operation> <dsql_operation operation_type="RETURN"> <location distribution="Control" /> <select>SELECT [T1_1].[col] AS [col] FROM (SELECT ISNULL([T2_1].[col], CONVERT (BIGINT, 0, 0)) AS [col] FROM (SELECT SUM([T3_1].[col]) AS [col] FROM [tempdb].[QTables].[QTable_7cb3c9d5271e41bc9a28e583eeb2bd4c] AS T3_1) AS T2_1) AS T1_1</select> </dsql_operation> <dsql_operation operation_type="ON"> <location permanent="false" distribution="Control" /> <sql_operations> <sql_operation type="statement">DROP TABLE [tempdb].[QTables].[QTable_7cb3c9d5271e41bc9a28e583eeb2bd4c]</sql_operation> </sql_operations> </dsql_operation> </dsql_operations> </dsql_query>
CREATE TABLE [build]. [Fact. Online. Sales] ( [Online. Sales. Key] int , [Date. Key] datetime , [Store. Key] int , [Product. Key] int , [Promotion. Key] int , [Currency. Key] int , [Customer. Key] int , [Sales. Order. Number] nvarchar(20) , [Sales. Order. Line. Number] int , [Sales. Quantity] int , [Sales. Amount] money ) WITH ( CLUSTERED COLUMNSTORE INDEX , DISTRIBUTION = ROUND_ROBIN ) ; NOT NOT NULL NULL NULL NOT NULL CREATE TABLE [build]. [Fact. Online. Sales] ( [Online. Sales. Key] int , [Date. Key] datetime , [Store. Key] int , [Product. Key] int , [Promotion. Key] int , [Currency. Key] int , [Customer. Key] int , [Sales. Order. Number] nvarchar(20) , [Sales. Order. Line. Number] int , [Sales. Quantity] int , [Sales. Amount] money ) WITH ( CLUSTERED COLUMNSTORE INDEX , DISTRIBUTION = HASH([Product. Key]) ) ; NOT NOT NULL NULL NULL NOT NULL
01 02 03 04 05 06 07 08 09 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60
HASH ( 01 0 03 02 N 1 ) 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60
BIGINT
Distribution key is not updateable!
HASH ROUND ROBIN REPLICATED*
Left Table Right Table Replicated HASH Inner Left Right Full Conditions! For a Distributed – Distributed join to be compatible (green), the join must: • Contain the distribution key columns of both tables • Match data types on the distribution keys • Be an equality join Cross
Aggregation incompatibility: Two approaches:
Incompatibility: Resolution: --EXPLAIN SELECT COUNT_BIG(*) FROM [cso].[FactOnlineSales] GROUP BY [StoreKey] OPTION (LABEL = 'Shuffle : Aggregate') ;
You can move: * APS only today
--EXPLAIN CREATE TABLE [tmp].[DimEmployee] WITH (DISTRIBUTION = HASH(EmployeeKey)) AS SELECT * FROM [cso].[DimEmployee] OPTION (LABEL = 'CTAS : Redistribution') ;
Causes of data movement: Additional causes of data movement:
--EXPLAIN SELECT COUNT_BIG(DISTINCT [DateKey]) FROM [cso].[FactOnlineSales] OPTION (LABEL = 'COUNT DISTINCT incompatible dist key') ; --EXPLAIN SELECT COUNT_BIG(DISTINCT ([ProductKey])) FROM [cso].[FactOnlineSales] OPTION (LABEL = 'COUNT DISTINCT compatible dist key') ;
--EXPLAIN SELECT SUM([SalesAmount]) OVER(PARTITION BY [DateKey]) FROM [cso].[FactOnlineSales] OPTION (LABEL = 'OVER() incompatible dist key') ; --EXPLAIN SELECT SUM([SalesAmount]) OVER(ORDER BY [ProductKey]) FROM [cso].[FactOnlineSales] OPTION (LABEL = 'OVER() incompatible no partition key') ; --EXPLAIN SELECT SUM([SalesAmount]) OVER(PARTITION BY [ProductKey]) FROM [cso].[FactOnlineSales] OPTION (LABEL = 'OVER() compatible dist key') ;
DWU ALTER DATABASE ContosoRetailDW MODIFY (service_objective = 'DW1000') ;
CREATE DATABASE MyDB COLLATE SQL_Latin1_General_CP1_CI_AS ( EDITION = 'DataWarehouse' , SERVICE_OBJECTIVE = 'DW400' , MAXSIZE = 10240 GB ) ; 1 TB / DWU 100 is a good place to start
7000 6000 5000 4000 3000 2000 1000 0 DW 100 DW 200 DW 300 DW 400 DW 500 DW 600 Capacity (GB) DW 1000 DW 1200 DW 1500 DW 2000
DWU Compressed text limits concurrent access to text files Split data across files OR Use different file format Readers Writers
1000 900 800 700 600 500 400 300 200 100 0 DW 100 DW 200 DW 300 DW 400 DW 500 DW 600 DW 1000 Max transaction size (GB) DW 1200 DW 1500 DW 2000
10000 100 smallrc mediumrc largerc xlargerc DW 100 100 200 400 DW 200 100 200 400 800 DW 300 100 200 400 800 DW 400 100 400 800 1600 smallrc DW 500 100 400 800 1600 mediumrc DW 600 100 400 800 1600 largerc xlargerc DW 1000 100 800 1600 3200 DW 1200 100 800 1600 3200 DW 1500 100 800 1600 3200 DW 2000 1600 3200 6400
DECLARE @total_num_columns INT = 10 SELECT CEILING ( ( (75497472 +(@total_num_columns * 1048576 * 16) /10 ) )/1048576.0 ) AS Rowgroup_memorygrant_estimate_in_MiB_for_100K , CEILING ( (75497472 + (@total_num_columns * 1048576 * 16) ) /1048576.0 ) AS Rowgroup_memorygrant_estimate_in_MiB_for_1M ;
5000 4750 4500 Estimated Memory Grant for one partition (MiB) 4250 4000 3750 3500 3250 3000 2750 2500 RG Grant Estimate for 100K rows (MiB) 2250 RG Grant Estimate for 1M rows (MiB) 2000 1750 1500 1250 1000 750 500 250 0 5 10 20 30 40 50 75 # Columns on a table 100 150 200 300
CREATE LOGIN newperson WITH PASSWORD = 'SQLB1ts!'; CREATE USER newperson FOR LOGIN newperson; EXEC sp_addrolemember 'loginmanager', 'newperson'; EXEC sp_addrolemember 'dbmanager', 'newperson';
SELECT ro.[name] AS [db_role_name] FROM sys.database_principals ro WHERE ro.[type_desc] = 'DATABASE_ROLE' AND ro.[is_fixed_role] = 0 ;
CREATE USER newperson FOR LOGIN newperson ; GRANT CONTROL ON DATABASE::ContosoRetailDW TO newperson ; SELECT r.[name] AS role_principal_name , m.[name] AS member_principal_name FROM sys.database_role_members rm JOIN sys.database_principals AS r ON rm.[role_principal_id] = r.[principal_id] JOIN sys.database_principals AS m ON rm.[member_principal_id] = m.[principal_id] WHERE r.[name] IN ('mediumrc', 'largerc', 'xlargerc') ; EXEC sp_addrolemember 'mediumrc', 'newperson' ;
SELECT r.[request_id] AS Req_ID , r.[command] AS Req_command , r.[status] AS Req_Status , r.[submit_time] AS Req_SubmitTime , r.[start_time] AS Req_StartTime , DATEDIFF(ms, [submit_time], [start_time]) AS Req_WaitDuration_ms , r.[resource_class] AS Req_resource_class FROM sys.dm_pdw_exec_requests r WHERE [session_id] <> SESSION_ID() ;
DW 10 0 DW 20 0 DW 30 0 DW 40 0 DW 50 0 DW 60 0 DW 10 00 DW 12 00 DW 15 00 DW 20 00 1000 Connections 100 Queries Slots small rc (MB/dist) medium rc (MB/dist) 10 1
huge
CREATE TABLE [cso].[FactOnlineSales_INS] ( [OnlineSalesKey] int NOT NULL , [DateKey] datetime NOT NULL , [StoreKey] int NOT NULL , [ProductKey] int NOT NULL , [PromotionKey] int NOT NULL , [CurrencyKey] int NOT NULL , [CustomerKey] int NOT NULL , [SalesOrderNumber] nvarchar(20) NOT NULL , [SalesOrderLineNumber] int NULL , [SalesQuantity] int NOT NULL , [SalesAmount] money NOT NULL , [ReturnQuantity] int NOT NULL , [ReturnAmount] money NULL , [DiscountQuantity] int NULL , [DiscountAmount] money NULL , [TotalCost] money NOT NULL , [UnitCost] money NULL , [UnitPrice] money NULL , [ETLLoadID] int NULL , [LoadDate] datetime NULL , [UpdateDate] datetime NULL ) WITH (CLUSTERED COLUMNSTORE INDEX, DISTRIBUTION = HASH([ProductKey])) ; INSERT INTO [cso].[FactOnlineSales_INS] SELECT * FROM [cso].[FactOnlineSales] ; SELECT rs.* FROM sys.dm_pdw_exec_requests er JOIN sys.dm_pdw_request_steps rs ON er.[request_id] = rs.[request_id] WHERE er.[session_id] = SESSION_ID() ;
--EXPLAIN SELECT SUM([SalesAmount]) FROM [cso].[FactOnlineSales_INS] AS fos JOIN [cso].[DimProduct] AS dip ON fos.[ProductKey] = dip.[ProductKey] WHERE fos.DateKey BETWEEN '2007-01-01 00:00.000' AND '2008-01-01 00:00.000' GROUP BY dip.[BrandName] ; CREATE STATISTICS stat_1 ON [cso].[FactOnlineSales_INS]([ProductKey]); --EXPLAIN SELECT SUM([SalesAmount]) FROM [cso].[FactOnlineSales_INS] AS fos JOIN [cso].[DimProduct] AS dip ON fos.[ProductKey] = dip.[ProductKey] WHERE fos.DateKey BETWEEN '2007-01-01 00:00.000' AND '2008-01-01 00:00.000' GROUP BY dip.[BrandName] ;
SELECT * FROM sys.schemas s JOIN sys.tables t ON s.[schema_id] = t.[schema_id] JOIN sys.partitions p ON t.[object_id] = p.[object_id] WHERE s.[name] = 'cso' AND t.[name] = 'FactOnlineSales_INS' ; SELECT stats_id , name AS stats_name , STATS_DATE(object_id, stats_id) AS statistics_date FROM sys.stats s WHERE s.object_id = OBJECT_ID('cso.DimCustomer') ;
- Slides: 48