Teradata : calculate space,
1.calculate the space of each table
SELECT databasename,TABLENAME,
SUM(CURRENTPERM) / (1024 * 1024 * 1024 ) AS "ACTUAL_SPACE (GB)" ,
--The physical space occupied by the table
(MAX(CURRENTPERM) * (HASHAMP() + 1)) / (1024 * 1024 * 1024) AS "EFFECTIVE_SPACE (GB)"
--The logic space occupied by the table because of the skew of the table
FROM DBC.TABLESIZE
WHERE databasename = '?databasename'
GROUP BY databasename,TABLENAME;
2.calculate the space of each database
SELECT databasename ,
permspace/1024/1024/1024 AS "total_space (GB)"
FROM dbc.databases
WHERE databasename = '?databasename'
3.wrong way to calculate the used / unused space of specific database.
SELECT table_a.databasename,
table_a.permspace/1024/1024/1024 AS "database_total_size (GB)",
view_b.database_total_used_size/1024/1024/1024 AS "database_used_size (GB)",
(table_a.permspace - view_b.database_total_used_size)/1024/1024/1024 AS "database_unused_size (GB)"
FROM dbc.Databases table_a,
( SELECT databasename,SUM(table_total_size) database_total_used_size
FROM ( SELECT databasename,
tablename,
MAX(currentPerm) max_size_of_all_amp,
HASHAMP() + 1 amp_number ,
MAX(currentPerm) * ( HASHAMP() + 1 ) table_total_size
FROM dbc.tablesize
GROUP BY databasename,tablename
) view_a
GROUP BY databasename
) view_b
WHERE table_a.databasename = '?databasename'
AND table_a.databasename = view_b.databasename;
--Root cause : Can't treat the sum of logic space of each table as the used space.
--1. there are 2 AMPs and 2 tables
--2. each AMP has 100GB
--3. table A occupied 30GB of AMP1 and 50GB of AMP2
--4. table B occupied 60GB of AMP1 and 30GB of AMP2
--Will it success to load 5GB data of table A to AMP1? (YES)
--The logic space of Table A is 100GB and Table B is 120GB because of the skew of each table.
--Both of them are larger than( equal to) the space of AMP.
--But actually AMP1 has 10GB left , and AMP has 20GB left.
--If we want to load 5GB data of table A to AMP1 , it will be successfull.
SELECT SUM(maxPerm)/1024/1024/1024 AS "TOTAL_SIZE (GB)",
SUM(currentPerm)/1024/1024/1024 AS "USED_SIZE (GB)",
(SUM(maxPerm) - SUM(currentPerm))/1024/1024/1024 AS "UNUSED_SIZE (GB)"
FROM dbc.DiskSpace
WHERE databasename = 'P_HK_ORP_T'
GROUP BY databasename ;
--Root cause : Can't treat the sum of physical space of each AMP as the used space.
--1. there are 2 AMPs in a database
--2. each AMP has 100GB
--3. The space of AMP1 is occupied 100GB and The space of AMP2 is occupied 60GB because of the skew of tables
--If we want to load 1GB data to AMP1 and 10GB data to AMP2 ( import total 11GB to this database) , will be successfull or not ? (NO).
--There are total 40GB left for this database.
--But for AMP1 there are no space left . So it will failed.
4.Correct way to calculate the used / unused space of specific database.
SELECT SUM(maxPerm)/1024/1024/1024 AS "TOTAL_SIZE (GB)",
MAX(currentPerm) * COUNT(*)/1024/1024/1024 AS "USED_SIZE (GB)",
(SUM(maxPerm) - MAX(currentPerm) * COUNT(*))/1024/1024/1024 AS "UNUSED_SIZE (GB)"
FROM dbc.DiskSpace
WHERE databasename = 'P_HK_ORP_T'
GROUP BY databasename ;