Created 11-24-2017 02:08 PM
Kerberized HDP-2.6.3.0.
The table has 'Id' as it's primary key and 'DepartmentId' as the foreign key:
+---------------------------------------------------------+-------------------------------------------------------------------------+-----------------------------+--+
| col_name | data_type | comment |
+---------------------------------------------------------+-------------------------------------------------------------------------+-----------------------------+--+
| # col_name | data_type | comment |
| | NULL | NULL |
| id | int | Surrogate PK is not fun |
| firstname | string | |
| lastname | string | |
| dob | date | |
| departmentid | int | |
| | NULL | NULL |
| # Detailed Table Information | NULL | NULL |
| Database: | group_hadoopdeveloper | NULL |
| Owner: | ojoqcu | NULL |
| CreateTime: | Fri Nov 24 13:39:02 UTC 2017 | NULL |
| LastAccessTime: | UNKNOWN | NULL |
| Retention: | 0 | NULL |
| Location: | hdfs://devhadoop/apps/hive/warehouse/group_hadoopdeveloper.db/employee | NULL |
| Table Type: | MANAGED_TABLE | NULL |
| Table Parameters: | NULL | NULL |
| | COLUMN_STATS_ACCURATE | {\"BASIC_STATS\":\"true\"} |
| | numFiles | 0 |
| | numRows | 0 |
| | rawDataSize | 0 |
| | totalSize | 0 |
| | transient_lastDdlTime | 1511530742 |
| | NULL | NULL |
| # Storage Information | NULL | NULL |
| SerDe Library: | org.apache.hadoop.hive.ql.io.orc.OrcSerde | NULL |
| InputFormat: | org.apache.hadoop.hive.ql.io.orc.OrcInputFormat | NULL |
| OutputFormat: | org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat | NULL |
| Compressed: | No | NULL |
| Num Buckets: | -1 | NULL |
| Bucket Columns: | [] | NULL |
| Sort Columns: | [] | NULL |
| Storage Desc Params: | NULL | NULL |
| | serialization.format | 1 |
| | NULL | NULL |
| # Constraints | NULL | NULL |
| | NULL | NULL |
| # Primary Key | NULL | NULL |
| Table: | group_hadoopdeveloper.employee | NULL |
| Constraint Name: | pk_203923149_1511530742932_0 | NULL |
| Column Names: | id | |
| | NULL | NULL |
| # Foreign Keys | NULL | NULL |
| Table: | group_hadoopdeveloper.employee | NULL |
| Constraint Name: | fk_employee_department | NULL |
| Parent Column Name:group_hadoopdeveloper.department.id | Column Name:departmentid | Key Sequence:1 |
| | NULL | NULL |
+---------------------------------------------------------+-------------------------------------------------------------------------+-----------------------------+--+I tried the following ways to retrieve the constraints:
{"columns":[{"name":"id","type":"int","comment":"Surrogate PK is not fun"},{"name":"firstname","type":"string"},{"name":"lastname","type":"string"},{"name":"dob","type":"date"},{"name":"departmentid","type":"int"}],"database":"group_hadoopdeveloper","table":"employee"}I have the following questions:
Created 02-02-2018 08:43 AM
Yes. It is possible to retrieve constraints of a Hive table. You need to query the database where Hive stores it's metastore...We are using MySQL database as Hive metastore and below is the query:
SELECT d.NAME as SCHEMA_NAME,t.TBL_NAME as TABLE_NAME, kc.CONSTRAINT_NAME, kc.CONSTRAINT_TYPE, c.COLUMN_NAME, c.TYPE_NAME, '' as REFERENCE_SCHEMA_NAME, '' as REFERENCE_TABLE_NAME, '' as REFERENCE_COL_NAME
FROM DBS d, TBLS t, SDS s, COLUMNS_V2 c, CDS cds, KEY_CONSTRAINTS kc
WHERE d.DB_ID =t.DB_ID AND t.SD_ID = s.SD_ID AND s.cd_id = c.cd_id AND cds.CD_ID = c.CD_ID AND kc.PARENT_TBL_ID = t.TBL_ID AND kc.PARENT_CD_ID=c.CD_ID AND c.INTEGER_IDX=kc.PARENT_INTEGER_IDX AND CONSTRAINT_TYPE='0' AND t.tbl_name='pk' AND d.NAME='vsatyp_schema'
UNION ALL
SELECT d.NAME as SCHEMA_NAME,t.TBL_NAME as TABLE_NAME, kc.CONSTRAINT_NAME, kc.CONSTRAINT_TYPE, c.COLUMN_NAME, c.TYPE_NAME, p_d.NAME as REFERENCE_SCHEMA_NAME, p_t.TBL_NAME as REFERENCE_TABLE_NAME, p_c.COLUMN_NAME as REFERENCE_COL_NAME
FROM hive_metastore.DBS d, hive_metastore.TBLS t, hive_metastore.SDS s, hive_metastore.COLUMNS_V2 c, hive_metastore.CDS cds, hive_metastore.KEY_CONSTRAINTS kc, hive_metastore.DBS p_d, hive_metastore.TBLS p_t, hive_metastore.SDS p_s, hive_metastore.COLUMNS_V2 p_c, hive_metastore.CDS p_cds
WHERE d.DB_ID =t.DB_ID AND t.SD_ID = s.SD_ID AND s.cd_id = c.cd_id AND cds.CD_ID = c.CD_ID AND kc.CHILD_TBL_ID = t.TBL_ID AND kc.CHILD_CD_ID=c.CD_ID AND c.INTEGER_IDX=kc.CHILD_INTEGER_IDX AND p_d.DB_ID =p_t.DB_ID AND p_t.SD_ID = p_s.SD_ID AND p_s.cd_id = p_c.cd_id AND p_cds.CD_ID = p_c.CD_ID AND kc.PARENT_TBL_ID = p_t.TBL_ID AND kc.PARENT_CD_ID=p_c.CD_ID AND p_c.INTEGER_IDX=kc.PARENT_INTEGER_IDX AND CONSTRAINT_TYPE='1' AND d.NAME in ('vsatyp_schema') and t.tbl_name = 'fk' order by SCHEMA_NAME, TABLE_NAME, CONSTRAINT_TYPE
Hope this helps...