Member since
09-11-2015
269
Posts
281
Kudos Received
55
Solutions
My Accepted Solutions
| Title | Views | Posted |
|---|---|---|
| | 4193 | 03-15-2017 07:12 AM |
| | 2518 | 03-14-2017 07:08 PM |
| | 3030 | 03-14-2017 03:36 PM |
| | 2484 | 02-28-2017 04:32 PM |
| | 1713 | 02-28-2017 10:02 AM |
12-29-2016
12:33 PM
Nope, the GUIDs here are just large negative numbers. Entities (hive tables, hive process) are identified by their qualified name, and when the JSON is saved to the backend datastore, it will be stored with the actual GUIDs of the entities (hive tables and hive process). I am attaching diff.txt, a diff of the two process JSONs; this should give you the list of changes. Let me know if you have any queries.
... View more
12-29-2016
11:46 AM
2 Kudos
@Manoj Dhake
As an extension to what was answered here, just create another table named table3 and submit the below json using /api/atlas/entities REST API. [{
"jsonClass":"org.apache.atlas.typesystem.json.InstanceSerialization$_Reference",
"id":{
"jsonClass":"org.apache.atlas.typesystem.json.InstanceSerialization$_Id",
"id":"-11893021824425513",
"version":0,
"typeName":"hive_process",
"state":"ACTIVE"
},
"typeName":"hive_process",
"values":{
"queryId":"hive_20161228094619_81b13647-4f7f-4f1b-9c08-0f64eb8dbb34",
"name":"create table table3 as select * from table2",
"startTime":"2016-12-28T09:46:19.003Z",
"queryPlan":{
},
"operationType":"CREATETABLE_AS_SELECT",
"outputs":[
{
"jsonClass":"org.apache.atlas.typesystem.json.InstanceSerialization$_Reference",
"id":{
"jsonClass":"org.apache.atlas.typesystem.json.InstanceSerialization$_Id",
"id":"-11893021824425516",
"version":0,
"typeName":"hive_table",
"state":"ACTIVE"
},
"typeName":"hive_table",
"values":{
"tableType":"MANAGED_TABLE",
"name":"table3",
"createTime":"2016-12-28T09:46:30.000Z",
"temporary":false,
"db":{
"jsonClass":"org.apache.atlas.typesystem.json.InstanceSerialization$_Reference",
"id":{
"jsonClass":"org.apache.atlas.typesystem.json.InstanceSerialization$_Id",
"id":"-11893021824425517",
"version":0,
"typeName":"hive_db",
"state":"ACTIVE"
},
"typeName":"hive_db",
"values":{
"name":"default",
"location":"hdfs://mycluster/apps/hive/warehouse",
"description":"Default Hive database",
"ownerType":2,
"qualifiedName":"default@cl1",
"owner":"public",
"clusterName":"cl1",
"parameters":{
}
},
"traitNames":[
],
"traits":{
}
},
"retention":0,
"qualifiedName":"default.table3@cl1",
"columns":[
{
"jsonClass":"org.apache.atlas.typesystem.json.InstanceSerialization$_Reference",
"id":{
"jsonClass":"org.apache.atlas.typesystem.json.InstanceSerialization$_Id",
"id":"-11893021824425514",
"version":0,
"typeName":"hive_column",
"state":"ACTIVE"
},
"typeName":"hive_column",
"values":{
"name":"abc",
"qualifiedName":"default.table3.abc@cl1",
"owner":"hive",
"type":"string",
"table":{
"jsonClass":"org.apache.atlas.typesystem.json.InstanceSerialization$_Id",
"id":"-11893021824425516",
"version":0,
"typeName":"hive_table",
"state":"ACTIVE"
}
},
"traitNames":[
],
"traits":{
}
}
],
"lastAccessTime":"2016-12-28T09:46:30.000Z",
"owner":"hive",
"sd":{
"jsonClass":"org.apache.atlas.typesystem.json.InstanceSerialization$_Reference",
"id":{
"jsonClass":"org.apache.atlas.typesystem.json.InstanceSerialization$_Id",
"id":"-11893021824425515",
"version":0,
"typeName":"hive_storagedesc",
"state":"ACTIVE"
},
"typeName":"hive_storagedesc",
"values":{
"location":"hdfs://mycluster/apps/hive/warehouse/table3",
"serdeInfo":{
"jsonClass":"org.apache.atlas.typesystem.json.InstanceSerialization$_Struct",
"typeName":"hive_serde",
"values":{
"serializationLib":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe",
"parameters":{
"serialization.format":"1"
}
}
},
"qualifiedName":"default.table3@cl1_storage",
"outputFormat":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat",
"compressed":false,
"numBuckets":-1,
"inputFormat":"org.apache.hadoop.mapred.TextInputFormat",
"parameters":{
},
"storedAsSubDirectories":false,
"table":{
"jsonClass":"org.apache.atlas.typesystem.json.InstanceSerialization$_Id",
"id":"-11893021824425516",
"version":0,
"typeName":"hive_table",
"state":"ACTIVE"
}
},
"traitNames":[
],
"traits":{
}
},
"parameters":{
"rawDataSize":"0",
"numFiles":"0",
"transient_lastDdlTime":"1482918390",
"totalSize":"0",
"COLUMN_STATS_ACCURATE":"{\"BASIC_STATS\":\"true\"}",
"numRows":"0"
},
"partitionKeys":[
]
},
"traitNames":[
],
"traits":{
}
}
],
"endTime":"2016-12-28T09:46:31.211Z",
"recentQueries":[
"create table table3 as select * from table2"
],
"inputs":[
{
"jsonClass":"org.apache.atlas.typesystem.json.InstanceSerialization$_Reference",
"id":{
"jsonClass":"org.apache.atlas.typesystem.json.InstanceSerialization$_Id",
"id":"-11893021824425520",
"version":0,
"typeName":"hive_table",
"state":"ACTIVE"
},
"typeName":"hive_table",
"values":{
"tableType":"MANAGED_TABLE",
"name":"table2",
"createTime":"2016-12-28T09:34:53.000Z",
"temporary":false,
"db":{
"jsonClass":"org.apache.atlas.typesystem.json.InstanceSerialization$_Reference",
"id":{
"jsonClass":"org.apache.atlas.typesystem.json.InstanceSerialization$_Id",
"id":"-11893021824425521",
"version":0,
"typeName":"hive_db",
"state":"ACTIVE"
},
"typeName":"hive_db",
"values":{
"name":"default",
"location":"hdfs://mycluster/apps/hive/warehouse",
"description":"Default Hive database",
"ownerType":2,
"qualifiedName":"default@cl1",
"owner":"public",
"clusterName":"cl1",
"parameters":{
}
},
"traitNames":[
],
"traits":{
}
},
"retention":0,
"qualifiedName":"default.table2@cl1",
"columns":[
{
"jsonClass":"org.apache.atlas.typesystem.json.InstanceSerialization$_Reference",
"id":{
"jsonClass":"org.apache.atlas.typesystem.json.InstanceSerialization$_Id",
"id":"-11893021824425518",
"version":0,
"typeName":"hive_column",
"state":"ACTIVE"
},
"typeName":"hive_column",
"values":{
"name":"abc",
"qualifiedName":"default.table2.abc@cl1",
"owner":"hive",
"type":"string",
"table":{
"jsonClass":"org.apache.atlas.typesystem.json.InstanceSerialization$_Id",
"id":"-11893021824425520",
"version":0,
"typeName":"hive_table",
"state":"ACTIVE"
}
},
"traitNames":[
],
"traits":{
}
}
],
"lastAccessTime":"2016-12-28T09:34:53.000Z",
"owner":"hive",
"sd":{
"jsonClass":"org.apache.atlas.typesystem.json.InstanceSerialization$_Reference",
"id":{
"jsonClass":"org.apache.atlas.typesystem.json.InstanceSerialization$_Id",
"id":"-11893021824425519",
"version":0,
"typeName":"hive_storagedesc",
"state":"ACTIVE"
},
"typeName":"hive_storagedesc",
"values":{
"location":"hdfs://mycluster/apps/hive/warehouse/table2",
"serdeInfo":{
"jsonClass":"org.apache.atlas.typesystem.json.InstanceSerialization$_Struct",
"typeName":"hive_serde",
"values":{
"serializationLib":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe",
"parameters":{
"serialization.format":"1"
}
}
},
"qualifiedName":"default.table2@cl1_storage",
"outputFormat":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat",
"compressed":false,
"numBuckets":-1,
"inputFormat":"org.apache.hadoop.mapred.TextInputFormat",
"parameters":{
},
"storedAsSubDirectories":false,
"table":{
"jsonClass":"org.apache.atlas.typesystem.json.InstanceSerialization$_Id",
"id":"-11893021824425520",
"version":0,
"typeName":"hive_table",
"state":"ACTIVE"
}
},
"traitNames":[
],
"traits":{
}
},
"parameters":{
"rawDataSize":"0",
"numFiles":"0",
"transient_lastDdlTime":"1482917693",
"totalSize":"0",
"COLUMN_STATS_ACCURATE":"{\"BASIC_STATS\":\"true\"}",
"numRows":"0"
},
"partitionKeys":[
]
},
"traitNames":[
],
"traits":{
}
}
],
"qualifiedName":"default.table3@cl1:1482918390000",
"queryText":"create table table3 as select * from table2",
"clusterName":"cl1",
"userName":"hive"
},
"traitNames":[
],
"traits":{
}
}]
You have to change multiple properties. Basically, there is an input JSON block that describes the input entity (a hive table, say table2) and an output JSON block that describes the output entity (a hive table, say table3); these act as the input and output of the process, respectively. Hope this helps.
... View more
12-29-2016
11:22 AM
@Manoj Dhake Currently the hive process json links table1 and table2. For creating lineage between table2 and table3: in the json change table1 references to table2 and table2 references to table3 and submit the json. This should create lineage like table1-->table2-->table3
... View more
12-28-2016
12:28 PM
@Manoj Dhake Which HDP version are you using? This JSON would work with HDP-2.5.x release.
... View more
12-28-2016
11:59 AM
2 Kudos
As I was seeing frequent questions on REST API usage to create entity and lineage I have posted it as an HCC article. https://community.hortonworks.com/content/kbentry/74919/how-to-create-hive-table-and-lineage-using-rest-ap.html
... View more
12-28-2016
11:38 AM
20 Kudos
Problem: Of late, there have been many HCC questions on how to create hive tables and lineage using REST APIs in Atlas. This article will act as a step-by-step guide to creating hive tables and lineage using the REST API. Solution: As part of the solution to this FAQ, I will create two hive tables and lineage (CTAS) between them. I have tested these changes on the HDP-2.5 release, so make sure you have HDP version >= 2.5. Step1: JSON for creating table1: [{
"jsonClass":"org.apache.atlas.typesystem.json.InstanceSerialization$_Reference",
"id":{
"jsonClass":"org.apache.atlas.typesystem.json.InstanceSerialization$_Id",
"id":"-11893021824425525",
"version":0,
"typeName":"hive_db",
"state":"ACTIVE"
},
"typeName":"hive_db",
"values":{
"name":"default",
"location":"hdfs://mycluster/apps/hive/warehouse",
"description":"Default Hive database",
"ownerType":2,
"qualifiedName":"default@cl1",
"owner":"public",
"clusterName":"cl1",
"parameters":{
}
},
"traitNames":[
],
"traits":{
}
},{
"jsonClass":"org.apache.atlas.typesystem.json.InstanceSerialization$_Reference",
"id":{
"jsonClass":"org.apache.atlas.typesystem.json.InstanceSerialization$_Id",
"id":"-11893021824425524",
"version":0,
"typeName":"hive_table",
"state":"ACTIVE"
},
"typeName":"hive_table",
"values":{
"tableType":"MANAGED_TABLE",
"name":"table1",
"createTime":"2016-12-28T09:34:53.000Z",
"temporary":false,
"db":{
"jsonClass":"org.apache.atlas.typesystem.json.InstanceSerialization$_Reference",
"id":{
"jsonClass":"org.apache.atlas.typesystem.json.InstanceSerialization$_Id",
"id":"-11893021824425525",
"version":0,
"typeName":"hive_db",
"state":"ACTIVE"
},
"typeName":"hive_db",
"values":{
"name":"default",
"location":"hdfs://mycluster/apps/hive/warehouse",
"description":"Default Hive database",
"ownerType":2,
"qualifiedName":"default@cl1",
"owner":"public",
"clusterName":"cl1",
"parameters":{
}
},
"traitNames":[
],
"traits":{
}
},
"retention":0,
"qualifiedName":"default.table1@cl1",
"columns":[
{
"jsonClass":"org.apache.atlas.typesystem.json.InstanceSerialization$_Reference",
"id":{
"jsonClass":"org.apache.atlas.typesystem.json.InstanceSerialization$_Id",
"id":"-11893021824425522",
"version":0,
"typeName":"hive_column",
"state":"ACTIVE"
},
"typeName":"hive_column",
"values":{
"name":"abc",
"qualifiedName":"default.table1.abc@cl1",
"owner":"hive",
"type":"string",
"table":{
"jsonClass":"org.apache.atlas.typesystem.json.InstanceSerialization$_Id",
"id":"-11893021824425524",
"version":0,
"typeName":"hive_table",
"state":"ACTIVE"
}
},
"traitNames":[
],
"traits":{
}
}
],
"lastAccessTime":"2016-12-28T09:34:53.000Z",
"owner":"hive",
"sd":{
"jsonClass":"org.apache.atlas.typesystem.json.InstanceSerialization$_Reference",
"id":{
"jsonClass":"org.apache.atlas.typesystem.json.InstanceSerialization$_Id",
"id":"-11893021824425523",
"version":0,
"typeName":"hive_storagedesc",
"state":"ACTIVE"
},
"typeName":"hive_storagedesc",
"values":{
"location":"hdfs://mycluster/apps/hive/warehouse/table1",
"serdeInfo":{
"jsonClass":"org.apache.atlas.typesystem.json.InstanceSerialization$_Struct",
"typeName":"hive_serde",
"values":{
"serializationLib":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe",
"parameters":{
"serialization.format":"1"
}
}
},
"qualifiedName":"default.table1@cl1_storage",
"outputFormat":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat",
"compressed":false,
"numBuckets":-1,
"inputFormat":"org.apache.hadoop.mapred.TextInputFormat",
"parameters":{
},
"storedAsSubDirectories":false,
"table":{
"jsonClass":"org.apache.atlas.typesystem.json.InstanceSerialization$_Id",
"id":"-11893021824425524",
"version":0,
"typeName":"hive_table",
"state":"ACTIVE"
}
},
"traitNames":[
],
"traits":{
}
},
"parameters":{
"rawDataSize":"0",
"numFiles":"0",
"transient_lastDdlTime":"1482917693",
"totalSize":"0",
"COLUMN_STATS_ACCURATE":"{\"BASIC_STATS\":\"true\"}",
"numRows":"0"
},
"partitionKeys":[
]
},
"traitNames":[
],
"traits":{
}
}]
Save the above json to a file. Step2: REST API call to create the hive table entity. curl -v -H 'Accept: application/json, text/plain, */*' -H 'Content-Type: application/json; charset=UTF-8' -u admin:admin -d @sample.json http://<IP_ADDRESS>:21000/api/atlas/entities The above will help in creating a hive table entity. Step3: JSON for creating table2: [{
"jsonClass":"org.apache.atlas.typesystem.json.InstanceSerialization$_Reference",
"id":{
"jsonClass":"org.apache.atlas.typesystem.json.InstanceSerialization$_Id",
"id":"-11893021824425525",
"version":0,
"typeName":"hive_db",
"state":"ACTIVE"
},
"typeName":"hive_db",
"values":{
"name":"default",
"location":"hdfs://mycluster/apps/hive/warehouse",
"description":"Default Hive database",
"ownerType":2,
"qualifiedName":"default@cl1",
"owner":"public",
"clusterName":"cl1",
"parameters":{
}
},
"traitNames":[
],
"traits":{
}
},{
"jsonClass":"org.apache.atlas.typesystem.json.InstanceSerialization$_Reference",
"id":{
"jsonClass":"org.apache.atlas.typesystem.json.InstanceSerialization$_Id",
"id":"-11893021824425524",
"version":0,
"typeName":"hive_table",
"state":"ACTIVE"
},
"typeName":"hive_table",
"values":{
"tableType":"MANAGED_TABLE",
"name":"table2",
"createTime":"2016-12-28T09:34:53.000Z",
"temporary":false,
"db":{
"jsonClass":"org.apache.atlas.typesystem.json.InstanceSerialization$_Reference",
"id":{
"jsonClass":"org.apache.atlas.typesystem.json.InstanceSerialization$_Id",
"id":"-11893021824425525",
"version":0,
"typeName":"hive_db",
"state":"ACTIVE"
},
"typeName":"hive_db",
"values":{
"name":"default",
"location":"hdfs://mycluster/apps/hive/warehouse",
"description":"Default Hive database",
"ownerType":2,
"qualifiedName":"default@cl1",
"owner":"public",
"clusterName":"cl1",
"parameters":{
}
},
"traitNames":[
],
"traits":{
}
},
"retention":0,
"qualifiedName":"default.table2@cl1",
"columns":[
{
"jsonClass":"org.apache.atlas.typesystem.json.InstanceSerialization$_Reference",
"id":{
"jsonClass":"org.apache.atlas.typesystem.json.InstanceSerialization$_Id",
"id":"-11893021824425522",
"version":0,
"typeName":"hive_column",
"state":"ACTIVE"
},
"typeName":"hive_column",
"values":{
"name":"abc",
"qualifiedName":"default.table2.abc@cl1",
"owner":"hive",
"type":"string",
"table":{
"jsonClass":"org.apache.atlas.typesystem.json.InstanceSerialization$_Id",
"id":"-11893021824425524",
"version":0,
"typeName":"hive_table",
"state":"ACTIVE"
}
},
"traitNames":[
],
"traits":{
}
}
],
"lastAccessTime":"2016-12-28T09:34:53.000Z",
"owner":"hive",
"sd":{
"jsonClass":"org.apache.atlas.typesystem.json.InstanceSerialization$_Reference",
"id":{
"jsonClass":"org.apache.atlas.typesystem.json.InstanceSerialization$_Id",
"id":"-11893021824425523",
"version":0,
"typeName":"hive_storagedesc",
"state":"ACTIVE"
},
"typeName":"hive_storagedesc",
"values":{
"location":"hdfs://mycluster/apps/hive/warehouse/table2",
"serdeInfo":{
"jsonClass":"org.apache.atlas.typesystem.json.InstanceSerialization$_Struct",
"typeName":"hive_serde",
"values":{
"serializationLib":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe",
"parameters":{
"serialization.format":"1"
}
}
},
"qualifiedName":"default.table2@cl1_storage",
"outputFormat":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat",
"compressed":false,
"numBuckets":-1,
"inputFormat":"org.apache.hadoop.mapred.TextInputFormat",
"parameters":{
},
"storedAsSubDirectories":false,
"table":{
"jsonClass":"org.apache.atlas.typesystem.json.InstanceSerialization$_Id",
"id":"-11893021824425524",
"version":0,
"typeName":"hive_table",
"state":"ACTIVE"
}
},
"traitNames":[
],
"traits":{
}
},
"parameters":{
"rawDataSize":"0",
"numFiles":"0",
"transient_lastDdlTime":"1482917693",
"totalSize":"0",
"COLUMN_STATS_ACCURATE":"{\"BASIC_STATS\":\"true\"}",
"numRows":"0"
},
"partitionKeys":[
]
},
"traitNames":[
],
"traits":{
}
}]
Save the above json to a file. Step4: Repeat step2 with step3 json
Step5: JSON to create lineage between above two hive tables: [{
"jsonClass":"org.apache.atlas.typesystem.json.InstanceSerialization$_Reference",
"id":{
"jsonClass":"org.apache.atlas.typesystem.json.InstanceSerialization$_Id",
"id":"-11893021824425513",
"version":0,
"typeName":"hive_process",
"state":"ACTIVE"
},
"typeName":"hive_process",
"values":{
"queryId":"hive_20161228094619_81b13647-4f7f-4f1b-9c08-0f64eb8dbb34",
"name":"create table table2 as select * from table1",
"startTime":"2016-12-28T09:46:19.003Z",
"queryPlan":{
},
"operationType":"CREATETABLE_AS_SELECT",
"outputs":[
{
"jsonClass":"org.apache.atlas.typesystem.json.InstanceSerialization$_Reference",
"id":{
"jsonClass":"org.apache.atlas.typesystem.json.InstanceSerialization$_Id",
"id":"-11893021824425516",
"version":0,
"typeName":"hive_table",
"state":"ACTIVE"
},
"typeName":"hive_table",
"values":{
"tableType":"MANAGED_TABLE",
"name":"table2",
"createTime":"2016-12-28T09:46:30.000Z",
"temporary":false,
"db":{
"jsonClass":"org.apache.atlas.typesystem.json.InstanceSerialization$_Reference",
"id":{
"jsonClass":"org.apache.atlas.typesystem.json.InstanceSerialization$_Id",
"id":"-11893021824425517",
"version":0,
"typeName":"hive_db",
"state":"ACTIVE"
},
"typeName":"hive_db",
"values":{
"name":"default",
"location":"hdfs://mycluster/apps/hive/warehouse",
"description":"Default Hive database",
"ownerType":2,
"qualifiedName":"default@cl1",
"owner":"public",
"clusterName":"cl1",
"parameters":{
}
},
"traitNames":[
],
"traits":{
}
},
"retention":0,
"qualifiedName":"default.table2@cl1",
"columns":[
{
"jsonClass":"org.apache.atlas.typesystem.json.InstanceSerialization$_Reference",
"id":{
"jsonClass":"org.apache.atlas.typesystem.json.InstanceSerialization$_Id",
"id":"-11893021824425514",
"version":0,
"typeName":"hive_column",
"state":"ACTIVE"
},
"typeName":"hive_column",
"values":{
"name":"abc",
"qualifiedName":"default.table2.abc@cl1",
"owner":"hive",
"type":"string",
"table":{
"jsonClass":"org.apache.atlas.typesystem.json.InstanceSerialization$_Id",
"id":"-11893021824425516",
"version":0,
"typeName":"hive_table",
"state":"ACTIVE"
}
},
"traitNames":[
],
"traits":{
}
}
],
"lastAccessTime":"2016-12-28T09:46:30.000Z",
"owner":"hive",
"sd":{
"jsonClass":"org.apache.atlas.typesystem.json.InstanceSerialization$_Reference",
"id":{
"jsonClass":"org.apache.atlas.typesystem.json.InstanceSerialization$_Id",
"id":"-11893021824425515",
"version":0,
"typeName":"hive_storagedesc",
"state":"ACTIVE"
},
"typeName":"hive_storagedesc",
"values":{
"location":"hdfs://mycluster/apps/hive/warehouse/table2",
"serdeInfo":{
"jsonClass":"org.apache.atlas.typesystem.json.InstanceSerialization$_Struct",
"typeName":"hive_serde",
"values":{
"serializationLib":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe",
"parameters":{
"serialization.format":"1"
}
}
},
"qualifiedName":"default.table2@cl1_storage",
"outputFormat":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat",
"compressed":false,
"numBuckets":-1,
"inputFormat":"org.apache.hadoop.mapred.TextInputFormat",
"parameters":{
},
"storedAsSubDirectories":false,
"table":{
"jsonClass":"org.apache.atlas.typesystem.json.InstanceSerialization$_Id",
"id":"-11893021824425516",
"version":0,
"typeName":"hive_table",
"state":"ACTIVE"
}
},
"traitNames":[
],
"traits":{
}
},
"parameters":{
"rawDataSize":"0",
"numFiles":"0",
"transient_lastDdlTime":"1482918390",
"totalSize":"0",
"COLUMN_STATS_ACCURATE":"{\"BASIC_STATS\":\"true\"}",
"numRows":"0"
},
"partitionKeys":[
]
},
"traitNames":[
],
"traits":{
}
}
],
"endTime":"2016-12-28T09:46:31.211Z",
"recentQueries":[
"create table table2 as select * from table1"
],
"inputs":[
{
"jsonClass":"org.apache.atlas.typesystem.json.InstanceSerialization$_Reference",
"id":{
"jsonClass":"org.apache.atlas.typesystem.json.InstanceSerialization$_Id",
"id":"-11893021824425520",
"version":0,
"typeName":"hive_table",
"state":"ACTIVE"
},
"typeName":"hive_table",
"values":{
"tableType":"MANAGED_TABLE",
"name":"table1",
"createTime":"2016-12-28T09:34:53.000Z",
"temporary":false,
"db":{
"jsonClass":"org.apache.atlas.typesystem.json.InstanceSerialization$_Reference",
"id":{
"jsonClass":"org.apache.atlas.typesystem.json.InstanceSerialization$_Id",
"id":"-11893021824425521",
"version":0,
"typeName":"hive_db",
"state":"ACTIVE"
},
"typeName":"hive_db",
"values":{
"name":"default",
"location":"hdfs://mycluster/apps/hive/warehouse",
"description":"Default Hive database",
"ownerType":2,
"qualifiedName":"default@cl1",
"owner":"public",
"clusterName":"cl1",
"parameters":{
}
},
"traitNames":[
],
"traits":{
}
},
"retention":0,
"qualifiedName":"default.table1@cl1",
"columns":[
{
"jsonClass":"org.apache.atlas.typesystem.json.InstanceSerialization$_Reference",
"id":{
"jsonClass":"org.apache.atlas.typesystem.json.InstanceSerialization$_Id",
"id":"-11893021824425518",
"version":0,
"typeName":"hive_column",
"state":"ACTIVE"
},
"typeName":"hive_column",
"values":{
"name":"abc",
"qualifiedName":"default.table1.abc@cl1",
"owner":"hive",
"type":"string",
"table":{
"jsonClass":"org.apache.atlas.typesystem.json.InstanceSerialization$_Id",
"id":"-11893021824425520",
"version":0,
"typeName":"hive_table",
"state":"ACTIVE"
}
},
"traitNames":[
],
"traits":{
}
}
],
"lastAccessTime":"2016-12-28T09:34:53.000Z",
"owner":"hive",
"sd":{
"jsonClass":"org.apache.atlas.typesystem.json.InstanceSerialization$_Reference",
"id":{
"jsonClass":"org.apache.atlas.typesystem.json.InstanceSerialization$_Id",
"id":"-11893021824425519",
"version":0,
"typeName":"hive_storagedesc",
"state":"ACTIVE"
},
"typeName":"hive_storagedesc",
"values":{
"location":"hdfs://mycluster/apps/hive/warehouse/table1",
"serdeInfo":{
"jsonClass":"org.apache.atlas.typesystem.json.InstanceSerialization$_Struct",
"typeName":"hive_serde",
"values":{
"serializationLib":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe",
"parameters":{
"serialization.format":"1"
}
}
},
"qualifiedName":"default.table1@cl1_storage",
"outputFormat":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat",
"compressed":false,
"numBuckets":-1,
"inputFormat":"org.apache.hadoop.mapred.TextInputFormat",
"parameters":{
},
"storedAsSubDirectories":false,
"table":{
"jsonClass":"org.apache.atlas.typesystem.json.InstanceSerialization$_Id",
"id":"-11893021824425520",
"version":0,
"typeName":"hive_table",
"state":"ACTIVE"
}
},
"traitNames":[
],
"traits":{
}
},
"parameters":{
"rawDataSize":"0",
"numFiles":"0",
"transient_lastDdlTime":"1482917693",
"totalSize":"0",
"COLUMN_STATS_ACCURATE":"{\"BASIC_STATS\":\"true\"}",
"numRows":"0"
},
"partitionKeys":[
]
},
"traitNames":[
],
"traits":{
}
}
],
"qualifiedName":"default.table2@cl1:1482918390000",
"queryText":"create table table2 as select * from table1",
"clusterName":"cl1",
"userName":"hive"
},
"traitNames":[
],
"traits":{
}
}]
Save the above json to a file. Step6: Repeat step2 with the step5 json. Step7: In the Atlas UI, the lineage between the two entities can be seen as below. Hope this clarifies the queries on creating hive tables using the REST API. Please let me know in the comments if there are any queries; I will be more than happy to help. References: REST API help: http://atlas.incubator.apache.org/api/rest.html Usage guide: http://atlas.incubator.apache.org/AtlasTechnicalUserGuide.pdf Atlas project page: http://atlas.incubator.apache.org/
... View more
Labels:
12-28-2016
11:24 AM
@Manoj Dhake I have updated the answer with more details, please check and let me know if it works. This time I have validated the json structure 🙂
... View more
12-28-2016
09:43 AM
1 Kudo
@Manoj Dhake A hive table entity can be created using the /api/atlas/entities REST call. One such example is: Step1: JSON for creating table1: [{
"jsonClass":"org.apache.atlas.typesystem.json.InstanceSerialization$_Reference",
"id":{
"jsonClass":"org.apache.atlas.typesystem.json.InstanceSerialization$_Id",
"id":"-11893021824425525",
"version":0,
"typeName":"hive_db",
"state":"ACTIVE"
},
"typeName":"hive_db",
"values":{
"name":"default",
"location":"hdfs://mycluster/apps/hive/warehouse",
"description":"Default Hive database",
"ownerType":2,
"qualifiedName":"default@cl1",
"owner":"public",
"clusterName":"cl1",
"parameters":{
}
},
"traitNames":[
],
"traits":{
}
},{
"jsonClass":"org.apache.atlas.typesystem.json.InstanceSerialization$_Reference",
"id":{
"jsonClass":"org.apache.atlas.typesystem.json.InstanceSerialization$_Id",
"id":"-11893021824425524",
"version":0,
"typeName":"hive_table",
"state":"ACTIVE"
},
"typeName":"hive_table",
"values":{
"tableType":"MANAGED_TABLE",
"name":"table1",
"createTime":"2016-12-28T09:34:53.000Z",
"temporary":false,
"db":{
"jsonClass":"org.apache.atlas.typesystem.json.InstanceSerialization$_Reference",
"id":{
"jsonClass":"org.apache.atlas.typesystem.json.InstanceSerialization$_Id",
"id":"-11893021824425525",
"version":0,
"typeName":"hive_db",
"state":"ACTIVE"
},
"typeName":"hive_db",
"values":{
"name":"default",
"location":"hdfs://mycluster/apps/hive/warehouse",
"description":"Default Hive database",
"ownerType":2,
"qualifiedName":"default@cl1",
"owner":"public",
"clusterName":"cl1",
"parameters":{
}
},
"traitNames":[
],
"traits":{
}
},
"retention":0,
"qualifiedName":"default.table1@cl1",
"columns":[
{
"jsonClass":"org.apache.atlas.typesystem.json.InstanceSerialization$_Reference",
"id":{
"jsonClass":"org.apache.atlas.typesystem.json.InstanceSerialization$_Id",
"id":"-11893021824425522",
"version":0,
"typeName":"hive_column",
"state":"ACTIVE"
},
"typeName":"hive_column",
"values":{
"name":"abc",
"qualifiedName":"default.table1.abc@cl1",
"owner":"hive",
"type":"string",
"table":{
"jsonClass":"org.apache.atlas.typesystem.json.InstanceSerialization$_Id",
"id":"-11893021824425524",
"version":0,
"typeName":"hive_table",
"state":"ACTIVE"
}
},
"traitNames":[
],
"traits":{
}
}
],
"lastAccessTime":"2016-12-28T09:34:53.000Z",
"owner":"hive",
"sd":{
"jsonClass":"org.apache.atlas.typesystem.json.InstanceSerialization$_Reference",
"id":{
"jsonClass":"org.apache.atlas.typesystem.json.InstanceSerialization$_Id",
"id":"-11893021824425523",
"version":0,
"typeName":"hive_storagedesc",
"state":"ACTIVE"
},
"typeName":"hive_storagedesc",
"values":{
"location":"hdfs://mycluster/apps/hive/warehouse/table1",
"serdeInfo":{
"jsonClass":"org.apache.atlas.typesystem.json.InstanceSerialization$_Struct",
"typeName":"hive_serde",
"values":{
"serializationLib":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe",
"parameters":{
"serialization.format":"1"
}
}
},
"qualifiedName":"default.table1@cl1_storage",
"outputFormat":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat",
"compressed":false,
"numBuckets":-1,
"inputFormat":"org.apache.hadoop.mapred.TextInputFormat",
"parameters":{
},
"storedAsSubDirectories":false,
"table":{
"jsonClass":"org.apache.atlas.typesystem.json.InstanceSerialization$_Id",
"id":"-11893021824425524",
"version":0,
"typeName":"hive_table",
"state":"ACTIVE"
}
},
"traitNames":[
],
"traits":{
}
},
"parameters":{
"rawDataSize":"0",
"numFiles":"0",
"transient_lastDdlTime":"1482917693",
"totalSize":"0",
"COLUMN_STATS_ACCURATE":"{\"BASIC_STATS\":\"true\"}",
"numRows":"0"
},
"partitionKeys":[
]
},
"traitNames":[
],
"traits":{
}
}]
Save the above json to a file. Step2: REST API call to create the hive table entity. curl -v -H 'Accept: application/json, text/plain, */*' -H 'Content-Type: application/json; charset=UTF-8' -u admin:admin -d @sample.json http://<IP_ADDRESS>:21000/api/atlas/entities The above will help in creating a hive table entity.
Step3: JSON for creating table2: [{
"jsonClass":"org.apache.atlas.typesystem.json.InstanceSerialization$_Reference",
"id":{
"jsonClass":"org.apache.atlas.typesystem.json.InstanceSerialization$_Id",
"id":"-11893021824425525",
"version":0,
"typeName":"hive_db",
"state":"ACTIVE"
},
"typeName":"hive_db",
"values":{
"name":"default",
"location":"hdfs://mycluster/apps/hive/warehouse",
"description":"Default Hive database",
"ownerType":2,
"qualifiedName":"default@cl1",
"owner":"public",
"clusterName":"cl1",
"parameters":{
}
},
"traitNames":[
],
"traits":{
}
},{
"jsonClass":"org.apache.atlas.typesystem.json.InstanceSerialization$_Reference",
"id":{
"jsonClass":"org.apache.atlas.typesystem.json.InstanceSerialization$_Id",
"id":"-11893021824425524",
"version":0,
"typeName":"hive_table",
"state":"ACTIVE"
},
"typeName":"hive_table",
"values":{
"tableType":"MANAGED_TABLE",
"name":"table2",
"createTime":"2016-12-28T09:34:53.000Z",
"temporary":false,
"db":{
"jsonClass":"org.apache.atlas.typesystem.json.InstanceSerialization$_Reference",
"id":{
"jsonClass":"org.apache.atlas.typesystem.json.InstanceSerialization$_Id",
"id":"-11893021824425525",
"version":0,
"typeName":"hive_db",
"state":"ACTIVE"
},
"typeName":"hive_db",
"values":{
"name":"default",
"location":"hdfs://mycluster/apps/hive/warehouse",
"description":"Default Hive database",
"ownerType":2,
"qualifiedName":"default@cl1",
"owner":"public",
"clusterName":"cl1",
"parameters":{
}
},
"traitNames":[
],
"traits":{
}
},
"retention":0,
"qualifiedName":"default.table2@cl1",
"columns":[
{
"jsonClass":"org.apache.atlas.typesystem.json.InstanceSerialization$_Reference",
"id":{
"jsonClass":"org.apache.atlas.typesystem.json.InstanceSerialization$_Id",
"id":"-11893021824425522",
"version":0,
"typeName":"hive_column",
"state":"ACTIVE"
},
"typeName":"hive_column",
"values":{
"name":"abc",
"qualifiedName":"default.table2.abc@cl1",
"owner":"hive",
"type":"string",
"table":{
"jsonClass":"org.apache.atlas.typesystem.json.InstanceSerialization$_Id",
"id":"-11893021824425524",
"version":0,
"typeName":"hive_table",
"state":"ACTIVE"
}
},
"traitNames":[
],
"traits":{
}
}
],
"lastAccessTime":"2016-12-28T09:34:53.000Z",
"owner":"hive",
"sd":{
"jsonClass":"org.apache.atlas.typesystem.json.InstanceSerialization$_Reference",
"id":{
"jsonClass":"org.apache.atlas.typesystem.json.InstanceSerialization$_Id",
"id":"-11893021824425523",
"version":0,
"typeName":"hive_storagedesc",
"state":"ACTIVE"
},
"typeName":"hive_storagedesc",
"values":{
"location":"hdfs://mycluster/apps/hive/warehouse/table2",
"serdeInfo":{
"jsonClass":"org.apache.atlas.typesystem.json.InstanceSerialization$_Struct",
"typeName":"hive_serde",
"values":{
"serializationLib":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe",
"parameters":{
"serialization.format":"1"
}
}
},
"qualifiedName":"default.table2@cl1_storage",
"outputFormat":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat",
"compressed":false,
"numBuckets":-1,
"inputFormat":"org.apache.hadoop.mapred.TextInputFormat",
"parameters":{
},
"storedAsSubDirectories":false,
"table":{
"jsonClass":"org.apache.atlas.typesystem.json.InstanceSerialization$_Id",
"id":"-11893021824425524",
"version":0,
"typeName":"hive_table",
"state":"ACTIVE"
}
},
"traitNames":[
],
"traits":{
}
},
"parameters":{
"rawDataSize":"0",
"numFiles":"0",
"transient_lastDdlTime":"1482917693",
"totalSize":"0",
"COLUMN_STATS_ACCURATE":"{\"BASIC_STATS\":\"true\"}",
"numRows":"0"
},
"partitionKeys":[
]
},
"traitNames":[
],
"traits":{
}
}]
Save the above json to a file. Step4: Repeat step2 with step3 json Step5: JSON to create lineage between above two hive tables: [{
"jsonClass":"org.apache.atlas.typesystem.json.InstanceSerialization$_Reference",
"id":{
"jsonClass":"org.apache.atlas.typesystem.json.InstanceSerialization$_Id",
"id":"-11893021824425513",
"version":0,
"typeName":"hive_process",
"state":"ACTIVE"
},
"typeName":"hive_process",
"values":{
"queryId":"hive_20161228094619_81b13647-4f7f-4f1b-9c08-0f64eb8dbb34",
"name":"create table table2 as select * from table1",
"startTime":"2016-12-28T09:46:19.003Z",
"queryPlan":{
},
"operationType":"CREATETABLE_AS_SELECT",
"outputs":[
{
"jsonClass":"org.apache.atlas.typesystem.json.InstanceSerialization$_Reference",
"id":{
"jsonClass":"org.apache.atlas.typesystem.json.InstanceSerialization$_Id",
"id":"-11893021824425516",
"version":0,
"typeName":"hive_table",
"state":"ACTIVE"
},
"typeName":"hive_table",
"values":{
"tableType":"MANAGED_TABLE",
"name":"table2",
"createTime":"2016-12-28T09:46:30.000Z",
"temporary":false,
"db":{
"jsonClass":"org.apache.atlas.typesystem.json.InstanceSerialization$_Reference",
"id":{
"jsonClass":"org.apache.atlas.typesystem.json.InstanceSerialization$_Id",
"id":"-11893021824425517",
"version":0,
"typeName":"hive_db",
"state":"ACTIVE"
},
"typeName":"hive_db",
"values":{
"name":"default",
"location":"hdfs://mycluster/apps/hive/warehouse",
"description":"Default Hive database",
"ownerType":2,
"qualifiedName":"default@cl1",
"owner":"public",
"clusterName":"cl1",
"parameters":{
}
},
"traitNames":[
],
"traits":{
}
},
"retention":0,
"qualifiedName":"default.table2@cl1",
"columns":[
{
"jsonClass":"org.apache.atlas.typesystem.json.InstanceSerialization$_Reference",
"id":{
"jsonClass":"org.apache.atlas.typesystem.json.InstanceSerialization$_Id",
"id":"-11893021824425514",
"version":0,
"typeName":"hive_column",
"state":"ACTIVE"
},
"typeName":"hive_column",
"values":{
"name":"abc",
"qualifiedName":"default.table2.abc@cl1",
"owner":"hive",
"type":"string",
"table":{
"jsonClass":"org.apache.atlas.typesystem.json.InstanceSerialization$_Id",
"id":"-11893021824425516",
"version":0,
"typeName":"hive_table",
"state":"ACTIVE"
}
},
"traitNames":[
],
"traits":{
}
}
],
"lastAccessTime":"2016-12-28T09:46:30.000Z",
"owner":"hive",
"sd":{
"jsonClass":"org.apache.atlas.typesystem.json.InstanceSerialization$_Reference",
"id":{
"jsonClass":"org.apache.atlas.typesystem.json.InstanceSerialization$_Id",
"id":"-11893021824425515",
"version":0,
"typeName":"hive_storagedesc",
"state":"ACTIVE"
},
"typeName":"hive_storagedesc",
"values":{
"location":"hdfs://mycluster/apps/hive/warehouse/table2",
"serdeInfo":{
"jsonClass":"org.apache.atlas.typesystem.json.InstanceSerialization$_Struct",
"typeName":"hive_serde",
"values":{
"serializationLib":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe",
"parameters":{
"serialization.format":"1"
}
}
},
"qualifiedName":"default.table2@cl1_storage",
"outputFormat":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat",
"compressed":false,
"numBuckets":-1,
"inputFormat":"org.apache.hadoop.mapred.TextInputFormat",
"parameters":{
},
"storedAsSubDirectories":false,
"table":{
"jsonClass":"org.apache.atlas.typesystem.json.InstanceSerialization$_Id",
"id":"-11893021824425516",
"version":0,
"typeName":"hive_table",
"state":"ACTIVE"
}
},
"traitNames":[
],
"traits":{
}
},
"parameters":{
"rawDataSize":"0",
"numFiles":"0",
"transient_lastDdlTime":"1482918390",
"totalSize":"0",
"COLUMN_STATS_ACCURATE":"{\"BASIC_STATS\":\"true\"}",
"numRows":"0"
},
"partitionKeys":[
]
},
"traitNames":[
],
"traits":{
}
}
],
"endTime":"2016-12-28T09:46:31.211Z",
"recentQueries":[
"create table table2 as select * from table1"
],
"inputs":[
{
"jsonClass":"org.apache.atlas.typesystem.json.InstanceSerialization$_Reference",
"id":{
"jsonClass":"org.apache.atlas.typesystem.json.InstanceSerialization$_Id",
"id":"-11893021824425520",
"version":0,
"typeName":"hive_table",
"state":"ACTIVE"
},
"typeName":"hive_table",
"values":{
"tableType":"MANAGED_TABLE",
"name":"table1",
"createTime":"2016-12-28T09:34:53.000Z",
"temporary":false,
"db":{
"jsonClass":"org.apache.atlas.typesystem.json.InstanceSerialization$_Reference",
"id":{
"jsonClass":"org.apache.atlas.typesystem.json.InstanceSerialization$_Id",
"id":"-11893021824425521",
"version":0,
"typeName":"hive_db",
"state":"ACTIVE"
},
"typeName":"hive_db",
"values":{
"name":"default",
"location":"hdfs://mycluster/apps/hive/warehouse",
"description":"Default Hive database",
"ownerType":2,
"qualifiedName":"default@cl1",
"owner":"public",
"clusterName":"cl1",
"parameters":{
}
},
"traitNames":[
],
"traits":{
}
},
"retention":0,
"qualifiedName":"default.table1@cl1",
"columns":[
{
"jsonClass":"org.apache.atlas.typesystem.json.InstanceSerialization$_Reference",
"id":{
"jsonClass":"org.apache.atlas.typesystem.json.InstanceSerialization$_Id",
"id":"-11893021824425518",
"version":0,
"typeName":"hive_column",
"state":"ACTIVE"
},
"typeName":"hive_column",
"values":{
"name":"abc",
"qualifiedName":"default.table1.abc@cl1",
"owner":"hive",
"type":"string",
"table":{
"jsonClass":"org.apache.atlas.typesystem.json.InstanceSerialization$_Id",
"id":"-11893021824425520",
"version":0,
"typeName":"hive_table",
"state":"ACTIVE"
}
},
"traitNames":[
],
"traits":{
}
}
],
"lastAccessTime":"2016-12-28T09:34:53.000Z",
"owner":"hive",
"sd":{
"jsonClass":"org.apache.atlas.typesystem.json.InstanceSerialization$_Reference",
"id":{
"jsonClass":"org.apache.atlas.typesystem.json.InstanceSerialization$_Id",
"id":"-11893021824425519",
"version":0,
"typeName":"hive_storagedesc",
"state":"ACTIVE"
},
"typeName":"hive_storagedesc",
"values":{
"location":"hdfs://mycluster/apps/hive/warehouse/table1",
"serdeInfo":{
"jsonClass":"org.apache.atlas.typesystem.json.InstanceSerialization$_Struct",
"typeName":"hive_serde",
"values":{
"serializationLib":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe",
"parameters":{
"serialization.format":"1"
}
}
},
"qualifiedName":"default.table1@cl1_storage",
"outputFormat":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat",
"compressed":false,
"numBuckets":-1,
"inputFormat":"org.apache.hadoop.mapred.TextInputFormat",
"parameters":{
},
"storedAsSubDirectories":false,
"table":{
"jsonClass":"org.apache.atlas.typesystem.json.InstanceSerialization$_Id",
"id":"-11893021824425520",
"version":0,
"typeName":"hive_table",
"state":"ACTIVE"
}
},
"traitNames":[
],
"traits":{
}
},
"parameters":{
"rawDataSize":"0",
"numFiles":"0",
"transient_lastDdlTime":"1482917693",
"totalSize":"0",
"COLUMN_STATS_ACCURATE":"{\"BASIC_STATS\":\"true\"}",
"numRows":"0"
},
"partitionKeys":[
]
},
"traitNames":[
],
"traits":{
}
}
],
"qualifiedName":"default.table2@cl1:1482918390000",
"queryText":"create table table2 as select * from table1",
"clusterName":"cl1",
"userName":"hive"
},
"traitNames":[
],
"traits":{
}
}]
Save the above JSON to a file. Step 6: Repeat step 2 with the step 5 JSON. Step 7: You should now be able to visualize the lineage between the two entities. The curl call will be the same as the one above.
... View more
12-22-2016
09:38 AM
2 Kudos
@Sujatha Veeswar Please follow the link below to get the Atlas service deployed and configured properly. It has all the information about the dependencies and configuration settings that Atlas needs. https://docs.hortonworks.com/HDPDocuments/HDP2/HDP-2.5.3/bk_data-governance/content/ch_hdp_data_governance_install_atlas_ambari.html Let me know if this resolves the issue.
... View more
12-14-2016
04:36 PM
2 Kudos
@Sunile Manjee Currently there is no support for exporting lineage from Atlas. However, this is part of the Atlas roadmap and should be available in the near future.
... View more