Created on 04-22-2016 06:48 AM - edited 09-16-2022 03:15 AM
I have an avsc like below, in which record type is under record type. When I am trying to import in hive, I get table created with columns error schema and all..
Kindly suggest me how to import such avsc in hive.
CREATE TABLE metro ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat' TBLPROPERTIES ('avro.schema.literal'='{ "namespace": "ly.stealth.xmlavro",
"protocol": "xml",
"type" : "record",
"name" : "MetroBillType",
"fields" : [ { "name" : "BillDate", "type" : "string" }, { "name" : "BillTime", "type" : "string" }, { "name" : "Remit_CompanyName", "type" : "string" }, { "name" : "Remit_Addr", "type" : "string"
}, { "name" : "Remit_CityStZip", "type" : "string"
}, { "name" : "Remit_Phone", "type" : "string"
}, { "name" : "Remit_Fax", "type" : "string"
}, { "name" : "Remit_TaxID", "type" : "string"
}, { "name" : "BillAcct_Break",
"type" :
{ "type" : "record", "name" : "BillAcct_BreakType",
"fields" : [ { "name" : "BillAcct", "type" : "string"
}, { "name" : "Invoice_Number", "type" : "int" }, { "name" : "Acct_Break", "type" : { "type" : "record", "name" : "Acct_BreakType", "fields" : [ { "name" : "Acct", "type" : "string"
}, { "name" : "Items", "type" : { "type" : "record", "name" : "ItemsType", "fields" : [ { "name" : "Item", "type" : { "type" : "array", "items" : { "type" : "record", "name" : "ItemType", "fields" : [ { "name" : "Account", "type" : "string"
}, { "name" : "Claim_Number", "type" : "string"
}, { "name" : "Insured_Name", "type" : "string"
}, { "name" : "Price", "type" : "float"
}, { "name" : "Control_Number", "type" : "int"
}, { "name" : "State", "type" : "string"
}, { "name" : "Report_Type_Code", "type" : "string"
}, { "name" : "Report_Type_Desc", "type" : "string"
}, { "name" : "Policy_Number", "type" : "string"
}, { "name" : "Date_of_Loss", "type" : "string"
}, { "name" : "Date_Received", "type" : "string"
}, { "name" : "Date_Closed", "type" : "string"
}, { "name" : "Days_to_Fill", "type" : "int"
}, { "name" : "Police_Dept", "type" : "string" }, {
"name" : "Attention", "type" : "string"
}, { "name" : "RequestID", "type" : "int"
}, { "name" : "ForceDup", "type" : "string"
}, { "name" : "BillAcct", "type" : "string"
}, { "name" : "BillCode", "type" : "string"
} ] } } } ] } }, { "name" : "Acct_Total", "type" : "float" }, { "name" : "Acct_Count", "type" : "int" } ] } }, { "name" : "Bill_Total", "type" : "float" }, { "name" : "Bill_Count", "type" : "int" } ] } }, { "name" : "Previous_Balance", "type" : "int" } ] }');
Thanks
Mamta
Created 04-22-2016 08:26 AM
There seems to be a length issue. If I compress your avro schema to one line it works in Hiev view and in beeline
DROP TABLE metro; CREATE TABLE metro ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat' TBLPROPERTIES ('avro.schema.literal'='{"namespace":"ly.stealth.xmlavro","protocol":"xml","type":"record","name":"MetroBillType","fields":[{"name":"BillDate","type":"string"},{"name":"BillTime","type":"string"},{"name":"Remit_CompanyName","type":"string"},{"name":"Remit_Addr","type":"string"},{"name":"Remit_CityStZip","type":"string"},{"name":"Remit_Phone","type":"string"},{"name":"Remit_Fax","type":"string"},{"name":"Remit_TaxID","type":"string"},{"name":"BillAcct_Break","type":{"type":"record","name":"BillAcct_BreakType","fields":[{"name":"BillAcct","type":"string"},{"name":"Invoice_Number","type":"int"},{"name":"Acct_Break","type":{"type":"record","name":"Acct_BreakType","fields":[{"name":"Acct","type":"string"},{"name":"Items","type":{"type":"record","name":"ItemsType","fields":[{"name":"Item","type":{"type":"array","items":{"type":"record","name":"ItemType","fields":[{"name":"Account","type":"string"},{"name":"Claim_Number","type":"string"},{"name":"Insured_Name","type":"string"},{"name":"Price","type":"float"},{"name":"Control_Number","type":"int"},{"name":"State","type":"string"},{"name":"Report_Type_Code","type":"string"},{"name":"Report_Type_Desc","type":"string"},{"name":"Policy_Number","type":"string"},{"name":"Date_of_Loss","type":"string"},{"name":"Date_Received","type":"string"},{"name":"Date_Closed","type":"string"},{"name":"Days_to_Fill","type":"int"},{"name":"Police_Dept","type":"string"},{"name":"Attention","type":"string"},{"name":"RequestID","type":"int"},{"name":"ForceDup","type":"string"},{"name":"BillAcct","type":"string"},{"name":"BillCode","type":"string"}]}}}]}},{"name":"Acct_Total","type":"float"},{"name":"Acct_Count","type":"int"}]}},{"name":"Bill_Total","type":"float"},{"name":"Bill_Count","type":"int"}]}},{"name":"Previous_Balance","type":"int"}]}');
SELECT * FROM metro; +-----------------+-----------------+--------------------------+-------------------+------------------------+--------------------+------------------+--------------------+-----------------------+-------------------------+--+ | metro.billdate | metro.billtime | metro.remit_companyname | metro.remit_addr | metro.remit_citystzip | metro.remit_phone | metro.remit_fax | metro.remit_taxid | metro.billacct_break | metro.previous_balance | +-----------------+-----------------+--------------------------+-------------------+------------------------+--------------------+------------------+--------------------+-----------------------+-------------------------+--+ +-----------------+-----------------+--------------------------+-------------------+------------------------+--------------------+------------------+--------------------+-----------------------+-------------------------+--+
With the nicely formatted avro schema I receive: "Unexpected end-of-input within/between ARRAY entries" which indicates that there is a length restriction for this parameter. Else try to use the avro.schema.url way.
Created 04-22-2016 08:26 AM
There seems to be a length issue. If I compress your avro schema to one line it works in Hiev view and in beeline
DROP TABLE metro; CREATE TABLE metro ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat' TBLPROPERTIES ('avro.schema.literal'='{"namespace":"ly.stealth.xmlavro","protocol":"xml","type":"record","name":"MetroBillType","fields":[{"name":"BillDate","type":"string"},{"name":"BillTime","type":"string"},{"name":"Remit_CompanyName","type":"string"},{"name":"Remit_Addr","type":"string"},{"name":"Remit_CityStZip","type":"string"},{"name":"Remit_Phone","type":"string"},{"name":"Remit_Fax","type":"string"},{"name":"Remit_TaxID","type":"string"},{"name":"BillAcct_Break","type":{"type":"record","name":"BillAcct_BreakType","fields":[{"name":"BillAcct","type":"string"},{"name":"Invoice_Number","type":"int"},{"name":"Acct_Break","type":{"type":"record","name":"Acct_BreakType","fields":[{"name":"Acct","type":"string"},{"name":"Items","type":{"type":"record","name":"ItemsType","fields":[{"name":"Item","type":{"type":"array","items":{"type":"record","name":"ItemType","fields":[{"name":"Account","type":"string"},{"name":"Claim_Number","type":"string"},{"name":"Insured_Name","type":"string"},{"name":"Price","type":"float"},{"name":"Control_Number","type":"int"},{"name":"State","type":"string"},{"name":"Report_Type_Code","type":"string"},{"name":"Report_Type_Desc","type":"string"},{"name":"Policy_Number","type":"string"},{"name":"Date_of_Loss","type":"string"},{"name":"Date_Received","type":"string"},{"name":"Date_Closed","type":"string"},{"name":"Days_to_Fill","type":"int"},{"name":"Police_Dept","type":"string"},{"name":"Attention","type":"string"},{"name":"RequestID","type":"int"},{"name":"ForceDup","type":"string"},{"name":"BillAcct","type":"string"},{"name":"BillCode","type":"string"}]}}}]}},{"name":"Acct_Total","type":"float"},{"name":"Acct_Count","type":"int"}]}},{"name":"Bill_Total","type":"float"},{"name":"Bill_Count","type":"int"}]}},{"name":"Previous_Balance","type":"int"}]}');
SELECT * FROM metro; +-----------------+-----------------+--------------------------+-------------------+------------------------+--------------------+------------------+--------------------+-----------------------+-------------------------+--+ | metro.billdate | metro.billtime | metro.remit_companyname | metro.remit_addr | metro.remit_citystzip | metro.remit_phone | metro.remit_fax | metro.remit_taxid | metro.billacct_break | metro.previous_balance | +-----------------+-----------------+--------------------------+-------------------+------------------------+--------------------+------------------+--------------------+-----------------------+-------------------------+--+ +-----------------+-----------------+--------------------------+-------------------+------------------------+--------------------+------------------+--------------------+-----------------------+-------------------------+--+
With the nicely formatted avro schema I receive: "Unexpected end-of-input within/between ARRAY entries" which indicates that there is a length restriction for this parameter. Else try to use the avro.schema.url way.