Support Questions
Find answers, ask questions, and share your expertise

NIFI JSON to PutParquet Avro Schema Issue

New Contributor

Hi all,

 

I am facing an Avro schema issue when writing JSON to Parquet with NiFi's PutParquet processor. Please find the schema and the actual incoming data format below.

 

When I use the schema below, I do get data, but the fields of the second record (the nested "result" record) are missing and the resulting table does not look right.

 

I want the second record's fields to come through column by column. Can anyone help me achieve that?

 

 

 

SCHEMA:

 

{
"name": "MyClass",
"type": "record",
"namespace": "com.acme.avro",
"fields": [
{
"name": "preview",
"type": "boolean"
},
{
"name": "offset",
"type": "int"
},
{
"name": "result",
"type": {
"name": "result",
"type": "record",
"fields": [
{
"name": "action",
"type": "string"
},
{
"name": "app",
"type": "string"
},
{
"name": "dest",
"type": "string"
},
{
"name": "dest_bunit",
"type": "string"
},
{
"name": "dest_category",
"type": "string"
},
{
"name": "dest_ip",
"type": "string"
},
{
"name": "dest_port",
"type": "string"
},
{
"name": "dest_priority",
"type": "string"
},
{
"name": "direction",
"type": "string"
},
{
"name": "duration",
"type": "string"
},
{
"name": "dvc",
"type": "string"
},
{
"name": "dvc_ip",
"type": "string"
},
{
"name": "protocol",
"type": "string"
},
{
"name": "response_time",
"type": "string"
},
{
"name": "rule",
"type": "string"
},
{
"name": "session_id",
"type": "string"
},
{
"name": "src",
"type": "string"
},
{
"name": "src_bunit",
"type": "string"
},
{
"name": "src_category",
"type": "string"
},
{
"name": "src_ip",
"type": "string"
},
{
"name": "src_port",
"type": "string"
},
{
"name": "src_priority",
"type": "string"
},
{
"name": "tag",
"type": "string"
},
{
"name": "usr",
"type": "string"
},
{
"name": "user_bunit",
"type": "string"
},
{
"name": "user_category",
"type": "string"
},
{
"name": "user_priority",
"type": "string"
},
{
"name": "vendor_product",
"type": "string"
},
{
"name": "vendor_product_uuid",
"type": "string"
},
{
"name": "ts",
"type": "string"
},
{
"name": "description",
"type": "string"
},
{
"name": "action_reason",
"type": "string"
},
{
"name": "severity",
"type": "string"
},
{
"name": "user_type",
"type": "string"
},
{
"name": "service_type",
"type": "string"
},
{
"name": "dt",
"type": "string"
},
{
"name": "hr",
"type": "string"
}
]
}
},
{
"name": "lastrow",
"type": [
"string",
"null"
]
}
]
}

 

 

DATA:

 

[ {
"preview" : true,
"offset" : 0,
"result" : {
"action" : "allowed",
"app" : "",
"dest" : "xx.xxx.xx.xx",
"dest_bunit" : "",
"dest_category" : "",
"dest_ip" : "xx.xxx.xx.xx",
"dest_port" : "443",
"dest_priority" : "",
"direction" : "N/A",
"duration" : "",
"dvc" : "xx.xxx.xx.xx",

"dvc_ip" : "xx.xxx.xx.xx",
"protocol" : "HTTPS",
"response_time" : "",
"rule" : "/Common/ds_policy_2",
"session_id" : "ad240f0634150d02",
"src" : "xx.xxx.xx.xx",
"src_bunit" : "",
"src_category" : "",
"src_ip" : "xx.xxx.xx.xx",
"src_port" : "62858",
"src_priority" : "",
"tag" : "proxy,web",
"usr" : "N/A",
"user_bunit" : "",
"user_category" : "",
"user_priority" : "",
"vendor_product" : "ASM",
"vendor_product_uuid" : "",
"ts" : "",
"description" : "",
"action_reason" : "",
"severity" : "Informational",
"user_type" : "",
"service_type" : "",
"dt" : "20200331",
"hr" : "15"
},
"lastrow" : null
} ]

 

 

Thanks in advance.

1 ACCEPTED SOLUTION

Accepted Solutions

New Contributor

I have removed the unwanted fields using the ReplaceText processor, and I have achieved what I wanted. Thanks!

View solution in original post

1 REPLY 1

New Contributor

I have removed the unwanted fields using the ReplaceText processor, and I have achieved what I wanted. Thanks!

View solution in original post