Created on 12-18-2024 03:31 PM - last edited on 12-18-2024 09:47 PM by VidyaSargur
Hi Everyone,
I have the below Input JSON and would like to flatten the data into the below mentioned output JSON format. I am new to the JOLT, I tried to transform (its incomplete) but could not achieve the result.
Kindly help me to get the desired output, thanks in advance.
Input JSON:
{
"data": {
"getItemListing": {
"edges": [
{
"node": {
"id": "2940126",
"identifier": 3119650,
"parentItems": [
{
"id": "364270",
"parentIdentifier": 371153
},
{
"id": "364258",
"parentIdentifier": 371147
}
],
"treatmentClusterIds": [
{
"metadata": [
{
"treatmentClusterIDs": "528"
}
],
"element": {
"name": "GOAT02",
"labelingClusters": [
{
"labelingCluster": "1000"
}
]
}
},
{
"metadata": [
{
"treatmentClusterIDs": "529"
}
],
"element": {
"name": "GOAT03",
"labelingClusters": [
{
"labelingCluster": "4045"
}
]
}
},
{
"metadata": [
{
"treatmentClusterIDs": "5581"
}
],
"element": {
"name": "GOAT04",
"labelingClusters": [
{
"labelingCluster": "2000"
},
{
"labelingCluster": "3000"
},
{
"labelingCluster": "4000"
}
]
}
}
]
}
},
{
"node": {
"id": "2940127",
"identifier": 3119651,
"parentItems": [
{
"id": "364270",
"parentIdentifier": 371154
}
],
"treatmentClusterIds": [
{
"metadata": [
{
"treatmentClusterIDs": "527"
}
],
"element": {
"name": "GOAT01",
"labelingClusters": [
{
"labelingCluster": "1000"
},
{
"labelingCluster": "4045"
},
{
"labelingCluster": "2000"
},
{
"labelingCluster": "3000"
},
{
"labelingCluster": "4000"
}
]
}
}
]
}
}
]
}
}
}
Output JSON:
[ {
"identifier": "3119650",
"parentIdentifier": "371153",
"Element" : "GOAT02",
"TreatmentId" : "528",
"LabelingCluster" : "1000"
}, {
"identifier": "3119650",
"parentIdentifier": "371153",
"Element" : "GOAT03",
"TreatmentId" : "529",
"LabelingCluster" : "4045"
}, {
"identifier": "3119650",
"parentIdentifier": "371153",
"Element" : "GOAT04",
"TreatmentId" : "5581",
"LabelingCluster" : "2000"
}, {
"identifier": "3119650",
"parentIdentifier": "371153",
"Element" : "GOAT04",
"TreatmentId" : "5581",
"LabelingCluster" : "3000"
}, {
"identifier": "3119650",
"parentIdentifier": "371153",
"Element" : "GOAT04",
"TreatmentId" : "5581",
"LabelingCluster" : "4000"
}, {
"identifier": "3119650",
"parentIdentifier": "371147",
"Element" : "GOAT02",
"TreatmentId" : "528",
"LabelingCluster" : "1000"
}, {
"identifier": "3119650",
"parentIdentifier": "371147",
"Element" : "GOAT03",
"TreatmentId" : "529",
"LabelingCluster" : "4045"
}, {
"identifier": "3119650",
"parentIdentifier": "371147",
"Element" : "GOAT04",
"TreatmentId" : "5581",
"LabelingCluster" : "2000"
}, {
"identifier": "3119650",
"parentIdentifier": "371147",
"Element" : "GOAT04",
"TreatmentId" : "5581",
"LabelingCluster" : "3000"
}, {
"identifier": "300119650",
"parentIdentifier": "371147",
"Element" : "GOAT04",
"TreatmentId" : "5581",
"LabelingCluster" : "4000"
}, {
"identifier": "3119651",
"parentIdentifier": "371154",
"Element" : "GOAT01",
"TreatmentId" : "527",
"LabelingCluster" : "1000"
}, {
"identifier": "3119651",
"parentIdentifier": "371154",
"Element" : "GOAT01",
"TreatmentId" : "527",
"LabelingCluster" : "4045"
}, {
"identifier": "3119651",
"parentIdentifier": "371154",
"Element" : "GOAT01",
"TreatmentId" : "527",
"LabelingCluster" : "2000"
}, {
"identifier": "3119651",
"parentIdentifier": "371154",
"Element" : "GOAT01",
"TreatmentId" : "527",
"LabelingCluster" : "3000"
}, {
"identifier": "3119651",
"parentIdentifier": "371154",
"Element" : "GOAT01",
"TreatmentId" : "527",
"LabelingCluster" : "4000"
} ]
JOLT spec I tired (incomplete):
[
{
"operation": "shift",
"spec": {
"data": {
"getItemListing": {
"edges": {
"*": {
"node": {
"treatmentClusterIds": {
"*": "[]"
}
}
}
}
}
}
}
}, {
"operation": "shift",
"spec": {
"*": {
"element": {
"labelingClusters": {
"*": {
"labelingCluster": "[&4].[&1].LabelingCluster",
"@(2,name)": "[&4].[&1].Element",
"@3,metadata": {
"*": {
"treatmentClusterIDs": "[&6].[&2].TreatmentId"
}
}
}
}
}
}
}
},
{
"operation": "shift",
"spec": {
"*": {
"*": "[]"
}
}
}
]
Created 12-19-2024 08:33 PM
Hi @Velankanni ,
I got to say that this is one of the most complex jolt I have ever written. Im starting to question if its even worth doing it because it will make maintaining the the spec very hard. I would urge you to look at this post which has similar request of flattening complex json and there I recommended to use JSTL transformation spec instead as it would simplify things by a lot.
As far as the jolt spec , I was able to do it in 4 transformation:
1- Start form the leaf "labelingCluster" and work your way up to collect common data for each node and assign arrays for the values of metadata & parent identifiers.
2- traverse the metadata array from step 1 to replicate the values from above against each metadata array item.
3- traverse the parent identifier array from step 1 to replicate the values from step2 against each parent identifier array item.
4- Bucket result from step3 into single array
It important to maintain parent\sub array index position during each transformation to help you group fields correctly.
[
{
"operation": "shift",
"spec": {
"data": {
"getItemListing": {
"edges": {
"*": {
"node": {
"treatmentClusterIds": {
"*": {
"element": {
"labelingClusters": {
"*": {
"labelingCluster": {
"@(3,name)": "[&8].treatmentClusterIds[&5].labelingClusters[&2].Element",
"@(6,identifier)": "[&8].treatmentClusterIds[&5].labelingClusters[&2].identifier",
"@": "[&8].treatmentClusterIds[&5].labelingClusters[&2].labelingCluster",
"@(4,metadata)": {
"*": {
"treatmentClusterIDs": "[&10].treatmentClusterIds[&7].labelingClusters[&4].metadata[]"
}
},
"@(6,parentItems)": {
"*": {
"parentIdentifier": "[&10].treatmentClusterIds[&7].labelingClusters[&4].parentId[]"
}
}
}
}
}
}
}
}
}
}
}
}
}
}
},
{
"operation": "shift",
"spec": {
"*": {
"treatmentClusterIds": {
"*": {
"labelingClusters": {
"*": {
"metadata": {
"*": {
"@(2,identifier)": "[&7].treatmentClusterIds[&5].labelingClusters[&3][&1].identifier",
"@": "[&7].treatmentClusterIds[&5].labelingClusters[&3][&1].TreatmentId",
"@(2,Element)": "[&7].treatmentClusterIds[&5].labelingClusters[&3][&1].Element",
"@(2,labelingCluster)": "[&7].treatmentClusterIds[&5].labelingClusters[&3][&1].labelingCluster",
"@(2,parentId)": "[&7].treatmentClusterIds[&5].labelingClusters[&3][&1].parentId"
}
}
}
}
}
}
}
}
}
,
{
"operation": "shift",
"spec": {
"*": {
"treatmentClusterIds": {
"*": {
"labelingClusters": {
"*": {
"*": {
"parentId": {
"*": {
"@(2,identifier)": "[&8].treatmentClusterIds[&6].labelingClusters[&4][&3][&1].identifier",
"@": "[&8].treatmentClusterIds[&6].labelingClusters[&4][&3][&1].parentIdentifier",
"@(2,Element)": "[&8].treatmentClusterIds[&6].labelingClusters[&4][&3][&1].Element",
"@(2,TreatmentId)": "[&8].treatmentClusterIds[&6].labelingClusters[&4][&3][&1].TreatmentId",
"@(2,labelingCluster)": "[&8].treatmentClusterIds[&6].labelingClusters[&4][&3][&1].labelingCluster"
}
}
}
}
}
}
}
}
}
},
{
"operation": "shift",
"spec": {
"*": {
"treatmentClusterIds": {
"*": {
"labelingClusters": {
"*": {
"*": {
"*": "[]"
}
}
}
}
}
}
}
}
]
Again, look into JSTL spec to simplify, or try to break up the json and store into database where you can easily perform sql queries to flatten the data using join
If this helps please accept the solution
Thanks
Created on 12-18-2024 09:03 PM - edited 12-18-2024 09:48 PM
@Velankanni, Welcome to our community! To help you get the best possible answer, I have tagged in our NiFi experts @SAMSAL @MattWho who may be able to assist you further.
Please feel free to provide any additional information or details about your query. We hope that you will find a satisfactory solution to your question.
Regards,
Vidya Sargur,Created 12-19-2024 04:00 AM
Hi @VidyaSargur , Thank you.
Created 12-19-2024 06:53 AM
Hi @Velankanni ,
Can you explain what are you trying to accomplish with the transformation?
I was having a hard time understanding how you came up with output given that you are pulling fields from different arrays with different cardinality.
Created 12-19-2024 05:44 PM
Hi @SAMSAL , Thank you, this JSON is for Bill Of Materials and we have to flatten the entire array at each node and repeat the array values to other columns.
Created on 12-19-2024 09:08 AM - edited 12-19-2024 09:09 AM
@Velankanni
If you are still having the problem you can you try this JOLT remember the spoiler tag distorts the JSON
JOLT Spec
Created 12-19-2024 08:33 PM
Hi @Velankanni ,
I got to say that this is one of the most complex jolt I have ever written. Im starting to question if its even worth doing it because it will make maintaining the the spec very hard. I would urge you to look at this post which has similar request of flattening complex json and there I recommended to use JSTL transformation spec instead as it would simplify things by a lot.
As far as the jolt spec , I was able to do it in 4 transformation:
1- Start form the leaf "labelingCluster" and work your way up to collect common data for each node and assign arrays for the values of metadata & parent identifiers.
2- traverse the metadata array from step 1 to replicate the values from above against each metadata array item.
3- traverse the parent identifier array from step 1 to replicate the values from step2 against each parent identifier array item.
4- Bucket result from step3 into single array
It important to maintain parent\sub array index position during each transformation to help you group fields correctly.
[
{
"operation": "shift",
"spec": {
"data": {
"getItemListing": {
"edges": {
"*": {
"node": {
"treatmentClusterIds": {
"*": {
"element": {
"labelingClusters": {
"*": {
"labelingCluster": {
"@(3,name)": "[&8].treatmentClusterIds[&5].labelingClusters[&2].Element",
"@(6,identifier)": "[&8].treatmentClusterIds[&5].labelingClusters[&2].identifier",
"@": "[&8].treatmentClusterIds[&5].labelingClusters[&2].labelingCluster",
"@(4,metadata)": {
"*": {
"treatmentClusterIDs": "[&10].treatmentClusterIds[&7].labelingClusters[&4].metadata[]"
}
},
"@(6,parentItems)": {
"*": {
"parentIdentifier": "[&10].treatmentClusterIds[&7].labelingClusters[&4].parentId[]"
}
}
}
}
}
}
}
}
}
}
}
}
}
}
},
{
"operation": "shift",
"spec": {
"*": {
"treatmentClusterIds": {
"*": {
"labelingClusters": {
"*": {
"metadata": {
"*": {
"@(2,identifier)": "[&7].treatmentClusterIds[&5].labelingClusters[&3][&1].identifier",
"@": "[&7].treatmentClusterIds[&5].labelingClusters[&3][&1].TreatmentId",
"@(2,Element)": "[&7].treatmentClusterIds[&5].labelingClusters[&3][&1].Element",
"@(2,labelingCluster)": "[&7].treatmentClusterIds[&5].labelingClusters[&3][&1].labelingCluster",
"@(2,parentId)": "[&7].treatmentClusterIds[&5].labelingClusters[&3][&1].parentId"
}
}
}
}
}
}
}
}
}
,
{
"operation": "shift",
"spec": {
"*": {
"treatmentClusterIds": {
"*": {
"labelingClusters": {
"*": {
"*": {
"parentId": {
"*": {
"@(2,identifier)": "[&8].treatmentClusterIds[&6].labelingClusters[&4][&3][&1].identifier",
"@": "[&8].treatmentClusterIds[&6].labelingClusters[&4][&3][&1].parentIdentifier",
"@(2,Element)": "[&8].treatmentClusterIds[&6].labelingClusters[&4][&3][&1].Element",
"@(2,TreatmentId)": "[&8].treatmentClusterIds[&6].labelingClusters[&4][&3][&1].TreatmentId",
"@(2,labelingCluster)": "[&8].treatmentClusterIds[&6].labelingClusters[&4][&3][&1].labelingCluster"
}
}
}
}
}
}
}
}
}
},
{
"operation": "shift",
"spec": {
"*": {
"treatmentClusterIds": {
"*": {
"labelingClusters": {
"*": {
"*": {
"*": "[]"
}
}
}
}
}
}
}
}
]
Again, look into JSTL spec to simplify, or try to break up the json and store into database where you can easily perform sql queries to flatten the data using join
If this helps please accept the solution
Thanks
Created 12-19-2024 09:23 PM
Hi @SAMSAL ,
This works really fine. Thank you so much for your solution.
I got your idea of splitting the JSON and perform the transformation in SQL table. I will work on that.
Thank you again.