Support Questions

Find answers, ask questions, and share your expertise

Jolt spec to flatten the nested JSON

avatar
Explorer

Hi Everyone,

I have the below Input JSON and would like to flatten the data into the below mentioned output JSON format. I am new to the JOLT, I tried to transform (its incomplete) but could not achieve the result.

Kindly help me to get the desired output, thanks in advance.

Input JSON:

 

 

 

 

 

{
  "data": {
    "getItemListing": {
      "edges": [
        {
          "node": {
            "id": "2940126",
            "identifier": 3119650,
            "parentItems": [
              {
                "id": "364270",
                "parentIdentifier": 371153
              },
              {
                "id": "364258",
                "parentIdentifier": 371147
              }
            ],
            "treatmentClusterIds": [
              {
                "metadata": [
                  {
                    "treatmentClusterIDs": "528"
                  }
                ],
                "element": {
                  "name": "GOAT02",
                  "labelingClusters": [
                    {
                      "labelingCluster": "1000"
                    }
                  ]
                }
              },
              {
                "metadata": [
                  {
                    "treatmentClusterIDs": "529"
                  }
                ],
                "element": {
                  "name": "GOAT03",
                  "labelingClusters": [
                    {
                      "labelingCluster": "4045"
                    }
                  ]
                }
              },
              {
                "metadata": [
                  {
                    "treatmentClusterIDs": "5581"
                  }
                ],
                "element": {
                  "name": "GOAT04",
                  "labelingClusters": [
                    {
                      "labelingCluster": "2000"
                    },
                    {
                      "labelingCluster": "3000"
                    },
                    {
                      "labelingCluster": "4000"
                    }
                  ]
                }
              }
            ]
          }
        },
        {
          "node": {
            "id": "2940127",
            "identifier": 3119651,
            "parentItems": [
              {
                "id": "364270",
                "parentIdentifier": 371154
              }
            ],
            "treatmentClusterIds": [
              {
                "metadata": [
                  {
                    "treatmentClusterIDs": "527"
                  }
                ],
                "element": {
                  "name": "GOAT01",
                  "labelingClusters": [
                    {
                      "labelingCluster": "1000"
                    },
                    {
                      "labelingCluster": "4045"
                    },
                    {
                      "labelingCluster": "2000"
                    },
                    {
                      "labelingCluster": "3000"
                    },
                    {
                      "labelingCluster": "4000"
                    }
                  ]
                }
              }
            ]
          }
        }
      ]
    }
  }
}

 

 

 

 

 

Output JSON:

 

 

 

 

 

[ {
  "identifier": "3119650",
  "parentIdentifier": "371153",
  "Element" : "GOAT02",
  "TreatmentId" : "528",
  "LabelingCluster" : "1000"
}, {
  "identifier": "3119650",
  "parentIdentifier": "371153",
  "Element" : "GOAT03",
  "TreatmentId" : "529",
  "LabelingCluster" : "4045"
}, {
  "identifier": "3119650",
  "parentIdentifier": "371153",
  "Element" : "GOAT04",
  "TreatmentId" : "5581",
  "LabelingCluster" : "2000"
}, {
  "identifier": "3119650",
  "parentIdentifier": "371153",
  "Element" : "GOAT04",
  "TreatmentId" : "5581",
  "LabelingCluster" : "3000"
}, {
  "identifier": "3119650",
  "parentIdentifier": "371153",
  "Element" : "GOAT04",
  "TreatmentId" : "5581",
  "LabelingCluster" : "4000"
}, {
  "identifier": "3119650",
  "parentIdentifier": "371147",
  "Element" : "GOAT02",
  "TreatmentId" : "528",
  "LabelingCluster" : "1000"
}, {
  "identifier": "3119650",
  "parentIdentifier": "371147",
  "Element" : "GOAT03",
  "TreatmentId" : "529",
  "LabelingCluster" : "4045"
}, {
  "identifier": "3119650",
  "parentIdentifier": "371147",
  "Element" : "GOAT04",
  "TreatmentId" : "5581",
  "LabelingCluster" : "2000"
}, {
  "identifier": "3119650",
  "parentIdentifier": "371147",
  "Element" : "GOAT04",
  "TreatmentId" : "5581",
  "LabelingCluster" : "3000"
}, {
  "identifier": "300119650",
  "parentIdentifier": "371147",
  "Element" : "GOAT04",
  "TreatmentId" : "5581",
  "LabelingCluster" : "4000"
}, {
  "identifier": "3119651",
  "parentIdentifier": "371154",
  "Element" : "GOAT01",
  "TreatmentId" : "527",
  "LabelingCluster" : "1000"
}, {
  "identifier": "3119651",
  "parentIdentifier": "371154",
  "Element" : "GOAT01",
  "TreatmentId" : "527",
  "LabelingCluster" : "4045"
}, {
  "identifier": "3119651",
  "parentIdentifier": "371154",
  "Element" : "GOAT01",
  "TreatmentId" : "527",
  "LabelingCluster" : "2000"
}, {
  "identifier": "3119651",
  "parentIdentifier": "371154",
  "Element" : "GOAT01",
  "TreatmentId" : "527",
  "LabelingCluster" : "3000"
}, {
  "identifier": "3119651",
  "parentIdentifier": "371154",
  "Element" : "GOAT01",
  "TreatmentId" : "527",
  "LabelingCluster" : "4000"
} ]

 

 

 

 

 

JOLT spec I tired (incomplete):

 

 

 

 

 

[
  {
    "operation": "shift",
    "spec": {
      "data": {
        "getItemListing": {
          "edges": {
            "*": {
              "node": {
                "treatmentClusterIds": {
                  "*": "[]"
                }
              }
            }
          }
        }
      }
    }
 }, {
    "operation": "shift",
    "spec": {
      "*": {
        "element": {
          "labelingClusters": {
            "*": {
              "labelingCluster": "[&4].[&1].LabelingCluster",
              "@(2,name)": "[&4].[&1].Element",
              "@3,metadata": {
                "*": {
                  "treatmentClusterIDs": "[&6].[&2].TreatmentId"
                }
              }
            }
          }
        }
      }
    }
 },
  {
    "operation": "shift",
    "spec": {
      "*": {
        "*": "[]"
      }
    }
 }
]

 

 

 

 

 

 

1 ACCEPTED SOLUTION

avatar
Super Guru

Hi @Velankanni ,

I got to say that this is one of the most complex jolt I have ever written. Im starting to question if its even worth doing it because it will make maintaining the the spec very hard. I would urge you to look at this post which has similar request of flattening complex json and there I recommended to use JSTL transformation spec instead as it would simplify things by a lot.

As far as the jolt spec , I was able to do it in 4 transformation:

1- Start form the leaf "labelingCluster" and work your way up to collect common data for each node and assign arrays for the values of metadata & parent identifiers.

2- traverse the metadata array from step 1 to replicate the values from above against each metadata array item.

3- traverse the parent identifier array from step 1 to replicate the values from step2 against each parent identifier array item.

4- Bucket result from step3 into single array

 

It important to maintain parent\sub array  index position during each transformation to help you group fields correctly.

 

 

 

[
  {
    "operation": "shift",
    "spec": {
      "data": {
        "getItemListing": {
          "edges": {
            "*": {
              "node": {
                "treatmentClusterIds": {
                  "*": {
                    "element": {
                      "labelingClusters": {
                        "*": {
                          "labelingCluster": {
                            "@(3,name)": "[&8].treatmentClusterIds[&5].labelingClusters[&2].Element",
                            "@(6,identifier)": "[&8].treatmentClusterIds[&5].labelingClusters[&2].identifier",
                            "@": "[&8].treatmentClusterIds[&5].labelingClusters[&2].labelingCluster",
                            "@(4,metadata)": {
                              "*": {
                                "treatmentClusterIDs": "[&10].treatmentClusterIds[&7].labelingClusters[&4].metadata[]"
                              }
                            },
                            "@(6,parentItems)": {
                              "*": {
                                "parentIdentifier": "[&10].treatmentClusterIds[&7].labelingClusters[&4].parentId[]"
                              }
                            }
                          }
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
      }
    }
  },
  {
    "operation": "shift",
    "spec": {
      "*": {
        "treatmentClusterIds": {
          "*": {
            "labelingClusters": {
              "*": {
                "metadata": {
                  "*": {
                    "@(2,identifier)": "[&7].treatmentClusterIds[&5].labelingClusters[&3][&1].identifier",
                    "@": "[&7].treatmentClusterIds[&5].labelingClusters[&3][&1].TreatmentId",
                    "@(2,Element)": "[&7].treatmentClusterIds[&5].labelingClusters[&3][&1].Element",
                    "@(2,labelingCluster)": "[&7].treatmentClusterIds[&5].labelingClusters[&3][&1].labelingCluster",
                    "@(2,parentId)": "[&7].treatmentClusterIds[&5].labelingClusters[&3][&1].parentId"
                  }
                }
              }
            }
          }
        }
      }
    }
  }
  ,
  {
    "operation": "shift",
    "spec": {
      "*": {
        "treatmentClusterIds": {
          "*": {
            "labelingClusters": {
              "*": {
                "*": {
                  "parentId": {
                    "*": {
                      "@(2,identifier)": "[&8].treatmentClusterIds[&6].labelingClusters[&4][&3][&1].identifier",
                      "@": "[&8].treatmentClusterIds[&6].labelingClusters[&4][&3][&1].parentIdentifier",
                      "@(2,Element)": "[&8].treatmentClusterIds[&6].labelingClusters[&4][&3][&1].Element",
                      "@(2,TreatmentId)": "[&8].treatmentClusterIds[&6].labelingClusters[&4][&3][&1].TreatmentId",
                      "@(2,labelingCluster)": "[&8].treatmentClusterIds[&6].labelingClusters[&4][&3][&1].labelingCluster"
                    }
                  }
                }
              }
            }
          }
        }
      }
    }
  },
  {
    "operation": "shift",
    "spec": {
      "*": {
        "treatmentClusterIds": {
          "*": {
            "labelingClusters": {
              "*": {
                "*": {
                  "*": "[]"
                }
              }
            }
          }
        }
      }
    }
  }

]

 

Again, look into JSTL spec to simplify, or try to break up the json and store into database where you can easily perform sql queries to flatten the data using join

 

If this helps please accept the solution

Thanks

 

 

 

View solution in original post

7 REPLIES 7

avatar
Community Manager

@Velankanni, Welcome to our community! To help you get the best possible answer, I have tagged in our NiFi experts @SAMSAL @MattWho   who may be able to assist you further.

Please feel free to provide any additional information or details about your query. We hope that you will find a satisfactory solution to your question.



Regards,

Vidya Sargur,
Community Manager


Was your question answered? Make sure to mark the answer as the accepted solution.
If you find a reply useful, say thanks by clicking on the thumbs up button.
Learn more about the Cloudera Community:

avatar
Explorer

 Hi @VidyaSargur , Thank you.

avatar
Super Guru

Hi @Velankanni ,

Can you explain what are you trying to accomplish with the transformation? 

I was having a hard time understanding how you came up with output given that you are pulling fields from different arrays with different cardinality.

 

 

avatar
Explorer

Hi @SAMSAL , Thank you, this JSON is for Bill Of Materials and we have to flatten the entire array at each node and repeat the array values to other columns.


avatar
Master Mentor

@Velankanni 
If you are still having  the problem you can you try this JOLT remember the spoiler tag distorts the JSON 

JOLT Spec

Spoiler
[
{
"operation": "shift",
"spec": {
"data": {
"getItemListing": {
"edges": {
"*": {
"node": {
"identifier": "[&1].identifier",
"parentItems": {
"*": {
"parentIdentifier": "[&3].[&1].parentIdentifier"
}
},
"treatmentClusterIds": {
"*": {
"metadata": {
"*": {
"treatmentClusterIDs": "[&4].[&2].[&1].TreatmentId"
}
},
"element": {
"name": "[&4].[&2].[&1].Element",
"labelingClusters": {
"*": {
"labelingCluster": "[&5].[&3].[&2].LabelingCluster"
}
}
}
}
}
}
}
}
}
}
}
},
{
"operation": "shift",
"spec": {
"*": {
"*": {
"*": {
"*": {
"*": {
"*": {
"$": "[#6].identifier",
"parentIdentifier": "[#6].parentIdentifier",
"Element": "[#6].Element",
"TreatmentId": "[#6].TreatmentId",
"LabelingCluster": "[#6].LabelingCluster"
}
}
}
}
}
}
}
}
]
  • Place the Input JSON in a file  input.json.
  • Use a JOLT processor
  • Apply the spec above and verify the output matches the Output JSON format.

    Happy hadooping

 

avatar
Super Guru

Hi @Velankanni ,

I got to say that this is one of the most complex jolt I have ever written. Im starting to question if its even worth doing it because it will make maintaining the the spec very hard. I would urge you to look at this post which has similar request of flattening complex json and there I recommended to use JSTL transformation spec instead as it would simplify things by a lot.

As far as the jolt spec , I was able to do it in 4 transformation:

1- Start form the leaf "labelingCluster" and work your way up to collect common data for each node and assign arrays for the values of metadata & parent identifiers.

2- traverse the metadata array from step 1 to replicate the values from above against each metadata array item.

3- traverse the parent identifier array from step 1 to replicate the values from step2 against each parent identifier array item.

4- Bucket result from step3 into single array

 

It important to maintain parent\sub array  index position during each transformation to help you group fields correctly.

 

 

 

[
  {
    "operation": "shift",
    "spec": {
      "data": {
        "getItemListing": {
          "edges": {
            "*": {
              "node": {
                "treatmentClusterIds": {
                  "*": {
                    "element": {
                      "labelingClusters": {
                        "*": {
                          "labelingCluster": {
                            "@(3,name)": "[&8].treatmentClusterIds[&5].labelingClusters[&2].Element",
                            "@(6,identifier)": "[&8].treatmentClusterIds[&5].labelingClusters[&2].identifier",
                            "@": "[&8].treatmentClusterIds[&5].labelingClusters[&2].labelingCluster",
                            "@(4,metadata)": {
                              "*": {
                                "treatmentClusterIDs": "[&10].treatmentClusterIds[&7].labelingClusters[&4].metadata[]"
                              }
                            },
                            "@(6,parentItems)": {
                              "*": {
                                "parentIdentifier": "[&10].treatmentClusterIds[&7].labelingClusters[&4].parentId[]"
                              }
                            }
                          }
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
      }
    }
  },
  {
    "operation": "shift",
    "spec": {
      "*": {
        "treatmentClusterIds": {
          "*": {
            "labelingClusters": {
              "*": {
                "metadata": {
                  "*": {
                    "@(2,identifier)": "[&7].treatmentClusterIds[&5].labelingClusters[&3][&1].identifier",
                    "@": "[&7].treatmentClusterIds[&5].labelingClusters[&3][&1].TreatmentId",
                    "@(2,Element)": "[&7].treatmentClusterIds[&5].labelingClusters[&3][&1].Element",
                    "@(2,labelingCluster)": "[&7].treatmentClusterIds[&5].labelingClusters[&3][&1].labelingCluster",
                    "@(2,parentId)": "[&7].treatmentClusterIds[&5].labelingClusters[&3][&1].parentId"
                  }
                }
              }
            }
          }
        }
      }
    }
  }
  ,
  {
    "operation": "shift",
    "spec": {
      "*": {
        "treatmentClusterIds": {
          "*": {
            "labelingClusters": {
              "*": {
                "*": {
                  "parentId": {
                    "*": {
                      "@(2,identifier)": "[&8].treatmentClusterIds[&6].labelingClusters[&4][&3][&1].identifier",
                      "@": "[&8].treatmentClusterIds[&6].labelingClusters[&4][&3][&1].parentIdentifier",
                      "@(2,Element)": "[&8].treatmentClusterIds[&6].labelingClusters[&4][&3][&1].Element",
                      "@(2,TreatmentId)": "[&8].treatmentClusterIds[&6].labelingClusters[&4][&3][&1].TreatmentId",
                      "@(2,labelingCluster)": "[&8].treatmentClusterIds[&6].labelingClusters[&4][&3][&1].labelingCluster"
                    }
                  }
                }
              }
            }
          }
        }
      }
    }
  },
  {
    "operation": "shift",
    "spec": {
      "*": {
        "treatmentClusterIds": {
          "*": {
            "labelingClusters": {
              "*": {
                "*": {
                  "*": "[]"
                }
              }
            }
          }
        }
      }
    }
  }

]

 

Again, look into JSTL spec to simplify, or try to break up the json and store into database where you can easily perform sql queries to flatten the data using join

 

If this helps please accept the solution

Thanks

 

 

 

avatar
Explorer

Hi @SAMSAL ,

This works really fine. Thank you so much for your solution.

I got your idea of splitting the JSON and perform the transformation in SQL table. I will work on that.

Thank you again.