Support Questions

Find answers, ask questions, and share your expertise

How to parse Yarn Metrics via REST ( api v49 )json output to Hive JsonSerde Table

New Contributor

Trying to load and parse YARN application metrics and store it for performance analysis by using REST api call. Data returned is a single json entry with multiple application entries nested in a complex format. 

Can someone suggest or provide correct DDL how to map all data points to a hive external table since JsonSerde can parse and tabulate it?

Sample json data format: 

{"apps":{"app":[{"id":"application_282828282828_12717","user":"xyz","name":"xyz-4b6bdae2-1a0c-4772-bd8e-0d7454268b82","queue":"root.users.dummy","state":"finished","finalstatus":"succeeded","progress":100.0,"trackingui":"history","trackingurl":"http://dang:8088/proxy/application_282828282828_12717/","diagnostics":"session stats:submitteddags=1, successfuldags=1, faileddags=0, killeddags=0\n","clusterid":282828282828,"applicationtype":"aquaman","applicationtags":"ABC,xyz_20221107070124_2beb5d90-24c7-4b1b-b977-3c9af1397195,userid=dummy","priority":0,"startedtime":1667822485626,"launchtime":1667822485767,"finishedtime":1667822553365,"elapsedtime":67739,"amcontainerlogs":"http://dingdong:8042/node/containerlogs/container_e65_282828282828_12717_01_000001/xyz","amhosthttpaddress":"dingdong:8042","amrpcaddress":"dingdong:46457","masternodeid":"dingdong:8041","allocatedmb":-1,"allocatedvcores":-1,"reservedmb":-1,"reservedvcores":-1,"runningcontainers":-1,"memoryseconds":1264304,"vcoreseconds":79,"queueusagepercentage":0.0,"clusterusagepercentage":0.0,"resourcesecondsmap":{"entry":{"key":"memory-mb","value":"1264304"},"entry":{"key":"vcores","value":"79"}},"preemptedresourcemb":0,"preemptedresourcevcores":0,"numnonamcontainerpreempted":0,"numamcontainerpreempted":0,"preemptedmemoryseconds":0,"preemptedvcoreseconds":0,"preemptedresourcesecondsmap":{},"logaggregationstatus":"succeeded","unmanagedapplication":false,"amnodelabelexpression":"","timeouts":{"timeout":[{"type":"lifetime","expirytime":"unlimited","remainingtimeinseconds":-1}]}},{"id":"application_282828282828_12724","user":"xyz","name":"xyz-94962a3e-d230-4fd0-b68b-01b59dd3299d","queue":"root.users.dummy","state":"finished","finalstatus":"succeeded","progress":100.0,"trackingui":"history","trackingurl":"http://dang:8088/proxy/application_282828282828_12724/","diagnostics":"session stats:submitteddags=1, successfuldags=1, faileddags=0, killeddags=0\n","clusterid":282828282828,"applicationtype":"aquaman","applicationtags":"ZZZ_,xyz_20221107070301_e6f788db-e39c-49b6-97d5-6a02ff994c00,userid=dummy","priority":0,"startedtime":1667822585231,"launchtime":1667822585437,"finishedtime":1667822631435,"elapsedtime":46204,"amcontainerlogs":"http://ding:8042/node/containerlogs/container_e65_282828282828_12724_01_000002/xyz","amhosthttpaddress":"ding:8042","amrpcaddress":"ding:46648","masternodeid":"ding:8041","allocatedmb":-1,"allocatedvcores":-1,"reservedmb":-1,"reservedvcores":-1,"runningcontainers":-1,"memoryseconds":5603339,"vcoreseconds":430,"queueusagepercentage":0.0,"clusterusagepercentage":0.0,"resourcesecondsmap":{"entry":{"key":"memory-mb","value":"5603339"},"entry":{"key":"vcores","value":"430"}},"preemptedresourcemb":0,"preemptedresourcevcores":0,"numnonamcontainerpreempted":0,"numamcontainerpreempted":0,"preemptedmemoryseconds":0,"preemptedvcoreseconds":0,"preemptedresourcesecondsmap":{},"logaggregationstatus":"time_out","unmanagedapplication":false,"amnodelabelexpression":"","timeouts":{"timeout":[{"type":"lifetime","expirytime":"unlimited","remainingtimeinseconds":-1}]}},{"id":"application_282828282828_12736","user":"xyz","name":"xyz-1a9c73ef-2992-40a5-aaad-9f0688bb04f4","queue":"root.users.dummy","state":"finished","finalstatus":"succeeded","progress":100.0,"trackingui":"history","trackingurl":"http://dang:8088/proxy/application_282828282828_12736/","diagnostics":"session stats:submitteddags=1, successfuldags=1, faileddags=0, killeddags=0\n","clusterid":282828282828,"applicationtype":"aquaman","applicationtags":"BLAHBLAH,xyz_20221107070609_8d261352-3efa-46c5-a5a0-8a3cd745d180,userid=dummy","priority":0,"startedtime":1667822771170,"launchtime":1667822773663,"finishedtime":1667822820351,"elapsedtime":49181,"amcontainerlogs":"http://dong:8042/node/containerlogs/container_e65_282828282828_12736_01_000001/xyz","amhosthttpaddress":"dong:8042","amrpcaddress":"dong:34266","masternodeid":"dong:8041","allocatedmb":-1,"allocatedvcores":-1,"reservedmb":-1,"reservedvcores":-1,"runningcontainers":-1,"memoryseconds":1300011,"vcoreseconds":89,"queueusagepercentage":0.0,"clusterusagepercentage":0.0,"resourcesecondsmap":{"entry":{"key":"memory-mb","value":"1300011"},"entry":{"key":"vcores","value":"89"}},"preemptedresourcemb":0,"preemptedresourcevcores":0,"numnonamcontainerpreempted":0,"numamcontainerpreempted":0,"preemptedmemoryseconds":0,"preemptedvcoreseconds":0,"preemptedresourcesecondsmap":{},"logaggregationstatus":"succeeded","unmanagedapplication":false,"amnodelabelexpression":"","timeouts":{"timeout":[{"type":"lifetime","expirytime":"unlimited","remainingtimeinseconds":-1}]}},{"id":"application_282828282828_12735","user":"xyz","name":"xyz-d5f56a0a-9c6b-4651-8f88-6eaff5953777","queue":"root.users.dummy","state":"finished","finalstatus":"succeeded","progress":100.0,"trackingui":"history","trackingurl":"http://dang:8088/proxy/application_282828282828_12735/","diagnostics":"session stats:submitteddags=1, successfuldags=1, faileddags=0, killeddags=0\n","clusterid":282828282828,"applicationtype":"aquaman","applicationtags":"HAHAHA_,xyz_20221107070605_a082d9d8-912f-4278-a2ef-5dfe66089fd7,userid=dummy","priority":0,"startedtime":1667822766897,"launchtime":1667822766999,"finishedtime":1667822796759,"elapsedtime":29862,"amcontainerlogs":"http://dung:8042/node/containerlogs/container_e65_282828282828_12735_01_000001/xyz","amhosthttpaddress":"dung:8042","amrpcaddress":"dung:42765","masternodeid":"dung:8041","allocatedmb":-1,"allocatedvcores":-1,"reservedmb":-1,"reservedvcores":-1,"runningcontainers":-1,"memoryseconds":669695,"vcoreseconds":44,"queueusagepercentage":0.0,"clusterusagepercentage":0.0,"resourcesecondsmap":{"entry":{"key":"memory-mb","value":"669695"},"entry":{"key":"vcores","value":"44"}},"preemptedresourcemb":0,"preemptedresourcevcores":0,"numnonamcontainerpreempted":0,"numamcontainerpreempted":0,"preemptedmemoryseconds":0,"preemptedvcoreseconds":0,"preemptedresourcesecondsmap":{},"logaggregationstatus":"succeeded","unmanagedapplication":false,"amnodelabelexpression":"","timeouts":{"timeout":[{"type":"lifetime","expirytime":"unlimited","remainingtimeinseconds":-1}]}}]}}  

So far, using Json2Hive (nifty tool which generates json serde DDL from data) below DDL was created. Can someone confirm if this structure is correct or if there is a better way to create the table? Once created , how to explode the data entries so that each application will have its own row in the final table. 


create external table user_tables.sample_json_table (
  `apps` struct<
    `app`: array<struct<
        `id`: string,
        `queue`: string,
        `finalstatus`: string,
        `trackingurl`: string,
        `applicationtype`: string,
        `applicationtags`: string,
        `startedtime`: string,
        `launchtime`: string,
        `finishedtime`: string,
        `memoryseconds`: string,
        `vcoreseconds`: string,
        `resourcesecondsmap`: struct<
          `entry`: struct<
            `key`: string,
            `value`: string
          >
        >
      >
    >
  >
)
row format serde 'org.apache.hadoop.hive.serde2.JsonSerDe' 
location '/xyz/location/;

 

0 REPLIES 0