<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>question How to split particular data within json using splitjson processor in Apache Nifi? in Support Questions</title>
    <link>https://community.cloudera.com/t5/Support-Questions/How-to-split-particular-data-within-json-using-splitjson/m-p/372046#M241144</link>
    <description>&lt;P&gt;I have this data and I need to split&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;VisitList's&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;content into different flowfile one by one.&lt;/P&gt;&lt;P&gt;What should I write in&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;JsonPath Expression&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;so that data got split, I tried&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;$.*&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;but then it is considering this given data into one file only while I want to split on the basis of VisitList.&lt;/P&gt;&lt;P&gt;Data&lt;/P&gt;&lt;LI-CODE lang="java"&gt;[
  {
    "employer": "98765",
    "loc_id": "312",
    "topId": "Management",
    "VisitList": [
      {
        "S1": "HR",
        "S2": "Accountant"
      },
      {
        "S1": "Manager",
        "S2": "Sr. Manager"
      }
    ]
  }
]&lt;/LI-CODE&gt;&lt;P&gt;&amp;nbsp;&lt;SPAN&gt;I want to split Data into files like this 1st flowfile&lt;/SPAN&gt;&lt;/P&gt;&lt;LI-CODE lang="java"&gt;[
  {
    "employer": "98765",
    "loc_id": "312",
    "topId": "Management",
    "VisitList": [
      {
        "S1": "HR",
        "S2": "Accountant"
      }
    ]
  }
]&lt;/LI-CODE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;2nd flowfile&lt;/P&gt;&lt;LI-CODE lang="java"&gt;[
  {
    "employer": "98765",
    "loc_id": "312",
    "topId": "Management",
    "VisitList": [
      {
        "S1": "Manager",
        "S2": "Sr. Manager"
      }
    ]
  }
]&lt;/LI-CODE&gt;&lt;P&gt;&amp;nbsp;I&amp;nbsp;&lt;SPAN&gt;&amp;nbsp;tried putting $.* in JsonPath Expression but it didn't work the way I want.&lt;/SPAN&gt;&lt;/P&gt;</description>
    <pubDate>Fri, 02 Jun 2023 18:57:40 GMT</pubDate>
    <dc:creator>Dracile</dc:creator>
    <dc:date>2023-06-02T18:57:40Z</dc:date>
    <item>
      <title>How to split particular data within json using splitjson processor in Apache Nifi?</title>
      <link>https://community.cloudera.com/t5/Support-Questions/How-to-split-particular-data-within-json-using-splitjson/m-p/372046#M241144</link>
      <description>&lt;P&gt;I have this data and I need to split&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;VisitList's&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;content into different flowfile one by one.&lt;/P&gt;&lt;P&gt;What should I write in&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;JsonPath Expression&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;so that data got split, I tried&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;$.*&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;but then it is considering this given data into one file only while I want to split on the basis of VisitList.&lt;/P&gt;&lt;P&gt;Data&lt;/P&gt;&lt;LI-CODE lang="java"&gt;[
  {
    "employer": "98765",
    "loc_id": "312",
    "topId": "Management",
    "VisitList": [
      {
        "S1": "HR",
        "S2": "Accountant"
      },
      {
        "S1": "Manager",
        "S2": "Sr. Manager"
      }
    ]
  }
]&lt;/LI-CODE&gt;&lt;P&gt;&amp;nbsp;&lt;SPAN&gt;I want to split Data into files like this 1st flowfile&lt;/SPAN&gt;&lt;/P&gt;&lt;LI-CODE lang="java"&gt;[
  {
    "employer": "98765",
    "loc_id": "312",
    "topId": "Management",
    "VisitList": [
      {
        "S1": "HR",
        "S2": "Accountant"
      }
    ]
  }
]&lt;/LI-CODE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;2nd flowfile&lt;/P&gt;&lt;LI-CODE lang="java"&gt;[
  {
    "employer": "98765",
    "loc_id": "312",
    "topId": "Management",
    "VisitList": [
      {
        "S1": "Manager",
        "S2": "Sr. Manager"
      }
    ]
  }
]&lt;/LI-CODE&gt;&lt;P&gt;&amp;nbsp;I&amp;nbsp;&lt;SPAN&gt;&amp;nbsp;tried putting $.* in JsonPath Expression but it didn't work the way I want.&lt;/SPAN&gt;&lt;/P&gt;</description>
      <pubDate>Fri, 02 Jun 2023 18:57:40 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/How-to-split-particular-data-within-json-using-splitjson/m-p/372046#M241144</guid>
      <dc:creator>Dracile</dc:creator>
      <dc:date>2023-06-02T18:57:40Z</dc:date>
    </item>
    <item>
      <title>Re: How to split particular data within json using splitjson processor in Apache Nifi?</title>
      <link>https://community.cloudera.com/t5/Support-Questions/How-to-split-particular-data-within-json-using-splitjson/m-p/372068#M241146</link>
      <description>&lt;P&gt;&lt;a href="https://community.cloudera.com/t5/user/viewprofilepage/user-id/105375"&gt;@Dracile&lt;/a&gt;&amp;nbsp;Welcome to the Cloudera Community!&lt;BR /&gt;&lt;BR /&gt;To help you get the best possible solution, I have tagged our NiFi experts&amp;nbsp;&lt;a href="https://community.cloudera.com/t5/user/viewprofilepage/user-id/95503"&gt;@steven-matison&lt;/a&gt;&amp;nbsp;and&amp;nbsp;&lt;a href="https://community.cloudera.com/t5/user/viewprofilepage/user-id/103151"&gt;@cotopaul&lt;/a&gt;&amp;nbsp; who may be able to assist you further.&lt;BR /&gt;&lt;BR /&gt;Please keep us updated on your post, and we hope you find a satisfactory solution to your query.&lt;/P&gt;</description>
      <pubDate>Fri, 02 Jun 2023 22:37:48 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/How-to-split-particular-data-within-json-using-splitjson/m-p/372068#M241146</guid>
      <dc:creator>DianaTorres</dc:creator>
      <dc:date>2023-06-02T22:37:48Z</dc:date>
    </item>
    <item>
      <title>Re: How to split particular data within json using splitjson processor in Apache Nifi?</title>
      <link>https://community.cloudera.com/t5/Support-Questions/How-to-split-particular-data-within-json-using-splitjson/m-p/372070#M241148</link>
      <description>&lt;P&gt;&lt;a href="https://community.cloudera.com/t5/user/viewprofilepage/user-id/105375"&gt;@Dracile&lt;/a&gt;,&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;I do not think that SplitJson is the correct processor for you. What you are trying to achieve might be possible using some JOLT transformations. Unfortunately, I am not near a computer to test a correct transformation but I know that&amp;nbsp;&lt;a href="https://community.cloudera.com/t5/user/viewprofilepage/user-id/80381"&gt;@SAMSAL&lt;/a&gt;&amp;nbsp;has plenty of experience in using jolts and he might be able to further assist you.&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Fri, 02 Jun 2023 22:45:28 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/How-to-split-particular-data-within-json-using-splitjson/m-p/372070#M241148</guid>
      <dc:creator>cotopaul</dc:creator>
      <dc:date>2023-06-02T22:45:28Z</dc:date>
    </item>
    <item>
      <title>Re: How to split particular data within json using splitjson processor in Apache Nifi?</title>
      <link>https://community.cloudera.com/t5/Support-Questions/How-to-split-particular-data-within-json-using-splitjson/m-p/372079#M241152</link>
      <description>&lt;P&gt;But I need to send the files one by one. I have split only the content of VisitList (only two rows, S1 and S2), which was further split into 2 files using SplitJson, but how can I process those two files one by one?&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Sat, 03 Jun 2023 16:50:24 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/How-to-split-particular-data-within-json-using-splitjson/m-p/372079#M241152</guid>
      <dc:creator>Dracile</dc:creator>
      <dc:date>2023-06-03T16:50:24Z</dc:date>
    </item>
    <item>
      <title>Re: How to split particular data within json using splitjson processor in Apache Nifi?</title>
      <link>https://community.cloudera.com/t5/Support-Questions/How-to-split-particular-data-within-json-using-splitjson/m-p/372127#M241159</link>
      <description>&lt;P&gt;&lt;a href="https://community.cloudera.com/t5/user/viewprofilepage/user-id/105375"&gt;@Dracile&lt;/a&gt;&amp;nbsp;If you are looking to iterate through the results inside of your upstream json object,&amp;nbsp; you need QueryRecord with a Json Reader and Writer.&amp;nbsp; This allows you to provide the upstream schema (reader), downstream schema (writer) and a query against the flowfile.&amp;nbsp; &amp;nbsp; &amp;nbsp;This will unfortunately lose the original object values&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;You can find an example here:&lt;/P&gt;&lt;P&gt;&lt;BR /&gt;&lt;A href="https://github.com/cldr-steven-matison/NiFi-Templates/blob/main/QueryRecord_Sample.json" target="_blank"&gt;https://github.com/cldr-steven-matison/NiFi-Templates/blob/main/QueryRecord_Sample.json&lt;/A&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;You will need to modify the json object in GenerateFlowFile, then adjust the Reader/Writer, and the results query into $.VisitList[] array.&amp;nbsp; &amp;nbsp;Once you have this lil mini test working, take the logic to your final flow.&amp;nbsp;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Mon, 05 Jun 2023 13:14:15 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/How-to-split-particular-data-within-json-using-splitjson/m-p/372127#M241159</guid>
      <dc:creator>steven-matison</dc:creator>
      <dc:date>2023-06-05T13:14:15Z</dc:date>
    </item>
    <item>
      <title>Re: How to split particular data within json using splitjson processor in Apache Nifi?</title>
      <link>https://community.cloudera.com/t5/Support-Questions/How-to-split-particular-data-within-json-using-splitjson/m-p/372327#M241221</link>
      <description>&lt;P&gt;&lt;a href="https://community.cloudera.com/t5/user/viewprofilepage/user-id/105375"&gt;@Dracile&lt;/a&gt;&amp;nbsp;Has the reply helped resolve your issue? If so, please mark the appropriate reply as the solution, as it will make it easier for others to find the answer in the future.&amp;nbsp; Thanks.&lt;/P&gt;</description>
      <pubDate>Thu, 08 Jun 2023 15:09:35 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/How-to-split-particular-data-within-json-using-splitjson/m-p/372327#M241221</guid>
      <dc:creator>DianaTorres</dc:creator>
      <dc:date>2023-06-08T15:09:35Z</dc:date>
    </item>
    <item>
      <title>Re: How to split particular data within json using splitjson processor in Apache Nifi?</title>
      <link>https://community.cloudera.com/t5/Support-Questions/How-to-split-particular-data-within-json-using-splitjson/m-p/372603#M241304</link>
      <description>&lt;P&gt;&lt;a href="https://community.cloudera.com/t5/user/viewprofilepage/user-id/105375"&gt;@Dracile&lt;/a&gt;&amp;nbsp;- Instead I recommend using ForkRecord Processor.&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Like&amp;nbsp;&lt;a href="https://community.cloudera.com/t5/user/viewprofilepage/user-id/95503"&gt;@steven-matison&lt;/a&gt;&amp;nbsp;mentioned, create a Record Reader and Record Writer and add another property with the record path /VisitList, Mode - Split, Include Parent Fields to True.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper lia-image-align-inline" image-alt="drewski7_1-1686750876805.png" style="width: 400px;"&gt;&lt;img src="https://community.cloudera.com/t5/image/serverpage/image-id/37818iA9427C6C156A2BBC/image-size/medium?v=v2&amp;amp;px=400" role="button" title="drewski7_1-1686750876805.png" alt="drewski7_1-1686750876805.png" /&gt;&lt;/span&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;This will result in the next flowfile looking like this -&amp;nbsp;&lt;/P&gt;&lt;LI-CODE lang="javascript"&gt;[ {
  "employer" : "98765",
  "loc_id" : "312",
  "topId" : "Management",
  "VisitList" : [ {
    "S1" : "HR",
    "S2" : "Accountant"
  } ]
}, {
  "employer" : "98765",
  "loc_id" : "312",
  "topId" : "Management",
  "VisitList" : [ {
    "S1" : "Manager",
    "S2" : "Sr. Manager"
  } ]
} ]&lt;/LI-CODE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Then you could split on $ using a SplitJson processor or even better continue using more record-oriented processors for better&amp;nbsp; performance &lt;span class="lia-unicode-emoji" title=":slightly_smiling_face:"&gt;🙂&lt;/span&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Wed, 14 Jun 2023 13:57:24 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/How-to-split-particular-data-within-json-using-splitjson/m-p/372603#M241304</guid>
      <dc:creator>drewski7</dc:creator>
      <dc:date>2023-06-14T13:57:24Z</dc:date>
    </item>
    <item>
      <title>Re: How to split particular data within json using splitjson processor in Apache Nifi?</title>
      <link>https://community.cloudera.com/t5/Support-Questions/How-to-split-particular-data-within-json-using-splitjson/m-p/372618#M241312</link>
      <description>&lt;P&gt;I would do this in a single step with an &lt;SPAN&gt;InvokeScriptedProcessor and the following Groovy code&lt;/SPAN&gt;&lt;/P&gt;&lt;LI-CODE lang="java"&gt;import groovy.json.JsonOutput
import groovy.json.JsonSlurper
import java.nio.charset.StandardCharsets
import org.apache.commons.io.IOUtils

/**
 * Scripted processor (intended for NiFi's InvokeScriptedProcessor) that emits
 * one FlowFile per element of each entry's "VisitList".
 *
 * Every generated FlowFile contains a one-element JSON array whose object holds
 * the parent entry's "employer", "loc_id" and "topId" values plus a "VisitList"
 * with exactly one visit. No other fields of the parent entry are copied.
 */
class GroovyProcessor implements Processor {
    // Number of FlowFiles requested from the session per onTrigger call (default "1000").
    PropertyDescriptor BATCH_SIZE = new PropertyDescriptor.Builder()
        .name("BATCH_SIZE")
        .displayName("Batch Size")
        .description("The number of incoming FlowFiles to process in a single execution of this processor.")
        .required(true)
        .defaultValue("1000")
        .addValidator(StandardValidators.POSITIVE_INTEGER_VALIDATOR)
        .build()

    // Destination of every generated per-visit FlowFile.
    Relationship REL_SUCCESS = new Relationship.Builder()
        .name("success")
        .description('FlowFiles that were successfully processed are routed here')
        .build()

    // Declared and exposed via getRelationships(), but nothing in onTrigger
    // transfers to it: errors roll the session back and are rethrown instead.
    Relationship REL_FAILURE = new Relationship.Builder()
        .name("failure")
        .description('FlowFiles that were not successfully processed are routed here')
        .build()
    
    // Assigned in initialize() from the initialization context.
    ComponentLog log 
    
    // Processor interface methods. Apart from initialize(), getRelationships()
    // and getPropertyDescriptors(), these are stubs that return null or do nothing.
    void initialize(ProcessorInitializationContext context) { log = context.logger }
    Set&amp;lt;Relationship&amp;gt; getRelationships() { return [REL_FAILURE, REL_SUCCESS] as Set }
    Collection&amp;lt;ValidationResult&amp;gt; validate(ValidationContext context) { null }
    PropertyDescriptor getPropertyDescriptor(String name) { null }
    void onPropertyModified(PropertyDescriptor descriptor, String oldValue, String newValue) { }
    List&amp;lt;PropertyDescriptor&amp;gt; getPropertyDescriptors() { Collections.unmodifiableList([BATCH_SIZE]) as List&amp;lt;PropertyDescriptor&amp;gt; }
    String getIdentifier() { null }

    // JSON parser and serializer held as fields and reused by every onTrigger call.
    JsonSlurper jsonSlurper = new JsonSlurper()
    JsonOutput jsonOutput = new JsonOutput()
    
    /**
     * Requests BATCH_SIZE FlowFiles, writes one new FlowFile per VisitList
     * element to REL_SUCCESS, removes the originals and commits the session.
     *
     * @param context        used to read the BATCH_SIZE property
     * @param sessionFactory used to create the session for this invocation
     * @throws ProcessException declared by the signature; any Throwable caught
     *         here is logged and rethrown after the session is rolled back
     */
    void onTrigger(ProcessContext context, ProcessSessionFactory sessionFactory) throws ProcessException {
        ProcessSession session = sessionFactory.createSession()
        try {
            List&amp;lt;FlowFile&amp;gt; flowFiles = session.get(context.getProperty(BATCH_SIZE).asInteger())
            // Groovy truth: a null or empty list means there is nothing to process.
            if (!flowFiles) return

            flowFiles.each { flowFile -&amp;gt;
                // Attributes applied to every FlowFile generated from this input.
                Map customAttributes = [ "mime.type": "application/json" ]
                // Filled in by the read callback below.
                // NOTE(review): declared as List, so the content is assumed to be a
                // top-level JSON array - confirm inputs are never a bare object.
                List data = null
                // Read the whole content as UTF-8 text and parse it as JSON.
                session.read(flowFile, { 
                    inputStream -&amp;gt; data = jsonSlurper.parseText(IOUtils.toString(inputStream, StandardCharsets.UTF_8)) 
                } as InputStreamCallback)
                data.each { entry -&amp;gt; 
                    entry.VisitList.each { visit -&amp;gt;
                        // Copy only the three known parent fields, then wrap the
                        // single visit in a list so "VisitList" stays an array.
                        Map newData = [:]
                        newData.put("employer", entry.employer)
                        newData.put("loc_id", entry.loc_id)
                        newData.put("topId", entry.topId)
                        newData.put("VisitList", [visit])
                        // NOTE(review): create() is called without the source FlowFile;
                        // presumably the original's attributes and lineage are not
                        // carried over - confirm this is intended.
                        FlowFile newFlowFile = session.create()
                        // Serialize as a one-element JSON array, UTF-8 encoded.
                        newFlowFile = session.write(newFlowFile, { outputStream -&amp;gt; outputStream.write(jsonOutput.toJson([newData]).getBytes(StandardCharsets.UTF_8)) } as OutputStreamCallback)
                        newFlowFile = session.putAllAttributes(newFlowFile, customAttributes)
                        session.transfer(newFlowFile, REL_SUCCESS)
                    }
                }
                // The original is dropped once all of its visits have been emitted.
                session.remove(flowFile)
            }
            // One commit covers the whole batch.
            session.commit()
        } catch (final Throwable t) {
            // Any failure undoes the entire batch, not just the offending FlowFile.
            // NOTE(review): rollback(true) presumably penalizes the rolled-back
            // FlowFiles - verify against the ProcessSession API.
            log.error('{} failed to process due to {}; rolling back session', [this, t] as Object[])
            session.rollback(true)
            throw t
        }
    }
}

processor = new GroovyProcessor()&lt;/LI-CODE&gt;</description>
      <pubDate>Wed, 14 Jun 2023 19:33:28 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/How-to-split-particular-data-within-json-using-splitjson/m-p/372618#M241312</guid>
      <dc:creator>joseomjr</dc:creator>
      <dc:date>2023-06-14T19:33:28Z</dc:date>
    </item>
  </channel>
</rss>

