How to split an array in a JSON file into multiple flowfiles with a custom number of elements per array in NiFi?

Explorer

This is the content of my input JSON flowfile:

{"objectIDs":["20607","23679","24365","31444","34130","57462"]}

My requirement is to divide it in such a way that each output flowfile has at most 5 elements.
So if an objectIDs array has 17 elements, I want 4 flowfiles in the same JSON format: the first 3 with 5 elements each and the last one with the remaining 2.

For my example content provided above, I would want two flowfiles.

First one:

{"objectIDs":["20607","23679","24365","31444","34130"]}

Second one:

{"objectIDs":["57462"]}


How do I do this in NiFi?

1 REPLY

Super Collaborator

I'm not sure whether this can be done with out-of-the-box processors, but I would do it with a Groovy-based InvokeScriptedProcessor using code like this:

import groovy.json.JsonOutput
import groovy.json.JsonSlurper

import org.apache.nifi.components.PropertyDescriptor
import org.apache.nifi.components.ValidationContext
import org.apache.nifi.components.ValidationResult
import org.apache.nifi.flowfile.FlowFile
import org.apache.nifi.logging.ComponentLog
import org.apache.nifi.processor.ProcessContext
import org.apache.nifi.processor.ProcessSession
import org.apache.nifi.processor.ProcessSessionFactory
import org.apache.nifi.processor.Processor
import org.apache.nifi.processor.ProcessorInitializationContext
import org.apache.nifi.processor.Relationship
import org.apache.nifi.processor.exception.ProcessException
import org.apache.nifi.processor.io.InputStreamCallback
import org.apache.nifi.processor.io.OutputStreamCallback
import org.apache.nifi.processor.util.StandardValidators

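// A scripted Processor implementation; InvokeScriptedProcessor picks it up
// through the "processor" variable assigned at the bottom of the script.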
class GroovyProcessor implements Processor {
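    // User-configurable property: how many array elements go into each output FlowFile.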
    PropertyDescriptor CHUNK_SIZE = new PropertyDescriptor.Builder()
        .name("CHUNK_SIZE")
        .displayName("Chunk Size")
        .description("The chunk size to break up the incoming list of values.")
        .required(true)
        .defaultValue("5")
        .addValidator(StandardValidators.POSITIVE_INTEGER_VALIDATOR)
        .build()

    Relationship REL_SUCCESS = new Relationship.Builder()
        .name("success")
        .description('FlowFiles that were successfully processed are routed here')
        .build()

    Relationship REL_FAILURE = new Relationship.Builder()
        .name("failure")
        .description('FlowFiles that were not successfully processed are routed here')
        .build()

    ComponentLog log
    JsonSlurper jsonSlurper = new JsonSlurper()

    void initialize(ProcessorInitializationContext context) {
        log = context.logger
    }

    Set<Relationship> getRelationships() {
        Set<Relationship> relationships = new HashSet<>()
        relationships.add(REL_FAILURE)
        relationships.add(REL_SUCCESS)
        return relationships
    }

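    // Most of the remaining Processor interface methods can be left as no-ops
    // when the script runs inside InvokeScriptedProcessor.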
    Collection<ValidationResult> validate(ValidationContext context) {
        
    }
    
    PropertyDescriptor getPropertyDescriptor(String name) {
        
    }
    
    void onPropertyModified(PropertyDescriptor descriptor, String oldValue, String newValue) {

    }
    
    List<PropertyDescriptor> getPropertyDescriptors() {
        List<PropertyDescriptor> descriptors = new ArrayList<>()
        descriptors.add(CHUNK_SIZE)
        return Collections.unmodifiableList(descriptors)
    }
    
    String getIdentifier() {
        
    }

    void onScheduled(ProcessContext context) throws ProcessException {
        
    }

    void onUnscheduled(ProcessContext context) throws ProcessException {
        
    }

    void onStopped(ProcessContext context) throws ProcessException {
        
    }

    void setLogger(ComponentLog logger) {
        
    }

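    // Read each incoming FlowFile, split its objectIDs array into chunks,
    // and emit one new JSON FlowFile per chunk.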
    void onTrigger(ProcessContext context, ProcessSessionFactory sessionFactory) throws ProcessException {
        ProcessSession session = sessionFactory.createSession()
        try {
            List<FlowFile> flowFiles = session.get(1)
            if (!flowFiles) return
            Integer chunkSize = context.getProperty(CHUNK_SIZE).asInteger()
            flowFiles.each { FlowFile flowFile ->
                Map customAttributes = [ "mime.type": "application/json" ]
                Map data = null
                session.read(flowFile, { inputStream -> data = jsonSlurper.parse(inputStream) } as InputStreamCallback)
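                // Groovy's collate() splits the list into sub-lists of at most
                // chunkSize elements; the last chunk keeps the remainder.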
                List<List<String>> chunkedObjectIDs = data.objectIDs.collate(chunkSize)
                chunkedObjectIDs.each { chunk ->
                    data = [
                        "objectIDs": chunk
                    ]
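                    // Emit the chunk as a brand-new FlowFile with the original JSON shape.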
                    FlowFile newFlowFile = session.create()
                    newFlowFile = session.write(newFlowFile, { outputStream -> outputStream.write(JsonOutput.toJson(data).getBytes("UTF-8")) } as OutputStreamCallback)
                    session.putAllAttributes(newFlowFile, customAttributes)
                    session.transfer(newFlowFile, REL_SUCCESS)
                }
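                // Drop the original FlowFile now that all of its chunks have been emitted.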
                session.remove(flowFile)
            }
            session.commit()
        } catch (final Throwable t) {
            log.error('{} failed to process due to {}; rolling back session', [this, t] as Object[])
            session.rollback(true)
            throw t
        }
    }
}

processor = new GroovyProcessor()
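
For what it's worth, the heavy lifting is done by Groovy's built-in List.collate(int), which splits a list into sub-lists of the given size and keeps any remainder as a shorter final chunk. Here is a quick standalone sanity check (runnable in groovysh) using the sample input from the question:

import groovy.json.JsonOutput

def ids = ["20607", "23679", "24365", "31444", "34130", "57462"]

// collate(5) -> [["20607","23679","24365","31444","34130"], ["57462"]]
def chunks = ids.collate(5)
assert chunks.size() == 2
assert chunks[1] == ["57462"]

// Each chunk becomes the body of one output FlowFile
chunks.each { chunk ->
    println JsonOutput.toJson(["objectIDs": chunk])
}

To run the processor script itself, paste it into an InvokeScriptedProcessor's Script Body property and set Script Engine to Groovy; the Chunk Size property defined above should then show up in the processor's configuration.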