<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>question MergeRecord based on schema; only merge records of same schema in Support Questions</title>
    <link>https://community.cloudera.com/t5/Support-Questions/MergeRecord-based-on-schema-only-merge-records-of-same/m-p/364141#M239111</link>
    <description>&lt;P&gt;My use-case is:&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;1) Have API credentials&lt;/P&gt;&lt;P&gt;2) Use UpdateAttribute to update (1) schema, (2) s3 bucket/location (my list of reports)&lt;/P&gt;&lt;P&gt;3) Query API endpoint for report&lt;/P&gt;&lt;P&gt;4) API endpoint paginates and gets more records&lt;/P&gt;&lt;P&gt;5) Call MergeRecord&lt;/P&gt;&lt;P&gt;6) Save to s3&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Since 3, 4, 5, 6 are all the same, I'm re-using the processors like below (screenshot).&amp;nbsp; My problem is (5) MergeRecord will try to merge different schemas together, which is obviously a problem.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;How can I restructure this?&amp;nbsp; I'd like to re-use processors as much as possible, but still be able to add more schemas as my needs evolve.&lt;/P&gt;&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper lia-image-align-inline" image-alt="CRISSAEGRIM_0-1676668339630.png" style="width: 400px;"&gt;&lt;img src="https://community.cloudera.com/t5/image/serverpage/image-id/36788iAC681685ADDC6177/image-size/medium?v=v2&amp;amp;px=400" role="button" title="CRISSAEGRIM_0-1676668339630.png" alt="CRISSAEGRIM_0-1676668339630.png" /&gt;&lt;/span&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
    <pubDate>Fri, 17 Feb 2023 21:13:52 GMT</pubDate>
    <dc:creator>CRISSAEGRIM</dc:creator>
    <dc:date>2023-02-17T21:13:52Z</dc:date>
    <item>
      <title>MergeRecord based on schema; only merge records of same schema</title>
      <link>https://community.cloudera.com/t5/Support-Questions/MergeRecord-based-on-schema-only-merge-records-of-same/m-p/364141#M239111</link>
      <description>&lt;P&gt;My use-case is:&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;1) Have API credentials&lt;/P&gt;&lt;P&gt;2) Use UpdateAttribute to update (1) schema, (2) s3 bucket/location (my list of reports)&lt;/P&gt;&lt;P&gt;3) Query API endpoint for report&lt;/P&gt;&lt;P&gt;4) API endpoint paginates and gets more records&lt;/P&gt;&lt;P&gt;5) Call MergeRecord&lt;/P&gt;&lt;P&gt;6) Save to s3&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Since 3, 4, 5, 6 are all the same, I'm re-using the processors like below (screenshot).&amp;nbsp; My problem is (5) MergeRecord will try to merge different schemas together, which is obviously a problem.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;How can I restructure this?&amp;nbsp; I'd like to re-use processors as much as possible, but still be able to add more schemas as my needs evolve.&lt;/P&gt;&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper lia-image-align-inline" image-alt="CRISSAEGRIM_0-1676668339630.png" style="width: 400px;"&gt;&lt;img src="https://community.cloudera.com/t5/image/serverpage/image-id/36788iAC681685ADDC6177/image-size/medium?v=v2&amp;amp;px=400" role="button" title="CRISSAEGRIM_0-1676668339630.png" alt="CRISSAEGRIM_0-1676668339630.png" /&gt;&lt;/span&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Fri, 17 Feb 2023 21:13:52 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/MergeRecord-based-on-schema-only-merge-records-of-same/m-p/364141#M239111</guid>
      <dc:creator>CRISSAEGRIM</dc:creator>
      <dc:date>2023-02-17T21:13:52Z</dc:date>
    </item>
    <item>
      <title>Re: MergeRecord based on schema; only merge records of same schema</title>
      <link>https://community.cloudera.com/t5/Support-Questions/MergeRecord-based-on-schema-only-merge-records-of-same/m-p/364269#M239147</link>
      <description>&lt;P&gt;I used the&amp;nbsp;&lt;STRONG&gt;Correlation Attribute Name&lt;/STRONG&gt; property, setting it to `${schema.name}`, and it's working as expected.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Quote from the documentation:&lt;/P&gt;&lt;P&gt;&lt;SPAN&gt;&amp;gt; If specified, two FlowFiles will be binned together only if they have the same value for this Attribute. If not specified, FlowFiles are bundled by the order in which they are pulled from the queue.&lt;/SPAN&gt;&lt;/P&gt;</description>
      <pubDate>Tue, 21 Feb 2023 15:30:23 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/MergeRecord-based-on-schema-only-merge-records-of-same/m-p/364269#M239147</guid>
      <dc:creator>CRISSAEGRIM</dc:creator>
      <dc:date>2023-02-21T15:30:23Z</dc:date>
    </item>
  </channel>
</rss>

