<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>question Re: Kafka--&gt;Nifi--parquet---&gt;HDFS in Support Questions</title>
    <link>https://community.cloudera.com/t5/Support-Questions/Kafka-gt-Nifi-parquet-gt-HDFS/m-p/369465#M240493</link>
    <description>&lt;PRE&gt;ConvertRecord[id=bc90b7d4-0187-1000-0000-00003eadf340] Failed to process FlowFile[filename=043b3d7b-5de0-4f7e-842f-2cbe5c972ace]; will route to failure: org.apache.parquet.schema.InvalidSchemaException: Cannot write a schema with an empty group: required group file {&lt;/PRE&gt;&lt;P&gt;&amp;nbsp;I'm trying differently now&lt;/P&gt;&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper lia-image-align-inline" image-alt="Снимок экрана 2023-04-26 в 15.10.13.png" style="width: 999px;"&gt;&lt;img src="https://community.cloudera.com/t5/image/serverpage/image-id/37377iB32A1A1408036480/image-size/large?v=v2&amp;amp;px=999" role="button" title="Снимок экрана 2023-04-26 в 15.10.13.png" alt="Снимок экрана 2023-04-26 в 15.10.13.png" /&gt;&lt;/span&gt;&lt;/P&gt;&lt;P&gt;avroshemaregistry&lt;/P&gt;&lt;DIV&gt;{&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; "type": "record",&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; "name": "kafka_record",&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; "fields": [&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;nbsp; {"name": "upstream_response_time", "type": "string"},&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;nbsp; {"name": "ssl_cipher", "type": "string"},&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;nbsp; {"name": "upstream", "type": "string"},&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;nbsp; {"name": "log", "type": {&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; "type": "record",&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; "name": "log",&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; "fields": [&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; {"name": "offset", "type": "long"},&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; {"name": "file", "type": {&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; "type": "record",&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; "name": "file",&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; 
&amp;nbsp; "fields": []&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; }}&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; ]&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;nbsp; }},&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;nbsp; {"name": "method", "type": "string"},&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;nbsp; {"name": "clientip", "type": "string"},&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;nbsp; {"name": "user_agent", "type": "string"},&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;nbsp; {"name": "realip", "type": "string"},&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;nbsp; {"name": "http_accept_encoding", "type": "string"},&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;nbsp; {"name": "country", "type": "string"},&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;nbsp; {"name": "timestamp_delivery", "type": "string"},&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;nbsp; {"name": "http_accept_language", "type": "string"},&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;nbsp; {"name": "scheme", "type": "string"},&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;nbsp; {"name": "request_id", "type": "string"},&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;nbsp; {"name": "http_referer", "type": "string"},&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;nbsp; {"name": "req_lengths", "type": "string"},&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;nbsp; {"name": "server_protocol", "type": "string"},&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;nbsp; {"name": "request", "type": "string"},&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;nbsp; {"name": "request_time", "type": "string"},&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;nbsp; {"name": "ssl_protocol", "type": "string"},&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;nbsp; {"name": "host", "type": "string"},&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;nbsp; {"name": "cache", "type": "string"},&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;nbsp; {"name": "input", "type": {&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; "type": "record",&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; "name": 
"input",&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; "fields": []&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;nbsp; }},&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;nbsp; {"name": "agent", "type": {&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; "type": "record",&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; "name": "agent",&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; "fields": []&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;nbsp; }},&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;nbsp; {"name": "hostname_logstash", "type": "string"},&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;nbsp; {"name": "x_requested_with", "type": "string"},&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;nbsp; {"name": "status", "type": "string"},&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;nbsp; {"name": "project_id", "type": "string"},&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;nbsp; {"name": "cookie_session", "type": "string"},&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;nbsp; {"name": "timestamp", "type": "string"},&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;nbsp; {"name": "serverip", "type": "string"},&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;nbsp; {"name": "geo", "type": "string"},&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;nbsp; {"name": "source", "type": "string"},&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;nbsp; {"name": "upstream_status", "type": "string"},&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;nbsp; {"name": "upstream_port", "type": "string"},&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;nbsp; {"name": "hostname", "type": "string"},&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;nbsp; {"name": "size", "type": "string"},&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;nbsp; {"name": "ssl_ja3_hash", "type": "string"},&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;nbsp; {"name": "sni", "type": "string"},&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;nbsp; {"name": "http_accept", "type": "string"},&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;nbsp; {"name": "location_id", "type": "string"},&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;nbsp; {"name": "server_port", "type": 
"string"},&lt;/DIV&gt;&lt;DIV&gt;{"name": "timestamp_record", "type": "string"},&lt;/DIV&gt;&lt;DIV&gt;{"name": "param_request", "type": "string"}&lt;/DIV&gt;&lt;DIV&gt;]&lt;/DIV&gt;&lt;DIV&gt;}&lt;BR /&gt;&lt;BR /&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;span class="lia-inline-image-display-wrapper lia-image-align-inline" image-alt="Снимок экрана 2023-04-26 в 15.11.42.png" style="width: 999px;"&gt;&lt;img src="https://community.cloudera.com/t5/image/serverpage/image-id/37378i39DDC8C2DEBBF20A/image-size/large?v=v2&amp;amp;px=999" role="button" title="Снимок экрана 2023-04-26 в 15.11.42.png" alt="Снимок экрана 2023-04-26 в 15.11.42.png" /&gt;&lt;/span&gt;&lt;/DIV&gt;</description>
    <pubDate>Wed, 26 Apr 2023 12:12:09 GMT</pubDate>
    <dc:creator>VLban</dc:creator>
    <dc:date>2023-04-26T12:12:09Z</dc:date>
    <item>
      <title>Kafka--&gt;Nifi--parquet---&gt;HDFS</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Kafka-gt-Nifi-parquet-gt-HDFS/m-p/368903#M240304</link>
      <description>&lt;P&gt;My process looks like this:&lt;BR /&gt;ConsumerKafka 2.6 --- JoltTransformJSON---- ConvertRecord ----MergeRecord---PutHDFS&lt;BR /&gt;My task: I get data from Kafka in JSON format and I need to write large files to HDFS in Parquet format, but I get small files of 10 KB each. I would like to receive files of at least 100 MB (I changed the merge settings, but it always sends small files), and I get plain flow files where I need Parquet. What's wrong?&lt;BR /&gt;1- The file must be large&lt;BR /&gt;2- The file must be Parquet&lt;/P&gt;</description>
      <pubDate>Wed, 19 Apr 2023 10:52:19 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Kafka-gt-Nifi-parquet-gt-HDFS/m-p/368903#M240304</guid>
      <dc:creator>VLban</dc:creator>
      <dc:date>2023-04-19T10:52:19Z</dc:date>
    </item>
    <item>
      <title>Re: Kafka--&gt;Nifi--parquet---&gt;HDFS</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Kafka-gt-Nifi-parquet-gt-HDFS/m-p/368912#M240308</link>
      <description>&lt;P&gt;hi &lt;a href="https://community.cloudera.com/t5/user/viewprofilepage/user-id/104679"&gt;@VLban&lt;/a&gt;,&lt;BR /&gt;&lt;BR /&gt;How do your files look like before reaching MergeRecord and how do they look after they went through MergeRecord? Besides that, what settings did you use in your MergeRecord?&lt;/P&gt;&lt;P&gt;&lt;BR /&gt;For your two requirements, everything depends on how you configure MergeRecord.&lt;/P&gt;&lt;UL&gt;&lt;LI&gt;To generate parquet files, you set the Parquet Writer in your Record Writer property.&lt;/LI&gt;&lt;LI&gt;For Large Files, you must define the Minimum Bin size, the Minimum Number of Records and optional Max Bin Age. What it would also help is the Correlation Attribute Name.&lt;/LI&gt;&lt;/UL&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Wed, 19 Apr 2023 13:58:59 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Kafka-gt-Nifi-parquet-gt-HDFS/m-p/368912#M240308</guid>
      <dc:creator>cotopaul</dc:creator>
      <dc:date>2023-04-19T13:58:59Z</dc:date>
    </item>
    <item>
      <title>Re: Kafka--&gt;Nifi--parquet---&gt;HDFS</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Kafka-gt-Nifi-parquet-gt-HDFS/m-p/369421#M240486</link>
      <description>&lt;P&gt;It’s not clear to me about the process, I managed to shorten it, but this is not a parquet Consumer_kafka--MergeContent---PutHDFS at the output in HDFS GZ, you need to somehow change the parquet in MergeContent to include such a FlowFile MergeContent from kafka into one large file, as I do in Parquet this happen?&lt;/P&gt;&lt;DIV&gt;FlowFile Properties&lt;/DIV&gt;&lt;DIV&gt;Key: 'entryDate'&lt;/DIV&gt;&lt;DIV&gt;Value: 'Tue Apr 25 09:14:38 MSK 2023'&lt;/DIV&gt;&lt;DIV&gt;Key: 'lineageStartDate'&lt;/DIV&gt;&lt;DIV&gt;Value: 'Tue Apr 25 09:14:38 MSK 2023'&lt;/DIV&gt;&lt;DIV&gt;Key: 'fileSize'&lt;/DIV&gt;&lt;DIV&gt;Value: '1305'&lt;/DIV&gt;&lt;DIV&gt;FlowFile Attribute Map Content&lt;/DIV&gt;&lt;DIV&gt;Key: 'filename'&lt;/DIV&gt;&lt;DIV&gt;Value: '597dd31f-294d3-f5301d4c446b'&lt;/DIV&gt;&lt;DIV&gt;Key: 'kafka.consumer.id'&lt;/DIV&gt;&lt;DIV&gt;Value: 'readtopicnifi'&lt;/DIV&gt;&lt;DIV&gt;Key: 'kafka.consumer.offsets.committed'&lt;/DIV&gt;&lt;DIV&gt;Value: 'true'&lt;/DIV&gt;&lt;DIV&gt;Key: 'kafka.leader.epoch'&lt;/DIV&gt;&lt;DIV&gt;Value: '2'&lt;/DIV&gt;&lt;DIV&gt;Key: 'kafka.max.offset'&lt;/DIV&gt;&lt;DIV&gt;Value: '11016028'&lt;/DIV&gt;&lt;DIV&gt;Key: 'kafka.offset'&lt;/DIV&gt;&lt;DIV&gt;Value: '110168'&lt;/DIV&gt;&lt;DIV&gt;Key: 'kafka.partition'&lt;/DIV&gt;&lt;DIV&gt;Value: '0'&lt;/DIV&gt;&lt;DIV&gt;Key: 'kafka.timestamp'&lt;/DIV&gt;&lt;DIV&gt;Value: '1681997605'&lt;/DIV&gt;&lt;DIV&gt;Key: 'kafka.topic'&lt;/DIV&gt;&lt;DIV&gt;Value: 'ng'&lt;/DIV&gt;&lt;DIV&gt;Key: 'path'&lt;/DIV&gt;&lt;DIV&gt;Value: './'&lt;/DIV&gt;&lt;DIV&gt;Key: 'uuid'&lt;/DIV&gt;&lt;DIV&gt;Value: '091096d655236-4b0f-a0bf-55у3d3e819'&lt;/DIV&gt;&lt;P&gt;&lt;BR /&gt;MergeRECORd recordread JsonTree doesn't work as flowfile comes from kafka process, how can this be implemented for parquet?&lt;/P&gt;</description>
      <pubDate>Wed, 26 Apr 2023 06:23:08 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Kafka-gt-Nifi-parquet-gt-HDFS/m-p/369421#M240486</guid>
      <dc:creator>VLban</dc:creator>
      <dc:date>2023-04-26T06:23:08Z</dc:date>
    </item>
    <item>
      <title>Re: Kafka--&gt;Nifi--parquet---&gt;HDFS</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Kafka-gt-Nifi-parquet-gt-HDFS/m-p/369422#M240487</link>
      <description>&lt;P&gt;&lt;SPAN&gt;It’s not clear to me about the process, I managed to shorten it, but this is not a parquet Consumer_kafka--MergeContent---PutHDFS at the output in HDFS GZ, you need to somehow change the parquet in MergeContent to include such a FlowFile MergeContent from kafka into one large file, as I do in Parquet this happen?&lt;/SPAN&gt;&lt;/P&gt;</description>
      <pubDate>Wed, 26 Apr 2023 06:24:10 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Kafka-gt-Nifi-parquet-gt-HDFS/m-p/369422#M240487</guid>
      <dc:creator>VLban</dc:creator>
      <dc:date>2023-04-26T06:24:10Z</dc:date>
    </item>
    <item>
      <title>Re: Kafka--&gt;Nifi--parquet---&gt;HDFS</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Kafka-gt-Nifi-parquet-gt-HDFS/m-p/369429#M240488</link>
      <description>&lt;P&gt;First of all, you need to identify how the data comes from your Kafka processor. Once you have identified how the data comes from your Kafka brokers, you can define the Record Reader in your MergeRecord processor --&amp;gt; based on your original description I assume JSON, so you need a JsonTreeReader. Knowing that you want your data to be delivered as Parquet, within your Record Writer you must define a ParquetRecordSetWriter Controller Service, which will transform your data (which is read with the Record Reader) into a Parquet file (which is written with the Record Writer).&lt;/P&gt;</description>
      <pubDate>Wed, 26 Apr 2023 07:55:43 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Kafka-gt-Nifi-parquet-gt-HDFS/m-p/369429#M240488</guid>
      <dc:creator>cotopaul</dc:creator>
      <dc:date>2023-04-26T07:55:43Z</dc:date>
    </item>
    <item>
      <title>Re: Kafka--&gt;Nifi--parquet---&gt;HDFS</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Kafka-gt-Nifi-parquet-gt-HDFS/m-p/369446#M240490</link>
      <description>&lt;P&gt;my settings from jsontreeRead:&lt;/P&gt;&lt;DIV class="ui-widget-content slick-row even"&gt;&lt;DIV class="slick-cell l0 r0"&gt;&lt;SPAN class="table-cell required"&gt;Schema Access Strategy&amp;nbsp;&lt;/SPAN&gt;&lt;SPAN&gt;Infer Schema&lt;/SPAN&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;DIV class="ui-widget-content slick-row odd"&gt;&lt;DIV class="slick-cell l0 r0"&gt;&lt;SPAN class="table-cell"&gt;Schema Inference Cache&amp;nbsp;&lt;/SPAN&gt;&lt;SPAN&gt;No value set&lt;/SPAN&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;DIV class="ui-widget-content slick-row even"&gt;&lt;DIV class="slick-cell l0 r0"&gt;&lt;SPAN class="table-cell required"&gt;Starting Field Strategy&amp;nbsp;&lt;/SPAN&gt;&lt;SPAN&gt;Root Node&lt;/SPAN&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;DIV class="ui-widget-content slick-row odd"&gt;&lt;DIV class="slick-cell l0 r0"&gt;&lt;SPAN class="table-cell"&gt;Date Format&amp;nbsp;&lt;/SPAN&gt;&lt;SPAN&gt;yyyy-MM-dd&lt;/SPAN&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;DIV class="ui-widget-content slick-row even"&gt;&lt;DIV class="slick-cell l0 r0"&gt;&lt;SPAN class="table-cell"&gt;Time Format&amp;nbsp;&lt;/SPAN&gt;&lt;SPAN&gt;HH:mm:ss&lt;/SPAN&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;DIV class="ui-widget-content slick-row odd"&gt;&lt;DIV class="slick-cell l0 r0"&gt;&lt;SPAN class="table-cell"&gt;Timestamp Format&amp;nbsp;&lt;/SPAN&gt;&lt;SPAN&gt;yyyy-MM-dd'T'HH:mm:ss.SSSX&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV class="slick-cell l0 r0"&gt;&amp;nbsp;&lt;/DIV&gt;&lt;DIV class="slick-cell l0 r0"&gt;&lt;SPAN&gt;parquetrecordsetwriter&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV class="slick-cell l0 r0"&gt;&lt;DIV class="slick-pane slick-pane-header slick-pane-left"&gt;&lt;DIV class="slick-header ui-state-default slick-header-left"&gt;&lt;DIV class="slick-header-columns slick-header-columns-left"&gt;&lt;DIV class="ui-state-default slick-header-column"&gt;&lt;DIV class="slick-resizable-handle"&gt;&lt;SPAN&gt;Schema Write Strategy&lt;/SPAN&gt;&lt;SPAN&gt;Do Not Write 
Schema&lt;/SPAN&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;DIV class="slick-pane slick-pane-top slick-pane-left"&gt;&lt;DIV class="slick-viewport slick-viewport-top slick-viewport-left"&gt;&lt;DIV class="grid-canvas grid-canvas-top grid-canvas-left"&gt;&lt;DIV class="ui-widget-content slick-row odd"&gt;&lt;DIV class="slick-cell l0 r0"&gt;&lt;SPAN class="table-cell"&gt;Schema Cache&lt;/SPAN&gt;&lt;SPAN&gt;No value set&lt;/SPAN&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;DIV class="ui-widget-content slick-row even"&gt;&lt;DIV class="slick-cell l0 r0"&gt;&lt;SPAN class="table-cell required"&gt;Schema Access Strategy&lt;/SPAN&gt;&lt;SPAN&gt;Inherit Record Schema&lt;/SPAN&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;DIV class="ui-widget-content slick-row odd"&gt;&lt;DIV class="slick-cell l0 r0"&gt;&lt;SPAN class="table-cell required"&gt;Cache Size&lt;/SPAN&gt;&lt;SPAN&gt;1000&lt;/SPAN&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;DIV class="ui-widget-content slick-row even"&gt;&lt;DIV class="slick-cell l0 r0"&gt;&lt;SPAN class="table-cell required"&gt;Compression Type&lt;/SPAN&gt;&lt;SPAN&gt;UNCOMPRESSED&lt;/SPAN&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;DIV class="ui-widget-content slick-row odd"&gt;&lt;DIV class="slick-cell l0 r0"&gt;&lt;SPAN class="table-cell"&gt;Row Group Size&lt;/SPAN&gt;&lt;SPAN&gt;No value set&lt;/SPAN&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;DIV class="ui-widget-content slick-row even"&gt;&lt;DIV class="slick-cell l0 r0"&gt;&lt;SPAN class="table-cell"&gt;Page Size&lt;/SPAN&gt;&lt;SPAN&gt;No value set&lt;/SPAN&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;DIV class="ui-widget-content slick-row odd"&gt;&lt;DIV class="slick-cell l0 r0"&gt;&lt;SPAN class="table-cell"&gt;Dictionary Page Size&lt;/SPAN&gt;&lt;SPAN&gt;No value set&lt;/SPAN&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;DIV class="ui-widget-content slick-row even"&gt;&lt;DIV class="slick-cell l0 r0"&gt;&lt;SPAN class="table-cell"&gt;Max Padding Size&lt;/SPAN&gt;&lt;SPAN&gt;No value set&lt;/SPAN&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;DIV class="ui-widget-content 
slick-row odd"&gt;&lt;DIV class="slick-cell l0 r0"&gt;&lt;SPAN class="table-cell"&gt;Enable Dictionary Encoding&lt;/SPAN&gt;&lt;SPAN&gt;No value set&lt;/SPAN&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;DIV class="ui-widget-content slick-row even"&gt;&lt;DIV class="slick-cell l0 r0"&gt;&lt;SPAN class="table-cell"&gt;Enable Validation&lt;/SPAN&gt;&lt;SPAN class="unset"&gt;No value set&lt;/SPAN&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;DIV class="ui-widget-content slick-row odd"&gt;&lt;DIV class="slick-cell l0 r0"&gt;&lt;SPAN class="table-cell"&gt;Writer Version&lt;/SPAN&gt;&lt;SPAN&gt;No value set&lt;/SPAN&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;DIV class="ui-widget-content slick-row even"&gt;&lt;DIV class="slick-cell l0 r0"&gt;&lt;SPAN class="table-cell required"&gt;Avro Write Old List Structure&lt;/SPAN&gt;&lt;SPAN&gt;false&lt;/SPAN&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;DIV class="ui-widget-content slick-row odd"&gt;&lt;DIV class="slick-cell l0 r0"&gt;&lt;SPAN class="table-cell required"&gt;Avro Add List Element Records&lt;/SPAN&gt;&lt;SPAN&gt;false&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV class="slick-cell l2 r2"&gt;&lt;SPAN class="table-cell"&gt;INT96 Fields&lt;/SPAN&gt;&lt;SPAN&gt;No value set&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV class="slick-cell l2 r2"&gt;&amp;nbsp;&lt;/DIV&gt;&lt;DIV class="slick-cell l2 r2"&gt;I don’t have a json schema, I want it to generate a schema for the input data and send it to the parquet on my parameters, this does not work&lt;/DIV&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;/DIV&gt;</description>
      <pubDate>Wed, 26 Apr 2023 10:46:04 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Kafka-gt-Nifi-parquet-gt-HDFS/m-p/369446#M240490</guid>
      <dc:creator>VLban</dc:creator>
      <dc:date>2023-04-26T10:46:04Z</dc:date>
    </item>
    <item>
      <title>Re: Kafka--&gt;Nifi--parquet---&gt;HDFS</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Kafka-gt-Nifi-parquet-gt-HDFS/m-p/369456#M240492</link>
      <description>&lt;P&gt;&lt;SPAN&gt;I don’t have a json schema, I want it to generate a schema for the input data and send it to the parquet on my parameters, this does not work&lt;/SPAN&gt;&lt;/P&gt;</description>
      <pubDate>Wed, 26 Apr 2023 10:47:17 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Kafka-gt-Nifi-parquet-gt-HDFS/m-p/369456#M240492</guid>
      <dc:creator>VLban</dc:creator>
      <dc:date>2023-04-26T10:47:17Z</dc:date>
    </item>
    <item>
      <title>Re: Kafka--&gt;Nifi--parquet---&gt;HDFS</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Kafka-gt-Nifi-parquet-gt-HDFS/m-p/369465#M240493</link>
      <description>&lt;PRE&gt;ConvertRecord[id=bc90b7d4-0187-1000-0000-00003eadf340] Failed to process FlowFile[filename=043b3d7b-5de0-4f7e-842f-2cbe5c972ace]; will route to failure: org.apache.parquet.schema.InvalidSchemaException: Cannot write a schema with an empty group: required group file {&lt;/PRE&gt;&lt;P&gt;&amp;nbsp;I'm trying differently now&lt;/P&gt;&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper lia-image-align-inline" image-alt="Снимок экрана 2023-04-26 в 15.10.13.png" style="width: 999px;"&gt;&lt;img src="https://community.cloudera.com/t5/image/serverpage/image-id/37377iB32A1A1408036480/image-size/large?v=v2&amp;amp;px=999" role="button" title="Снимок экрана 2023-04-26 в 15.10.13.png" alt="Снимок экрана 2023-04-26 в 15.10.13.png" /&gt;&lt;/span&gt;&lt;/P&gt;&lt;P&gt;avroshemaregistry&lt;/P&gt;&lt;DIV&gt;{&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; "type": "record",&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; "name": "kafka_record",&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; "fields": [&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;nbsp; {"name": "upstream_response_time", "type": "string"},&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;nbsp; {"name": "ssl_cipher", "type": "string"},&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;nbsp; {"name": "upstream", "type": "string"},&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;nbsp; {"name": "log", "type": {&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; "type": "record",&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; "name": "log",&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; "fields": [&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; {"name": "offset", "type": "long"},&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; {"name": "file", "type": {&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; "type": "record",&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; "name": "file",&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; 
&amp;nbsp; "fields": []&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; }}&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; ]&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;nbsp; }},&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;nbsp; {"name": "method", "type": "string"},&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;nbsp; {"name": "clientip", "type": "string"},&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;nbsp; {"name": "user_agent", "type": "string"},&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;nbsp; {"name": "realip", "type": "string"},&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;nbsp; {"name": "http_accept_encoding", "type": "string"},&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;nbsp; {"name": "country", "type": "string"},&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;nbsp; {"name": "timestamp_delivery", "type": "string"},&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;nbsp; {"name": "http_accept_language", "type": "string"},&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;nbsp; {"name": "scheme", "type": "string"},&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;nbsp; {"name": "request_id", "type": "string"},&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;nbsp; {"name": "http_referer", "type": "string"},&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;nbsp; {"name": "req_lengths", "type": "string"},&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;nbsp; {"name": "server_protocol", "type": "string"},&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;nbsp; {"name": "request", "type": "string"},&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;nbsp; {"name": "request_time", "type": "string"},&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;nbsp; {"name": "ssl_protocol", "type": "string"},&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;nbsp; {"name": "host", "type": "string"},&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;nbsp; {"name": "cache", "type": "string"},&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;nbsp; {"name": "input", "type": {&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; "type": "record",&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; "name": 
"input",&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; "fields": []&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;nbsp; }},&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;nbsp; {"name": "agent", "type": {&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; "type": "record",&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; "name": "agent",&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; "fields": []&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;nbsp; }},&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;nbsp; {"name": "hostname_logstash", "type": "string"},&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;nbsp; {"name": "x_requested_with", "type": "string"},&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;nbsp; {"name": "status", "type": "string"},&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;nbsp; {"name": "project_id", "type": "string"},&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;nbsp; {"name": "cookie_session", "type": "string"},&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;nbsp; {"name": "timestamp", "type": "string"},&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;nbsp; {"name": "serverip", "type": "string"},&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;nbsp; {"name": "geo", "type": "string"},&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;nbsp; {"name": "source", "type": "string"},&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;nbsp; {"name": "upstream_status", "type": "string"},&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;nbsp; {"name": "upstream_port", "type": "string"},&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;nbsp; {"name": "hostname", "type": "string"},&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;nbsp; {"name": "size", "type": "string"},&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;nbsp; {"name": "ssl_ja3_hash", "type": "string"},&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;nbsp; {"name": "sni", "type": "string"},&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;nbsp; {"name": "http_accept", "type": "string"},&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;nbsp; {"name": "location_id", "type": "string"},&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp; &amp;nbsp; {"name": "server_port", "type": 
"string"},&lt;/DIV&gt;&lt;DIV&gt;{"name": "timestamp_record", "type": "string"},&lt;/DIV&gt;&lt;DIV&gt;{"name": "param_request", "type": "string"}&lt;/DIV&gt;&lt;DIV&gt;]&lt;/DIV&gt;&lt;DIV&gt;}&lt;BR /&gt;&lt;BR /&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;span class="lia-inline-image-display-wrapper lia-image-align-inline" image-alt="Снимок экрана 2023-04-26 в 15.11.42.png" style="width: 999px;"&gt;&lt;img src="https://community.cloudera.com/t5/image/serverpage/image-id/37378i39DDC8C2DEBBF20A/image-size/large?v=v2&amp;amp;px=999" role="button" title="Снимок экрана 2023-04-26 в 15.11.42.png" alt="Снимок экрана 2023-04-26 в 15.11.42.png" /&gt;&lt;/span&gt;&lt;/DIV&gt;</description>
      <pubDate>Wed, 26 Apr 2023 12:12:09 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Kafka-gt-Nifi-parquet-gt-HDFS/m-p/369465#M240493</guid>
      <dc:creator>VLban</dc:creator>
      <dc:date>2023-04-26T12:12:09Z</dc:date>
    </item>
    <item>
      <title>Re: Kafka--&gt;Nifi--parquet---&gt;HDFS</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Kafka-gt-Nifi-parquet-gt-HDFS/m-p/369482#M240497</link>
      <description>&lt;P&gt;I figured out this error, then I have a problem, I get files in HDFS, but they are without the parquet extension, I don’t understand what kind of files these are if I put them everywhere without compression&lt;/P&gt;</description>
      <pubDate>Wed, 26 Apr 2023 15:00:35 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Kafka-gt-Nifi-parquet-gt-HDFS/m-p/369482#M240497</guid>
      <dc:creator>VLban</dc:creator>
      <dc:date>2023-04-26T15:00:35Z</dc:date>
    </item>
    <item>
      <title>Re: Kafka--&gt;Nifi--parquet---&gt;HDFS</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Kafka-gt-Nifi-parquet-gt-HDFS/m-p/369489#M240499</link>
      <description>&lt;P&gt;Can the PutHDFS processor write the file to HDFS as Parquet, or do I need PutParquet to write to HDFS?&lt;/P&gt;</description>
      <pubDate>Wed, 26 Apr 2023 15:30:16 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Kafka-gt-Nifi-parquet-gt-HDFS/m-p/369489#M240499</guid>
      <dc:creator>VLban</dc:creator>
      <dc:date>2023-04-26T15:30:16Z</dc:date>
    </item>
    <item>
      <title>Re: Kafka--&gt;Nifi--parquet---&gt;HDFS</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Kafka-gt-Nifi-parquet-gt-HDFS/m-p/369539#M240506</link>
      <description>&lt;P&gt;If you need the parquet extension, you can use PutHDFS, define the path to your location and add the extension after your filename. Something like: /path/to/where/you/want/${filename}.parquet.&lt;BR /&gt;Otherwise you can add an UpdateAttribute before PutHDFS and rename your FlowFile from ${filename} to ${filename}.parquet.&lt;BR /&gt;Or you can use PutParquet and that's all.&lt;/P&gt;</description>
      <pubDate>Wed, 26 Apr 2023 20:12:16 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Kafka-gt-Nifi-parquet-gt-HDFS/m-p/369539#M240506</guid>
      <dc:creator>cotopaul</dc:creator>
      <dc:date>2023-04-26T20:12:16Z</dc:date>
    </item>
    <item>
      <title>Re: Kafka--&gt;Nifi--parquet---&gt;HDFS</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Kafka-gt-Nifi-parquet-gt-HDFS/m-p/369596#M240512</link>
      <description>&lt;P&gt;this method does not work, puthdfs creates a directory with the extension directory1.parquet and inside the directory there is still a file without extension, I tried like this /user/test/${&lt;SPAN&gt;now():toNumber()}.parquet,&amp;nbsp;but I need to have files in the same test directory and not parquet directories&lt;/SPAN&gt;&lt;/P&gt;</description>
      <pubDate>Thu, 27 Apr 2023 08:12:01 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Kafka-gt-Nifi-parquet-gt-HDFS/m-p/369596#M240512</guid>
      <dc:creator>VLban</dc:creator>
      <dc:date>2023-04-27T08:12:01Z</dc:date>
    </item>
    <item>
      <title>Re: Kafka--&gt;Nifi--parquet---&gt;HDFS</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Kafka-gt-Nifi-parquet-gt-HDFS/m-p/369597#M240513</link>
      <description>&lt;P&gt;Add an UpdateAttribute in front of PutHDFS and use the NiFi Expression Language to rename your file from ${filename} to ${filename}.parquet, and then save it into HDFS wherever you want.&lt;/P&gt;</description>
      <pubDate>Thu, 27 Apr 2023 08:14:31 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Kafka-gt-Nifi-parquet-gt-HDFS/m-p/369597#M240513</guid>
      <dc:creator>cotopaul</dc:creator>
      <dc:date>2023-04-27T08:14:31Z</dc:date>
    </item>
    <item>
      <title>Re: Kafka--&gt;Nifi--parquet---&gt;HDFS</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Kafka-gt-Nifi-parquet-gt-HDFS/m-p/369683#M240530</link>
      <description>&lt;P&gt;yes, it helped me, it's a pity that there is no built-in functionality for puthdfs&lt;/P&gt;</description>
      <pubDate>Fri, 28 Apr 2023 05:20:13 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Kafka-gt-Nifi-parquet-gt-HDFS/m-p/369683#M240530</guid>
      <dc:creator>VLban</dc:creator>
      <dc:date>2023-04-28T05:20:13Z</dc:date>
    </item>
    <item>
      <title>Re: Kafka--&gt;Nifi--parquet---&gt;HDFS</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Kafka-gt-Nifi-parquet-gt-HDFS/m-p/369711#M240538</link>
      <description>&lt;P&gt;is it possible to make three processes per kafka and so that the output data is not repeated?&lt;/P&gt;</description>
      <pubDate>Fri, 28 Apr 2023 10:17:36 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Kafka-gt-Nifi-parquet-gt-HDFS/m-p/369711#M240538</guid>
      <dc:creator>VLban</dc:creator>
      <dc:date>2023-04-28T10:17:36Z</dc:date>
    </item>
    <item>
      <title>Re: Kafka--&gt;Nifi--parquet---&gt;HDFS</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Kafka-gt-Nifi-parquet-gt-HDFS/m-p/369718#M240541</link>
      <description>&lt;P&gt;Another interesting point is how to implement, on UpdateAttribute, a check of whether the file was delivered to PutHDFS, because if HDFS runs out of space locally, the process continues to run and the files are not written but are thrown out of the queue and go to another file in the basket. In fact, files are lost if the space runs out and the chain does not stop. You need to check whether the file arrived in HDFS and, if it did not, stop the stream; or, if the space ran out in HDFS, stop PutHDFS and let the recycle bin fill up.&lt;/P&gt;</description>
      <pubDate>Fri, 28 Apr 2023 12:05:15 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Kafka-gt-Nifi-parquet-gt-HDFS/m-p/369718#M240541</guid>
      <dc:creator>VLban</dc:creator>
      <dc:date>2023-04-28T12:05:15Z</dc:date>
    </item>
  </channel>
</rss>

