<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>question Move files from a spooling directory to HDFS with flume in Archives of Support Questions (Read Only)</title>
    <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/Move-files-from-a-spooling-directory-to-HDFS-with-flume/m-p/42181#M32513</link>
    <description>&lt;TABLE&gt;
&lt;TBODY&gt;
&lt;TR&gt;
&lt;TD&gt;
&lt;DIV class="vote"&gt;
&lt;DIV class="favoritecount"&gt;&amp;nbsp;&lt;/DIV&gt;
&lt;/DIV&gt;
&lt;/TD&gt;
&lt;TD&gt;
&lt;DIV&gt;
&lt;DIV class="post-text"&gt;
&lt;P&gt;Hi I am using flume to copy the files from spooling directory to HDFS using file as the channel.&lt;/P&gt;
&lt;PRE&gt;#Component names
a1.sources = src
a1.channels = c1
a1.sinks = k1

#Source details
a1.sources.src.type = spooldir
a1.sources.src.channels = c1
a1.sources.src.spoolDir = /home/cloudera/onetrail
a1.sources.src.fileHeader = false
a1.sources.src.basenameHeader = true
# a1.sources.src.basenameHeaderKey = basename
a1.sources.src.fileSuffix = .COMPLETED
a1.sources.src.threads = 4
a1.sources.src.interceptors = newint
a1.sources.src.interceptors.newint.type = timestamp

#Sink details
a1.sinks.k1.type = hdfs
a1.sinks.k1.channel = c1
a1.sinks.k1.hdfs.path = hdfs:///data/contentProviders/cnet/%Y%m%d/
# a1.sinks.k1.hdfs.round = false
# a1.sinks.k1.hdfs.roundValue = 1
# a1.sinks.k1.hdfs.roundUnit = second
a1.sinks.k1.hdfs.writeFormat = Text
a1.sinks.k1.hdfs.fileType = DataStream
#a1.sinks.k1.hdfs.file.Type = DataStream
a1.sinks.k1.hdfs.filePrefix = %{basename}
# a1.sinks.k1.hdfs.fileSuffix = .xml
a1.sinks.k1.threadsPoolSize = 4

# use a single file at a time
a1.sinks.k1.hdfs.maxOpenFiles = 1

# rollover file based on maximum size of 10 MB
a1.sinks.k1.hdfs.rollCount = 0
a1.sinks.k1.hdfs.rollInterval = 0
a1.sinks.k1.hdfs.rollSize = 0
a1.sinks.k1.hdfs.batchSize = 12

# Channel details
a1.channels.c1.type = file
a1.channels.c1.checkpointDir = /tmp/flume/checkpoint/
a1.channels.c1.dataDirs = /tmp/flume/data/

# Bind the source and sink to the channel
a1.sources.src.channels = c1
a1.sinks.k1.channels = c1&lt;/PRE&gt;
&lt;P&gt;With the above configuration it is able to copy the files to HDFS, but the problem I am facing is that one file keeps staying as .tmp and the complete file content is not copied.&lt;/P&gt;
&lt;P&gt;Can someone help me understand what could be the problem?&lt;/P&gt;
&lt;PRE&gt;&amp;nbsp;&lt;/PRE&gt;
&lt;/DIV&gt;
&lt;/DIV&gt;
&lt;/TD&gt;
&lt;/TR&gt;
&lt;/TBODY&gt;
&lt;/TABLE&gt;</description>
    <pubDate>Fri, 16 Sep 2022 10:26:29 GMT</pubDate>
    <dc:creator>Raghava9</dc:creator>
    <dc:date>2022-09-16T10:26:29Z</dc:date>
    <item>
      <title>Move files from a spooling directory to HDFS with flume</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/Move-files-from-a-spooling-directory-to-HDFS-with-flume/m-p/42181#M32513</link>
      <description>&lt;TABLE&gt;
&lt;TBODY&gt;
&lt;TR&gt;
&lt;TD&gt;
&lt;DIV class="vote"&gt;
&lt;DIV class="favoritecount"&gt;&amp;nbsp;&lt;/DIV&gt;
&lt;/DIV&gt;
&lt;/TD&gt;
&lt;TD&gt;
&lt;DIV&gt;
&lt;DIV class="post-text"&gt;
&lt;P&gt;Hi I am using flume to copy the files from spooling directory to HDFS using file as the channel.&lt;/P&gt;
&lt;PRE&gt;#Component names
a1.sources = src
a1.channels = c1
a1.sinks = k1

#Source details
a1.sources.src.type = spooldir
a1.sources.src.channels = c1
a1.sources.src.spoolDir = /home/cloudera/onetrail
a1.sources.src.fileHeader = false
a1.sources.src.basenameHeader = true
# a1.sources.src.basenameHeaderKey = basename
a1.sources.src.fileSuffix = .COMPLETED
a1.sources.src.threads = 4
a1.sources.src.interceptors = newint
a1.sources.src.interceptors.newint.type = timestamp

#Sink details
a1.sinks.k1.type = hdfs
a1.sinks.k1.channel = c1
a1.sinks.k1.hdfs.path = hdfs:///data/contentProviders/cnet/%Y%m%d/
# a1.sinks.k1.hdfs.round = false
# a1.sinks.k1.hdfs.roundValue = 1
# a1.sinks.k1.hdfs.roundUnit = second
a1.sinks.k1.hdfs.writeFormat = Text
a1.sinks.k1.hdfs.fileType = DataStream
#a1.sinks.k1.hdfs.file.Type = DataStream
a1.sinks.k1.hdfs.filePrefix = %{basename}
# a1.sinks.k1.hdfs.fileSuffix = .xml
a1.sinks.k1.threadsPoolSize = 4

# use a single file at a time
a1.sinks.k1.hdfs.maxOpenFiles = 1

# rollover file based on maximum size of 10 MB
a1.sinks.k1.hdfs.rollCount = 0
a1.sinks.k1.hdfs.rollInterval = 0
a1.sinks.k1.hdfs.rollSize = 0
a1.sinks.k1.hdfs.batchSize = 12

# Channel details
a1.channels.c1.type = file
a1.channels.c1.checkpointDir = /tmp/flume/checkpoint/
a1.channels.c1.dataDirs = /tmp/flume/data/

# Bind the source and sink to the channel
a1.sources.src.channels = c1
a1.sinks.k1.channels = c1&lt;/PRE&gt;
&lt;P&gt;With the above configuration it is able to copy the files to HDFS, but the problem I am facing is that one file keeps staying as .tmp and the complete file content is not copied.&lt;/P&gt;
&lt;P&gt;Can someone help me understand what could be the problem?&lt;/P&gt;
&lt;PRE&gt;&amp;nbsp;&lt;/PRE&gt;
&lt;/DIV&gt;
&lt;/DIV&gt;
&lt;/TD&gt;
&lt;/TR&gt;
&lt;/TBODY&gt;
&lt;/TABLE&gt;</description>
      <pubDate>Fri, 16 Sep 2022 10:26:29 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/Move-files-from-a-spooling-directory-to-HDFS-with-flume/m-p/42181#M32513</guid>
      <dc:creator>Raghava9</dc:creator>
      <dc:date>2022-09-16T10:26:29Z</dc:date>
    </item>
    <item>
      <title>Re: Move files from a spooling directory to HDFS with flume</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/Move-files-from-a-spooling-directory-to-HDFS-with-flume/m-p/42202#M32514</link>
      <description>You have specified that all roll values are zero:&lt;BR /&gt;a1.sinks.k1.hdfs.rollCount = 0&lt;BR /&gt;a1.sinks.k1.hdfs.rollInterval = 0&lt;BR /&gt;a1.sinks.k1.hdfs.rollSize = 0&lt;BR /&gt;&lt;BR /&gt;Which means the latest file will never roll (since you have hdfs.maxOpenFiles=1). I'd suggest adding the hdfs.idleTimeout if you want to make sure they roll after the file has been ingested and sent to hdfs.&lt;BR /&gt;&lt;BR /&gt;-pd</description>
      <pubDate>Tue, 21 Jun 2016 22:49:21 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/Move-files-from-a-spooling-directory-to-HDFS-with-flume/m-p/42202#M32514</guid>
      <dc:creator>pdvorak</dc:creator>
      <dc:date>2016-06-21T22:49:21Z</dc:date>
    </item>
    <item>
      <title>Re: Move files from a spooling directory to HDFS with flume</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/Move-files-from-a-spooling-directory-to-HDFS-with-flume/m-p/68047#M32516</link>
      <description>&lt;P&gt;Please explain how to do the data transfer from the local file system to HDFS using the Taildir&amp;nbsp;Flume source. My use case is to deal with real-time data, so the data in the source directory keeps updating.&lt;/P&gt;</description>
      <pubDate>Mon, 11 Jun 2018 11:23:59 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/Move-files-from-a-spooling-directory-to-HDFS-with-flume/m-p/68047#M32516</guid>
      <dc:creator>Swechchha</dc:creator>
      <dc:date>2018-06-11T11:23:59Z</dc:date>
    </item>
  </channel>
</rss>

