<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>question Re: Apache Airflow in Support Questions</title>
    <link>https://community.cloudera.com/t5/Support-Questions/Apache-Airflow/m-p/121352#M84115</link>
    <description>&lt;P&gt;Has anyone integrated Airflow with NiFi? &lt;A rel="user" href="https://community.cloudera.com/users/9304/tspann.html" nodeid="9304"&gt;@Timothy Spann&lt;/A&gt;&lt;/P&gt;</description>
    <pubDate>Wed, 10 Apr 2019 20:45:27 GMT</pubDate>
    <dc:creator>abinanths</dc:creator>
    <dc:date>2019-04-10T20:45:27Z</dc:date>
    <item>
      <title>Apache Airflow</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Apache-Airflow/m-p/121336#M84099</link>
      <description>&lt;P&gt;Has anyone integrated Apache Airflow and HDP?&lt;/P&gt;&lt;P&gt;It looks interesting.&lt;/P&gt;</description>
      <pubDate>Sat, 04 Jun 2016 22:59:04 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Apache-Airflow/m-p/121336#M84099</guid>
      <dc:creator>TimothySpann</dc:creator>
      <dc:date>2016-06-04T22:59:04Z</dc:date>
    </item>
    <item>
      <title>Re: Apache Airflow</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Apache-Airflow/m-p/121337#M84100</link>
      <description>&lt;P&gt;@&lt;A href="https://community.hortonworks.com/users/9304/tspann.html"&gt;Timothy Spann&lt;/A&gt;&lt;/P&gt;&lt;P&gt;Added @&lt;A href="https://community.hortonworks.com/users/381/cnauroth.html"&gt;Chris Nauroth&lt;/A&gt; to the thread. He is a mentor in this Apache project.&lt;/P&gt;</description>
      <pubDate>Tue, 27 Dec 2016 06:09:47 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Apache-Airflow/m-p/121337#M84100</guid>
      <dc:creator>cstanca</dc:creator>
      <dc:date>2016-12-27T06:09:47Z</dc:date>
    </item>
    <item>
      <title>Re: Apache Airflow</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Apache-Airflow/m-p/121338#M84101</link>
      <description>&lt;P&gt;Is there any best practice or installation guide out there by hortonworks to set up airflow within hdp and start random jobs?&lt;/P&gt;&lt;P&gt;I have seen there are some operators available and the rest could be managed via shell.&lt;/P&gt;</description>
      <pubDate>Fri, 10 Feb 2017 20:33:23 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Apache-Airflow/m-p/121338#M84101</guid>
      <dc:creator>jiiiiken88</dc:creator>
      <dc:date>2017-02-10T20:33:23Z</dc:date>
    </item>
    <item>
      <title>Re: Apache Airflow</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Apache-Airflow/m-p/121339#M84102</link>
      <description>&lt;P&gt;&lt;A href="https://airflow.incubator.apache.org/tutorial.html" target="_blank"&gt;https://airflow.incubator.apache.org/tutorial.html&lt;/A&gt;&lt;/P&gt;&lt;P&gt;Hortonworks does not support Airflow as of yet.   It's in pretty early incubation.&lt;/P&gt;&lt;P&gt;Perhaps @&lt;A href="https://community.hortonworks.com/users/381/cnauroth.html"&gt;Chris Nauroth&lt;/A&gt; can shed some light.&lt;/P&gt;&lt;P&gt;You might want to try out HDF (Apache NiFi) for job running&lt;/P&gt;&lt;P&gt;&lt;A href="https://wiki.apache.org/incubator/AirflowProposal" target="_blank"&gt;https://wiki.apache.org/incubator/AirflowProposal&lt;/A&gt;&lt;/P&gt;&lt;P&gt;Anything that works with Apache Hadoop will work with Hortonworks as HDP is pure 100% open source Apache Hadoop.&lt;/P&gt;&lt;P&gt;&lt;A href="http://nerds.airbnb.com/airflow/" target="_blank"&gt;http://nerds.airbnb.com/airflow/&lt;/A&gt;&lt;/P&gt;&lt;P&gt;This is Airbnb's project for the most part, so check out their info.&lt;/P&gt;&lt;P&gt;See:   &lt;A href="https://airflow.incubator.apache.org/code.html" target="_blank"&gt;https://airflow.incubator.apache.org/code.html&lt;/A&gt;&lt;/P&gt;&lt;P&gt;macros.random might assist you&lt;/P&gt;&lt;P&gt;What's your use case?&lt;/P&gt;</description>
      <pubDate>Fri, 10 Feb 2017 21:57:51 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Apache-Airflow/m-p/121339#M84102</guid>
      <dc:creator>TimothySpann</dc:creator>
      <dc:date>2017-02-10T21:57:51Z</dc:date>
    </item>
    <item>
      <title>Re: Apache Airflow</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Apache-Airflow/m-p/121340#M84103</link>
      <description>&lt;P&gt;Hi Timothy,&lt;/P&gt;&lt;P&gt;Thanks for your quick reply. The point is that I am quite unhappy with Oozie. Well, it does its job, but handling the XML files is not my favourite. So I was looking for something more sophisticated where I can have a dependency between different job packages (i.e. a coordinator in Oozie).&lt;/P&gt;&lt;P&gt;I thought Airflow could be my solution.&lt;/P&gt;</description>
      <pubDate>Sat, 11 Feb 2017 03:55:08 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Apache-Airflow/m-p/121340#M84103</guid>
      <dc:creator>jiiiiken88</dc:creator>
      <dc:date>2017-02-11T03:55:08Z</dc:date>
    </item>
    <item>
      <title>Re: Apache Airflow</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Apache-Airflow/m-p/121341#M84104</link>
      <description>&lt;P&gt;Options:&lt;/P&gt;&lt;P&gt;1.  Coordinate the Jobs inside Spark&lt;/P&gt;&lt;P&gt;2.  Coordinate the Jobs with Apache NiFi (I have done Sqoop, Hive, HBase, Pig, Spark, Python and Deep Learning jobs with it)&lt;/P&gt;&lt;P&gt;3.  Manage Oozie with Falcon  &lt;A href="http://hortonworks.com/apache/falcon/" target="_blank"&gt;http://hortonworks.com/apache/falcon/&lt;/A&gt;&lt;/P&gt;&lt;P&gt;4.  HUE is part of HDP, &lt;A href="http://gethue.com/scheduling/" target="_blank"&gt;http://gethue.com/scheduling/&lt;/A&gt;&lt;/P&gt;&lt;P&gt;5.  Luigi - I used it a few times, seemed okay   &lt;A href="https://blog.kupstaitis-dunkler.com/2016/07/19/how-to-create-a-data-pipeline-using-luigi/" target="_blank"&gt;https://blog.kupstaitis-dunkler.com/2016/07/19/how-to-create-a-data-pipeline-using-luigi/&lt;/A&gt;&lt;/P&gt;&lt;P&gt;See:   &lt;A href="https://www.linkedin.com/pulse/nifi-vs-falcon-oozie-birender-saini" target="_blank"&gt;https://www.linkedin.com/pulse/nifi-vs-falcon-oozie-birender-saini&lt;/A&gt;&lt;/P&gt;</description>
      <pubDate>Sat, 11 Feb 2017 04:03:25 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Apache-Airflow/m-p/121341#M84104</guid>
      <dc:creator>TimothySpann</dc:creator>
      <dc:date>2017-02-11T04:03:25Z</dc:date>
    </item>
    <item>
      <title>Re: Apache Airflow</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Apache-Airflow/m-p/121342#M84105</link>
      <description>&lt;P&gt;Thanks, I will have a look into it. Especially controlling jobs with Spark sounds interesting. I haven't heard of it before. Do you have a source? Thanks again!&lt;/P&gt;</description>
      <pubDate>Sat, 11 Feb 2017 11:34:22 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Apache-Airflow/m-p/121342#M84105</guid>
      <dc:creator>jiiiiken88</dc:creator>
      <dc:date>2017-02-11T11:34:22Z</dc:date>
    </item>
    <item>
      <title>Re: Apache Airflow</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Apache-Airflow/m-p/121343#M84106</link>
      <description>&lt;P&gt;Hi Timothy,&lt;/P&gt;&lt;P&gt;Okay, I had a closer look into it. For me it looks like Apache NiFi (Hortonworks' DataFlow) is more or less a tool piping your data from a non-Hadoop system (RDBMS, IoT, ...) into Hadoop. Thereafter, another tool is needed to manage data. Here, Apache Falcon has its strength.&lt;/P&gt;&lt;P&gt;Airflow, Luigi, Azkaban are solutions for broader scheduling tasks and need more effort to be installed (next) to your cluster.&lt;/P&gt;&lt;P&gt;Quickly dipping my toe into scheduling with Spark, I didn't come up with many resources.&lt;/P&gt;&lt;P&gt;Last but not least, Oozie (e.g. managed via Hue) seems like the easiest fit to manage all kinds of workflows (Sqoop, Hive, Shell, Spark, ...) within a cluster. Of course, I have dependencies between single actions, whereas dependencies between single coordinators are missing. In my humble opinion this functionality can be added with flag files.&lt;/P&gt;&lt;P&gt;I think Oozie is still the best fit, although it is cumbersome to handle via XML files. Of course there is the Eclipse plugin to visualize workflows and create them as well.&lt;/P&gt;&lt;P&gt;Feel free to correct my views. Thanks!&lt;/P&gt;</description>
      <pubDate>Mon, 13 Feb 2017 16:02:47 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Apache-Airflow/m-p/121343#M84106</guid>
      <dc:creator>jiiiiken88</dc:creator>
      <dc:date>2017-02-13T16:02:47Z</dc:date>
    </item>
    <item>
      <title>Re: Apache Airflow</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Apache-Airflow/m-p/121344#M84107</link>
      <description>&lt;P&gt;Falcon will manage Oozie.   And a Web UI instead of XML should be available soon if you don't find one out in the wild that you like.   A lot of companies are running Oozie with lots of different jobs and it works well.   If you are doing Sqoop, Pig and Hive it's your way to go.   With NiFi I run Sqoop, Pig, Spark, Python, TensorFlow and MXNet jobs and connect them.   I run them with cron timers and reactive when something happens (files appear, directories change, Kafka message arrives, MQTT message arrives, ...)&lt;/P&gt;&lt;P&gt;&lt;A href="https://community.hortonworks.com/articles/64844/running-apache-pig-scripts-from-apache-nifi-and-st.html"&gt;https://community.hortonworks.com/articles/64844/running-apache-pig-scripts-from-apache-nifi-and-st.html&lt;/A&gt;&lt;/P&gt;&lt;P&gt;&lt;A href="https://community.hortonworks.com/articles/73828/submitting-spark-jobs-from-apache-nifi-using-livy.html"&gt;https://community.hortonworks.com/articles/73828/submitting-spark-jobs-from-apache-nifi-using-livy.html&lt;/A&gt; &lt;/P&gt;&lt;P&gt;&lt;A href="https://community.hortonworks.com/content/kbentry/63228/monitoring-your-containers-with-sysdig-from-hdf-20.html"&gt;https://community.hortonworks.com/content/kbentry/63228/monitoring-your-containers-with-sysdig-from-hdf-20.html&lt;/A&gt; &lt;/P&gt;&lt;P&gt;&lt;A href="https://community.hortonworks.com/articles/81222/adding-stanford-corenlp-to-big-data-pipelines-apac.html"&gt;https://community.hortonworks.com/articles/81222/adding-stanford-corenlp-to-big-data-pipelines-apac.html&lt;/A&gt;&lt;/P&gt;&lt;P&gt;&lt;A href="https://community.hortonworks.com/articles/59349/hdf-20-flow-for-ingesting-real-time-tweets-from-st.html"&gt;https://community.hortonworks.com/articles/59349/hdf-20-flow-for-ingesting-real-time-tweets-from-st.html&lt;/A&gt;
&lt;A href="https://community.hortonworks.com/articles/61180/streaming-ingest-of-google-sheets-into-a-connected.html"&gt;https://community.hortonworks.com/articles/61180/streaming-ingest-of-google-sheets-into-a-connected.html&lt;/A&gt; 
&lt;A href="https://community.hortonworks.com/articles/64069/converting-a-large-json-file-into-csv.html"&gt;https://community.hortonworks.com/articles/64069/converting-a-large-json-file-into-csv.html&lt;/A&gt; 
&lt;A href="https://community.hortonworks.com/articles/64122/incrementally-streaming-rdbms-data-to-your-hadoop.html"&gt;https://community.hortonworks.com/articles/64122/incrementally-streaming-rdbms-data-to-your-hadoop.html&lt;/A&gt;
&lt;A href="https://community.hortonworks.com/articles/72420/ingesting-remote-sensor-feeds-into-apache-phoenix.html"&gt;https://community.hortonworks.com/articles/72420/ingesting-remote-sensor-feeds-into-apache-phoenix.html&lt;/A&gt;
&lt;A href="https://community.hortonworks.com/articles/61717/ingesting-jms-messages-to-hdfs-via-hdf-20.html"&gt;https://community.hortonworks.com/articles/61717/ingesting-jms-messages-to-hdfs-via-hdf-20.html&lt;/A&gt;&lt;/P&gt;</description>
      <pubDate>Mon, 13 Feb 2017 20:03:24 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Apache-Airflow/m-p/121344#M84107</guid>
      <dc:creator>TimothySpann</dc:creator>
      <dc:date>2017-02-13T20:03:24Z</dc:date>
    </item>
    <item>
      <title>Re: Apache Airflow</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Apache-Airflow/m-p/121345#M84108</link>
      <description>&lt;P&gt;Airflow maintainer here. Let me list some of the great things of Airflow that set it apart.&lt;/P&gt;&lt;P&gt;1. Configuration as code. Airflow uses Python for the definitions of DAGs (i.e. workflows). This gives you the full power and flexibility of a programming language with a wealth of modules.&lt;/P&gt;&lt;P&gt;2. DAGs are testable and versionable. As they are in code you can integrate your workflow definitions into your CI/CD pipeline.&lt;/P&gt;&lt;P&gt;3. Ease of setup, local development. While Airflow gives you horizontal and vertical scalability it also allows your developers to test and run locally, all from a single pip install apache-airflow. This greatly enhances productivity and reproducibility.&lt;/P&gt;&lt;P&gt;4. Real data sucks. Airflow knows that, so we have features for retrying and SLAs.&lt;/P&gt;&lt;P&gt;5. Changing history. After a year you find out that you need to put a task into a DAG, but it needs to run ‘in the past’. Airflow allows you to do backfills giving you the opportunity to rewrite history. And guess what, you more often need it than you think.&lt;/P&gt;&lt;P&gt;6. Great debuggability. There are logs for everything, but nicely tied to the unit of work they are doing. Scheduler logs, DAG parsing/processing logs, task logs. Being in Python the hurdle is quite low to jump in and do a fix yourself if needed.&lt;/P&gt;&lt;P&gt;7. A wealth of connectors that allow you to run tasks on Kubernetes, Docker, Spark, Hive, Presto, Druid, etc etc.&lt;/P&gt;&lt;P&gt;8. A very active community.&lt;/P&gt;&lt;P&gt;As to your question. There is no particular dependency between HDP and Airflow. If you make Ambari deploy the client libraries on your Airflow workers, it will work just fine.&lt;/P&gt;</description>
      <pubDate>Sat, 16 Dec 2017 05:50:14 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Apache-Airflow/m-p/121345#M84108</guid>
      <dc:creator>bdbruin</dc:creator>
      <dc:date>2017-12-16T05:50:14Z</dc:date>
    </item>
    <item>
      <title>Re: Apache Airflow</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Apache-Airflow/m-p/121346#M84109</link>
      <description>&lt;P&gt;Hi &lt;A rel="user" href="https://community.cloudera.com/users/9304/tspann.html" nodeid="9304"&gt;@Timothy Spann&lt;/A&gt;&lt;/P&gt;&lt;P&gt;I have made Mpack for Ambari with airflow service and posted it to GitHub. With this Mpack you're able to install and manage airflow in Ambari. &lt;/P&gt;&lt;P&gt;&lt;A href="https://github.com/miho120/ambari-airflow-mpack"&gt;https://github.com/miho120/ambari-airflow-mpack&lt;/A&gt;&lt;/P&gt;</description>
      <pubDate>Mon, 22 Jan 2018 17:17:43 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Apache-Airflow/m-p/121346#M84109</guid>
      <dc:creator>mykola_mykhalov</dc:creator>
      <dc:date>2018-01-22T17:17:43Z</dc:date>
    </item>
    <item>
      <title>Re: Apache Airflow</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Apache-Airflow/m-p/121347#M84110</link>
      <description>&lt;P&gt;Thanks very much &lt;/P&gt;&lt;P&gt;Can you update the Mpack to run Airflow from a Python virtual environment?&lt;/P&gt;</description>
      <pubDate>Fri, 16 Mar 2018 20:11:08 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Apache-Airflow/m-p/121347#M84110</guid>
      <dc:creator>ururu</dc:creator>
      <dc:date>2018-03-16T20:11:08Z</dc:date>
    </item>
    <item>
      <title>Re: Apache Airflow</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Apache-Airflow/m-p/121348#M84111</link>
      <description>&lt;P&gt;&lt;A rel="user" href="https://community.cloudera.com/users/63185/trinimak.html" nodeid="63185"&gt;@Ruslan Fialkovsky&lt;/A&gt; Sure. But could you create GitHub issue to not let me forget?&lt;/P&gt;</description>
      <pubDate>Mon, 19 Mar 2018 16:21:06 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Apache-Airflow/m-p/121348#M84111</guid>
      <dc:creator>mykola_mykhalov</dc:creator>
      <dc:date>2018-03-19T16:21:06Z</dc:date>
    </item>
    <item>
      <title>Re: Apache Airflow</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Apache-Airflow/m-p/121349#M84112</link>
      <description>&lt;P&gt;New version has been released. Now you're able to integrate airflow with virtual environment.&lt;/P&gt;&lt;P&gt;Also I wrote an article about airflow integration:&lt;/P&gt;&lt;P&gt;&lt;A href="https://medium.com/@mykolamykhalov/integrating-apache-airflow-with-apache-ambari-ccab2c90173" target="_blank"&gt;https://medium.com/@mykolamykhalov/integrating-apache-airflow-with-apache-ambari-ccab2c90173&lt;/A&gt;&lt;/P&gt;</description>
      <pubDate>Mon, 26 Mar 2018 18:51:18 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Apache-Airflow/m-p/121349#M84112</guid>
      <dc:creator>mykola_mykhalov</dc:creator>
      <dc:date>2018-03-26T18:51:18Z</dc:date>
    </item>
    <item>
      <title>Re: Apache Airflow</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Apache-Airflow/m-p/121350#M84113</link>
      <description>&lt;P&gt;I gave you star at github&lt;/P&gt;</description>
      <pubDate>Thu, 29 Mar 2018 20:09:50 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Apache-Airflow/m-p/121350#M84113</guid>
      <dc:creator>ururu</dc:creator>
      <dc:date>2018-03-29T20:09:50Z</dc:date>
    </item>
    <item>
      <title>Re: Apache Airflow</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Apache-Airflow/m-p/121351#M84114</link>
      <description>&lt;P&gt;@&lt;A href="https://community.hortonworks.com/users/14981/bdbruin.html"&gt;Bolke de Bruin&lt;/A&gt; &lt;/P&gt;&lt;P&gt;Could you please provide the link or doc to install and configure airflow with HDP.  &lt;/P&gt;</description>
      <pubDate>Mon, 21 May 2018 15:01:28 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Apache-Airflow/m-p/121351#M84114</guid>
      <dc:creator>ashneesharma88</dc:creator>
      <dc:date>2018-05-21T15:01:28Z</dc:date>
    </item>
    <item>
      <title>Re: Apache Airflow</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Apache-Airflow/m-p/121352#M84115</link>
      <description>&lt;P&gt;Has anyone integrated Airflow with NiFi? &lt;A rel="user" href="https://community.cloudera.com/users/9304/tspann.html" nodeid="9304"&gt;@Timothy Spann&lt;/A&gt;&lt;/P&gt;</description>
      <pubDate>Wed, 10 Apr 2019 20:45:27 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Apache-Airflow/m-p/121352#M84115</guid>
      <dc:creator>abinanths</dc:creator>
      <dc:date>2019-04-10T20:45:27Z</dc:date>
    </item>
    <item>
      <title>Re: Apache Airflow</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Apache-Airflow/m-p/121353#M84116</link>
      <description>&lt;P&gt;Has anyone integrated Apache Airflow and NiFi?&lt;/P&gt;</description>
      <pubDate>Wed, 10 Apr 2019 20:46:12 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Apache-Airflow/m-p/121353#M84116</guid>
      <dc:creator>abinanths</dc:creator>
      <dc:date>2019-04-10T20:46:12Z</dc:date>
    </item>
  </channel>
</rss>

