<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>question Re: Regex doesn't work on ExtractText Processor? in Support Questions</title>
    <link>https://community.cloudera.com/t5/Support-Questions/Regex-doesn-t-work-on-ExtractText-Processor/m-p/353514#M236735</link>
    <description>&lt;P&gt;Hi &lt;a href="https://community.cloudera.com/t5/user/viewprofilepage/user-id/95347"&gt;@rafy&lt;/a&gt;,&amp;nbsp;&lt;BR /&gt;&lt;BR /&gt;&lt;/P&gt;&lt;P&gt;I tried the same regex with the same sample in 1.13.2 and 1.16.3, and both returned the image URL string.&lt;BR /&gt;&lt;BR /&gt;This may be caused by the NiFi JSON beautifier: the initial JSON lacks spaces and line breaks.&lt;BR /&gt;&lt;BR /&gt;An expression that works with&amp;nbsp;&lt;A href="https://ll.thespacedevs.com/2.0.0/launch/" target="_blank"&gt;https://ll.thespacedevs.com/2.0.0/launch/&lt;/A&gt;&amp;nbsp;is:&lt;BR /&gt;&lt;STRONG&gt;(?&amp;lt;=\"image\":\")[A-Z-a-z-0-9\-\:\/\.\_]+&lt;/STRONG&gt;&lt;BR /&gt;&lt;BR /&gt;&lt;/P&gt;&lt;P&gt;and as&amp;nbsp;&lt;a href="https://community.cloudera.com/t5/user/viewprofilepage/user-id/80381"&gt;@SAMSAL&lt;/a&gt; said, EvaluateJsonPath is the right tool for this job.&lt;/P&gt;</description>
    <pubDate>Wed, 28 Sep 2022 11:07:50 GMT</pubDate>
    <dc:creator>stanere</dc:creator>
    <dc:date>2022-09-28T11:07:50Z</dc:date>
    <item>
      <title>Regex doesn't work on ExtractText Processor?</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Regex-doesn-t-work-on-ExtractText-Processor/m-p/353276#M236676</link>
      <description>&lt;P&gt;Hello,&lt;/P&gt;&lt;P&gt;I am trying to extract a value from a key/value pair JSON record. I am trying to extract the value of the "image" key.&amp;nbsp; I used an ExtractText processor but there was no match. However, there was a match when I used an online regex extractor (&lt;A href="https://onlinetexttools.com/extract-regex-matches-from-text" target="_blank"&gt;https://onlinetexttools.com/extract-regex-matches-from-text&lt;/A&gt;).&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Regex I used in the ExtractText processor:&amp;nbsp;&lt;STRONG&gt;(?&amp;lt;=\"image\"\s:\s")[A-Z-a-z-0-9\-\:\/\.\_]+&lt;/STRONG&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;STRONG&gt;My JSON record:&lt;/STRONG&gt;&lt;/P&gt;&lt;LI-CODE lang="markup"&gt;{
  "id" : "03ee73b8-a553-4575-8dfa-d0da4e7939e9",
  "url" : "https://ll.thespacedevs.com/2.0.0/launch/03ee73b8-a553-4575-8dfa-d0da4e7939e9/",
  "launch_library_id" : null,
  "slug" : "falcon-9-block-5-galaxy-33-34",
  "name" : "Falcon 9 Block 5 | Galaxy 33 &amp;amp; 34",
  "status" : {
    "id" : 2,
    "name" : "TBD"
  },
  "net" : "2022-10-05T23:07:00Z",
  "window_end" : "2022-10-06T00:14:00Z",
  "window_start" : "2022-10-05T23:07:00Z",
  "inhold" : false,
  "tbdtime" : false,
  "tbddate" : false,
  "probability" : null,
  "holdreason" : "",
  "failreason" : "",
  "hashtag" : null,
  "launch_service_provider" : {
    "id" : 121,
    "url" : "https://ll.thespacedevs.com/2.0.0/agencies/121/",
    "name" : "SpaceX",
    "type" : "Commercial"
  },
  "rocket" : {
    "id" : 7549,
    "configuration" : {
      "id" : 164,
      "launch_library_id" : 188,
      "url" : "https://ll.thespacedevs.com/2.0.0/config/launcher/164/",
      "name" : "Falcon 9",
      "family" : "Falcon",
      "full_name" : "Falcon 9 Block 5",
      "variant" : "Block 5"
    }
  },
  "mission" : {
    "id" : 5976,
    "launch_library_id" : null,
    "name" : "Galaxy 33 &amp;amp; 34",
    "description" : "Galaxy 33, 34 are two geostationary communications satellites manufactured by Northrop Grumman and operated by Intelsat.",
    "launch_designator" : null,
    "type" : "Communications",
    "orbit" : {
      "id" : 2,
      "name" : "Geostationary Transfer Orbit",
      "abbrev" : "GTO"
    }
  },
  "pad" : {
    "id" : 80,
    "url" : "https://ll.thespacedevs.com/2.0.0/pad/80/",
    "agency_id" : 121,
    "name" : "Space Launch Complex 40",
    "info_url" : null,
    "wiki_url" : "https://en.wikipedia.org/wiki/Cape_Canaveral_Air_Force_Station_Space_Launch_Complex_40",
    "map_url" : "http://maps.google.com/maps?q=28.56194122,-80.57735736",
    "latitude" : "28.56194122",
    "longitude" : "-80.57735736",
    "location" : {
      "id" : 12,
      "url" : "https://ll.thespacedevs.com/2.0.0/location/12/",
      "name" : "Cape Canaveral, FL, USA",
      "country_code" : "USA",
      "map_image" : "https://spacelaunchnow-prod-east.nyc3.digitaloceanspaces.com/media/launch_images/location_12_20200803142519.jpg",
      "total_launch_count" : 858,
      "total_landing_count" : 24
    },
    "map_image" : "https://spacelaunchnow-prod-east.nyc3.digitaloceanspaces.com/media/launch_images/pad_80_20200803143323.jpg",
    "total_launch_count" : 154
  },
  "webcast_live" : false,
  "image" : "https://spacelaunchnow-prod-east.nyc3.digitaloceanspaces.com/media/launcher_images/falcon_9_block__image_20210506060831.jpg",
  "infographic" : null,
  "program" : [ ]
}&lt;/LI-CODE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;STRONG&gt;Expected output:&lt;/STRONG&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;A href="https://spacelaunchnow-prod-east.nyc3.digitaloceanspaces.com/media/launcher_images/falcon_9_block__image_20210506060831.jpg" target="_blank"&gt;https://spacelaunchnow-prod-east.nyc3.digitaloceanspaces.com/media/launcher_images/falcon_9_block__image_20210506060831.jpg&lt;/A&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Thanks for your help.&lt;/P&gt;</description>
      <pubDate>Tue, 21 Apr 2026 07:49:46 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Regex-doesn-t-work-on-ExtractText-Processor/m-p/353276#M236676</guid>
      <dc:creator>rafy</dc:creator>
      <dc:date>2026-04-21T07:49:46Z</dc:date>
    </item>
    <item>
      <title>Re: Regex doesn't work on ExtractText Processor?</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Regex-doesn-t-work-on-ExtractText-Processor/m-p/353323#M236690</link>
      <description>&lt;P&gt;Hi,&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;You don't have to use the ExtractText processor for this. Use the EvaluateJsonPath processor with the following configuration:&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper lia-image-align-inline" image-alt="SAMSAL_0-1664202157672.png" style="width: 400px;"&gt;&lt;img src="https://community.cloudera.com/t5/image/serverpage/image-id/35749i8714969174F9B473/image-size/medium?v=v2&amp;amp;px=400" role="button" title="SAMSAL_0-1664202157672.png" alt="SAMSAL_0-1664202157672.png" /&gt;&lt;/span&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;If you find this helpful, please accept the solution.&lt;/P&gt;&lt;P&gt;Thanks&lt;/P&gt;</description>
      <pubDate>Mon, 26 Sep 2022 14:23:08 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Regex-doesn-t-work-on-ExtractText-Processor/m-p/353323#M236690</guid>
      <dc:creator>SAMSAL</dc:creator>
      <dc:date>2022-09-26T14:23:08Z</dc:date>
    </item>
    <item>
      <title>Re: Regex doesn't work on ExtractText Processor?</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Regex-doesn-t-work-on-ExtractText-Processor/m-p/353514#M236735</link>
      <description>&lt;P&gt;Hi &lt;a href="https://community.cloudera.com/t5/user/viewprofilepage/user-id/95347"&gt;@rafy&lt;/a&gt;,&amp;nbsp;&lt;BR /&gt;&lt;BR /&gt;&lt;/P&gt;&lt;P&gt;I tried the same regex with the same sample in 1.13.2 and 1.16.3, and both returned the image URL string.&lt;BR /&gt;&lt;BR /&gt;This may be caused by the NiFi JSON beautifier: the initial JSON lacks spaces and line breaks.&lt;BR /&gt;&lt;BR /&gt;An expression that works with&amp;nbsp;&lt;A href="https://ll.thespacedevs.com/2.0.0/launch/" target="_blank"&gt;https://ll.thespacedevs.com/2.0.0/launch/&lt;/A&gt;&amp;nbsp;is:&lt;BR /&gt;&lt;STRONG&gt;(?&amp;lt;=\"image\":\")[A-Z-a-z-0-9\-\:\/\.\_]+&lt;/STRONG&gt;&lt;BR /&gt;&lt;BR /&gt;&lt;/P&gt;&lt;P&gt;and as&amp;nbsp;&lt;a href="https://community.cloudera.com/t5/user/viewprofilepage/user-id/80381"&gt;@SAMSAL&lt;/a&gt; said, EvaluateJsonPath is the right tool for this job.&lt;/P&gt;</description>
      <pubDate>Wed, 28 Sep 2022 11:07:50 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Regex-doesn-t-work-on-ExtractText-Processor/m-p/353514#M236735</guid>
      <dc:creator>stanere</dc:creator>
      <dc:date>2022-09-28T11:07:50Z</dc:date>
    </item>
    <item>
      <title>Re: Regex doesn't work on ExtractText Processor?</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Regex-doesn-t-work-on-ExtractText-Processor/m-p/353516#M236736</link>
      <description>&lt;P&gt;Thank you all.&lt;/P&gt;&lt;P&gt;I eventually evaluated the JSON path to extract the URL. My mind was astray, as I was using a complex solution to a simple problem.&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Wed, 28 Sep 2022 11:47:15 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Regex-doesn-t-work-on-ExtractText-Processor/m-p/353516#M236736</guid>
      <dc:creator>rafy</dc:creator>
      <dc:date>2022-09-28T11:47:15Z</dc:date>
    </item>
  </channel>
</rss>

