Member since 
    
	
		
		
		07-21-2014
	
	
	
	
	
	
	
	
	
	
	
	
	
	
			
      
                141
            
            
                Posts
            
        
                8
            
            
                Kudos Received
            
        
                3
            
            
                Solutions
            
        My Accepted Solutions
| Title | Views | Posted | 
|---|---|---|
| | 3224 | 02-01-2017 04:49 PM |
| | 2604 | 01-15-2015 01:57 PM |
| | 3080 | 01-05-2015 12:59 PM |
			
    
	
		
		
		10-08-2018
	
		
		12:21 AM
	
	
	
	
	
	
	
	
	
	
	
	
	
	
		
	
				
		
			
					
	
		1 Kudo
		
	
				
		
	
		
					
							 @buntu     Try this:  hive> CREATE VIEW log_view PARTITIONED ON (pagename,year,month,day) AS SELECT uid,properties,pagename,year,month,day FROM log;      Reason:  The partition columns must be the last columns in the view's SELECT list, in the same order as they are listed in the PARTITIONED ON clause. 
						
					
					... View more
				
			
			
			
			
			
			
			
			
			
		
			
    
	
		
		
		04-12-2018
	
		
		06:33 PM
	
	
	
	
	
	
	
	
	
	
	
	
	
	
		
	
				
		
			
					
				
		
	
		
					
							   In fact, we can use Jackson to solve this problem, and the approach works for any JSON data.      morphlines: [
  {
    id: convertJsonToAvro
    importCommands: [ "org.kitesdk.**" ]
    commands: [
      # read the JSON blob
      { readJson: {} }
	  
	  # java code
	  {
			  # Copies every top-level field of the JSON attachment body onto the
			  # record so that the following toAvro command can map them by name.
			  # Records whose body is missing or is not a JSON object are rejected
			  # (the command returns false) instead of failing with a
			  # NullPointerException.
			  java { 
					imports : """
					  import com.fasterxml.jackson.databind.JsonNode;
					  import com.fasterxml.jackson.databind.ObjectMapper;
					  import org.kitesdk.morphline.base.Fields;
					  import java.io.IOException;
					  import java.util.Set;
					  import java.util.ArrayList;
					  import java.util.Iterator;
					  import java.util.List;
					  import java.util.Map;
					"""
					code : """
					  // The attachment body is produced by the preceding readJson command.
					  Object body = record.getFirstValue(Fields.ATTACHMENT_BODY);
					  if (body == null) {
						  logger.warn("Record has no attachment body to convert");
						  return false;
					  }

					  Map<String, Object> map;
					  try {
						  map = (Map<String, Object>) new ObjectMapper().readValue(body.toString(), Map.class);
					  } catch (IOException e) {
						  // Malformed JSON or a non-object document: reject the record
						  // rather than continuing with a null map.
						  logger.warn("Failed to parse attachment body as a JSON object", e);
						  return false;
					  }
					  if (map == null) {
						  // A literal JSON null deserializes to a null map.
						  logger.warn("Attachment body is JSON null; nothing to convert");
						  return false;
					  }

					  // Expose each top-level JSON field as a record field of the same name.
					  for (Map.Entry<String, Object> entry : map.entrySet()) {
						  record.put(entry.getKey(), entry.getValue());
					  }
					  return child.process(record);
					"""
	 
			  }
	  }
      
      # convert the extracted fields to an avro object
      # described by the schema in this field
      { toAvro {
        schemaFile: /etc/flume/conf/a1/like_user_event_realtime.avsc
      } }
      
      #{ logInfo { format : "loginfo: {}", args : ["@{}"] } }
  
      # serialize the object as avro
      { writeAvroToByteArray: {
        format: containerlessBinary
      } }
  
    ]
  }
]
   
						
					
					... View more
				
			
			
			
			
			
			
			
			
			
		
			
    
	
		
		
		03-30-2017
	
		
		01:22 AM
	
	
	
	
	
	
	
	
	
	
	
	
	
	
		
	
				
		
			
					
	
		1 Kudo
		
	
				
		
	
		
					
							 Check that the node's IP address is listed in the file that yarn.resourcemanager.nodes.include-path points to (the path to the file with nodes to include). Also make sure that you are starting the NodeManager with the correct user permissions. 
						
					
					... View more
				
			
			
			
			
			
			
			
			
			
		
			
    
	
		
		
		02-01-2017
	
		
		04:49 PM
	
	
	
	
	
	
	
	
	
	
	
	
	
	
		
	
				
		
			
					
				
		
	
		
					
							 OK, I do notice the CDH 5.10 parcel, and it requires Cloudera Manager to be updated before updating the CDH parcel. 
						
					
					... View more
				
			
			
			
			
			
			
			
			
			
		
			
    
	
		
		
		01-17-2017
	
		
		03:30 PM
	
	
	
	
	
	
	
	
	
	
	
	
	
	
		
	
				
		
			
					
	
		1 Kudo
		
	
				
		
	
		
					
							 Currently Cloudera does not have a parcel with R present in it.     If you are trying to run it with Spark, here is a good discussion about it.  https://community.cloudera.com/t5/Advanced-Analytics-Apache-Spark/SparkR-in-CDH-5-5/td-p/34602 
						
					
					... View more
				
			
			
			
			
			
			
			
			
			
		
			
    
	
		
		
		01-12-2017
	
		
		04:24 PM
	
	
	
	
	
	
	
	
	
	
	
	
	
	
		
	
				
		
			
					
				
		
	
		
					
							 Given the size of the dataset, I believe the data fits in memory and it's not providing any additional performance improvement.     Thanks! 
						
					
					... View more
				
			
			
			
			
			
			
			
			
			
		
			
    
	
		
		
		11-16-2017
	
		
		10:06 PM
	
	
	
	
	
	
	
	
	
	
	
	
	
	
		
	
				
		
			
					
				
		
	
		
					
							 incrementalstream-1.xml 
						
					
					... View more
				
			
			
			
			
			
			
			
			
			
		
			
    
	
		
		
		05-02-2017
	
		
		06:29 PM
	
	
	
	
	
	
	
	
	
	
	
	
	
	
		
	
				
		
			
					
				
		
	
		
					
							 Thanks, this is very useful.   How would one go about getting the application name? Is it the app name or the app ID or something else? Thanks! 
						
					
					... View more
				
			
			
			
			
			
			
			
			
			
		
			
    
	
		
		
		02-22-2016
	
		
		03:08 PM
	
	
	
	
	
	
	
	
	
	
	
	
	
	
		
	
				
		
			
					
				
		
	
		
					
							"Not supported" means you can't file support tickets for it. It's  shipped and works though.  
						
					
					... View more
				
			
			
			
			
			
			
			
			
			
		
			
    
	
		
		
		09-18-2015
	
		
		04:23 AM
	
	
	
	
	
	
	
	
	
	
	
	
	
	
		
	
				
		
			
					
				
		
	
		
					
							Glad to hear you were able to figure it out. In spirit of https://xkcd.com/979/, please mark the thread solved with the solution post selected, so others with a similar problem can find their solution quicker on the web.
						
					
					... View more