SET hive.auto.convert.join=true; SELECT * FROM user_plays_buck INNER JOIN small_user_subscription_buck ON user_plays_buck.subscription_id = small_user_subscription_buck.subscription_id LIMIT 1; ABSTRACT SYNTAX TREE: TOK_QUERY TOK_FROM TOK_JOIN TOK_TABREF TOK_TABNAME user_plays_buck TOK_TABREF TOK_TABNAME small_user_subscription_buck = . TOK_TABLE_OR_COL user_plays_buck subscription_id . TOK_TABLE_OR_COL small_user_subscription_buck subscription_id TOK_INSERT TOK_DESTINATION TOK_DIR TOK_TMP_FILE TOK_SELECT TOK_SELEXPR TOK_ALLCOLREF TOK_LIMIT 1 STAGE DEPENDENCIES: Stage-4 is a root stage Stage-3 depends on stages: Stage-4 Stage-0 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-4 Map Reduce Local Work Alias -> Map Local Tables: small_user_subscription_buck Fetch Operator limit: -1 Alias -> Map Local Operator Tree: small_user_subscription_buck TableScan alias: small_user_subscription_buck filterExpr: subscription_id is not null (type: boolean) Statistics: Num rows: 300000 Data size: 113325000 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: subscription_id is not null (type: boolean) Statistics: Num rows: 150000 Data size: 56662500 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: 0 subscription_id (type: bigint) 1 subscription_id (type: bigint) Position of Big Table: 0 Stage: Stage-3 Map Reduce Map Operator Tree: TableScan alias: user_plays_buck filterExpr: subscription_id is not null (type: boolean) Statistics: Num rows: 50000001 Data size: 9850000197 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: subscription_id is not null (type: boolean) Statistics: Num rows: 25000001 Data size: 4925000197 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: 0 subscription_id (type: bigint) 1 subscription_id (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col7, _col8, _col9, _col10, _col11 Position of Big Table: 0 Statistics: Num rows: 27500001 Data size: 5417500334 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: bigint), _col1 (type: string), _col2 (type: string), _col3 (type: int), _col7 (type: bigint), _col8 (type: string), _col9 (type: string), _col10 (type: string), _col11 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 27500001 Data size: 5417500334 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 1 Statistics: Num rows: 1 Data size: 197 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 directory: hdfs://bbsr02cloud06.ad.infosys.com:8020/tmp/hive/amit_mishra09/7a9d7ddd-a147-4a4c-a9c1-8a83645acd50/hive_2016-06-08_18-23-37_172_8302650945126002634-1/-mr-10000/.hive-staging_hive_2016-06-08_18-23-37_172_8302650945126002634-1/-ext-10001 NumFilesPerFileSink: 1 Statistics: Num rows: 1 Data size: 197 Basic stats: COMPLETE Column stats: NONE Stats Publishing Key Prefix: hdfs://bbsr02cloud06.ad.infosys.com:8020/tmp/hive/amit_mishra09/7a9d7ddd-a147-4a4c-a9c1-8a83645acd50/hive_2016-06-08_18-23-37_172_8302650945126002634-1/-mr-10000/.hive-staging_hive_2016-06-08_18-23-37_172_8302650945126002634-1/-ext-10001/ table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8 columns.types bigint:string:string:int:bigint:string:string:string:string escape.delim \ hive.serialization.extend.additional.nesting.levels true serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe TotalFiles: 1 GatherStats: false MultiFileSpray: false Local Work: Map Reduce Local Work Path -> Alias: hdfs://bbsr02cloud06.ad.infosys.com:8020/apps/hive/warehouse/rohit_db.db/user_plays_buck [user_plays_buck] Path -> Partition: hdfs://bbsr02cloud06.ad.infosys.com:8020/apps/hive/warehouse/rohit_db.db/small_user_subscription_buck Partition base file name: small_user_subscription_buck input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: COLUMN_STATS_ACCURATE true SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name subscription_id columns subscription_id,subscription_mode,subscription_type,subscription_start_dt,subscription_end_dt columns.comments columns.types bigint:string:string:string:string file.inputformat org.apache.hadoop.hive.ql.io.orc.OrcInputFormat file.outputformat org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat location hdfs://bbsr02cloud06.ad.infosys.com:8020/apps/hive/warehouse/rohit_db.db/small_user_subscription_buck name rohit_db.small_user_subscription_buck numFiles 16 numRows 300000 rawDataSize 113325000 serialization.ddl struct small_user_subscription_buck { i64 subscription_id, string subscription_mode, string subscription_type, string subscription_start_dt, string subscription_end_dt} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde totalSize 1246703 transient_lastDdlTime 1465290396 serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: COLUMN_STATS_ACCURATE true SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name subscription_id columns subscription_id,subscription_mode,subscription_type,subscription_start_dt,subscription_end_dt columns.comments columns.types bigint:string:string:string:string file.inputformat org.apache.hadoop.hive.ql.io.orc.OrcInputFormat file.outputformat org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat location hdfs://bbsr02cloud06.ad.infosys.com:8020/apps/hive/warehouse/rohit_db.db/small_user_subscription_buck name rohit_db.small_user_subscription_buck numFiles 16 numRows 300000 rawDataSize 113325000 serialization.ddl struct small_user_subscription_buck { i64 subscription_id, string subscription_mode, string subscription_type, string subscription_start_dt, string subscription_end_dt} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde totalSize 1246703 transient_lastDdlTime 1465290396 serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: rohit_db.small_user_subscription_buck name: rohit_db.small_user_subscription_buck hdfs://bbsr02cloud06.ad.infosys.com:8020/apps/hive/warehouse/rohit_db.db/user_plays_buck Partition base file name: user_plays_buck input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: COLUMN_STATS_ACCURATE true SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name subscription_id columns subscription_id,song_id,station_id,play_duration columns.comments columns.types bigint:string:string:int file.inputformat org.apache.hadoop.hive.ql.io.orc.OrcInputFormat file.outputformat org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat location hdfs://bbsr02cloud06.ad.infosys.com:8020/apps/hive/warehouse/rohit_db.db/user_plays_buck name rohit_db.user_plays_buck numFiles 16 numRows 50000001 rawDataSize 9850000197 serialization.ddl struct user_plays_buck { i64 subscription_id, string song_id, string station_id, i32 play_duration} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde totalSize 383522795 transient_lastDdlTime 1465196717 serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: COLUMN_STATS_ACCURATE true SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name subscription_id columns subscription_id,song_id,station_id,play_duration columns.comments columns.types bigint:string:string:int file.inputformat org.apache.hadoop.hive.ql.io.orc.OrcInputFormat file.outputformat org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat location hdfs://bbsr02cloud06.ad.infosys.com:8020/apps/hive/warehouse/rohit_db.db/user_plays_buck name rohit_db.user_plays_buck numFiles 16 numRows 50000001 rawDataSize 9850000197 serialization.ddl struct user_plays_buck { i64 subscription_id, string song_id, string station_id, i32 play_duration} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde totalSize 383522795 transient_lastDdlTime 1465196717 serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: rohit_db.user_plays_buck name: rohit_db.user_plays_buck Truncated Path -> Alias: /rohit_db.db/user_plays_buck [user_plays_buck] Execution mode: vectorized Stage: Stage-0 Fetch Operator limit: 1 Processor Tree: ListSink