Table of contents
- Create Database
- Create Table
- Describe Table
- Drop Table
- Truncate Table
- Load data from local file system
- Load data from HDFS location
- Display column names
- Create external table
- Work with Array data types
- Get element by index in Hive array data type
- Table for map data
- Command to create identical table
- Create table using different file formats
- Change number of reducers
- Hive Partition
- Add UDFs (user-defined functions) in the Hive shell
- Bucketing in Hive
- Hive Joins (same as SQL Joins)
- Hive Optimized Joins
Create Database
create database db_name;
use db_name;
Create Table
Syntax
create table table_name
> (
> col_name data_type,
> col_name data_type)
> row format delimited
> fields terminated by ',';
Example
create table department_data
> (
> dept_id int,
> dept_name string,
> manager_id int,
> salary int)
> row format delimited
> fields terminated by ',';
Describe Table
describe table_name;
describe formatted table_name;
Drop Table
Syntax
DROP TABLE [IF EXISTS] table_name;
Example
DROP TABLE IF EXISTS employee;
Truncate Table
Syntax
TRUNCATE TABLE table_name;
Example
TRUNCATE TABLE employee;
Load data from local file system
Syntax
load data local inpath 'file path' into table table_name;
Example
load data local inpath 'file:///tmp/hive_class/depart_data.csv' into table department_data;
Load data from HDFS location
Syntax
load data inpath 'hadoop file location' into table table_name;
Example
load data inpath '/tmp/hive_data_class_2/' into table department_data_from_hdfs;
Display column names
set hive.cli.print.header=true;
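Example (a quick check, assuming the department_data table created earlier; with the header setting on, the column names appear as the first row of output):
select * from department_data limit 5;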
Create external table
Syntax
create external table table_name
> (
> col_name data_type,
> col_name data_type
> )
> row format delimited
> fields terminated by ','
> location 'source_file_location';
Example
create external table department_data_external
> (
> dept_id int,
> dept_name string,
> manager_id int,
> salary int
> )
> row format delimited
> fields terminated by ','
> location '/tmp/hive_data_class_2/';
Work with Array data types
Example
create table employee
> (
> id int,
> name string,
> skills array<string>
> )
> row format delimited
> fields terminated by ','
> collection items terminated by ':';
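A minimal sketch of a matching input file (the path and rows are hypothetical): fields are separated by ',' and array elements by ':'.
-- file:///tmp/hive_class/emp_array_data.csv
-- 1,John,sql:python:spark
-- 2,Maria,hive:hadoop
load data local inpath 'file:///tmp/hive_class/emp_array_data.csv' into table employee;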
Get element by index in Hive array data type
Syntax
select array_col_name[index] from table_name;
Example
select skills[0] as prime_skill from employee;
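Hive also ships array helpers such as size() and array_contains(); the query below is an illustrative sketch against the employee table above:
select
> name,
> size(skills) as number_of_skills,
> array_contains(skills, 'sql') as knows_sql
> from employee;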
Table for map data
Example
create table employee_map_data
> (
> id int,
> name string,
> details map<string,string>
> )
> row format delimited
> fields terminated by ','
> collection items terminated by '|'
> map keys terminated by ':';
Get data
select
> id,
> name,
> details["gender"] as employee_gender
> from employee_map_data;
Map functions
select
> id,
> name,
> details,
> size(details) as size_of_each_map,
> map_keys(details) as distinct_map_keys,
> map_values(details) as distinct_map_values
> from employee_map_data;
Command to create identical table
Syntax
create table table_2 as select * from table_1;
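Example (a sketch using the department_data table created earlier; the copy's name is arbitrary):
create table department_data_copy as select * from department_data;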
Create table using different file formats
Parquet
create table table_name
> (
> col_name data_type,
> col_name data_type
> )
> stored as parquet;
Orc
create table table_name
> (
> col_name data_type,
> col_name data_type
> )
> stored as orc;
Avro
create table table_name
> (
> col_name data_type,
> col_name data_type
> )
> stored as avro;
CSV File
create table table_name
> (
> col_name data_type,
> col_name data_type
> )
> row format serde 'org.apache.hadoop.hive.serde2.OpenCSVSerde'
> stored as textfile
> tblproperties ("skip.header.line.count" = "1");
JSON File
create table table_name
> (
> col_name data_type,
> col_name data_type
> )
> row format serde 'org.apache.hive.hcatalog.data.JsonSerDe'
> stored as textfile;
Change number of reducers
Syntax
set mapreduce.job.reduces=<number_of_reducers>;
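Example (the value 4 is arbitrary; choose a count that fits the job):
set mapreduce.job.reduces=4;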
Hive Partition
Static Partition
set hive.mapred.mode=strict;
create table static_table
> (
> col_name data_type,
> col_name data_type
> )
> partitioned by (col_name data_type);
insert overwrite table static_table partition(col_name = 'value') select col_name, col_name from table_name where col_name = 'value';
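A concrete sketch, assuming a hypothetical emp_data source table with id, name, salary and dept columns; the partition value is fixed in the statement and the partition column is left out of the select list:
create table emp_static_partition
> (
> id int,
> name string,
> salary int
> )
> partitioned by (dept string);
insert overwrite table emp_static_partition partition(dept = 'HR') select id, name, salary from emp_data where dept = 'HR';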
Dynamic Partition
set hive.exec.dynamic.partition.mode=nonstrict;
create table dynamic_table
> (
> col_name data_type,
> col_name data_type
> )
> partitioned by (partition_col_name data_type);
insert overwrite table dynamic_table partition(partition_col_name) select col_name, col_name, partition_col_name from table_name;
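A sketch with the same hypothetical emp_data table; Hive derives the partition values from the last column in the select list:
create table emp_dynamic_partition
> (
> id int,
> name string,
> salary int
> )
> partitioned by (dept string);
insert overwrite table emp_dynamic_partition partition(dept) select id, name, salary, dept from emp_data;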
Multilevel Partition
create table multi_level_table
> (
> col_name data_type,
> col_name data_type
> )
> partitioned by (col1_name data_type, col2_name data_type);
insert overwrite table multi_level_table partition(col1_name, col2_name) select col_name, col_name, col1_name, col2_name from table_name;
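A sketch, assuming the hypothetical emp_data table also has a gender column; the partition columns go last in the select list, in the declared order:
create table emp_multi_level_partition
> (
> id int,
> name string
> )
> partitioned by (dept string, gender string);
insert overwrite table emp_multi_level_partition partition(dept, gender) select id, name, dept, gender from emp_data;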
Add UDFs (user-defined functions) in the Hive shell
add file file_path;
Statement to execute a Python UDF
select transform(col_name, col_name) using 'python python_file_name.py' as (output_col_name, output_col_name) from table_name limit 5;
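A minimal sketch of such a script (hypothetical names throughout; Hive streams rows to the script's stdin as tab-separated lines and reads tab-separated rows back from stdout):
# upper_case.py
import sys

for line in sys.stdin:
    # each input row arrives as one tab-separated line
    emp_id, name = line.strip().split('\t')
    # emit the transformed row back to hive, tab-separated
    print(emp_id + '\t' + name.upper())
And a matching invocation against the employee table defined earlier:
add file /tmp/upper_case.py;
select transform(id, name) using 'python upper_case.py' as (id, upper_name) from employee limit 5;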
Bucketing in Hive
set hive.enforce.bucketing=true;
Syntax
create table buck_users
> (
> col_name data_type,
> col_name data_type
> )
> clustered by (id)
> sorted by (id)
> into bucket_number buckets;
Example
create table buck_users
> (
> id int,
> name string,
> salary int,
> unit string
> )
> clustered by (id)
> sorted by (id)
> into 2 buckets;
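With the data bucketed on id, tablesample can read a single bucket; this sketch scans the first of the two buckets:
select * from buck_users tablesample(bucket 1 out of 2 on id);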
Hive Joins (same as SQL Joins)
- inner join
- left join
- right join
- full outer join
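Sketches of each, assuming hypothetical employee_table and department_table sharing a dept_id column:
SELECT e.name, d.dept_name FROM employee_table e INNER JOIN department_table d ON e.dept_id = d.dept_id;
SELECT e.name, d.dept_name FROM employee_table e LEFT OUTER JOIN department_table d ON e.dept_id = d.dept_id;
SELECT e.name, d.dept_name FROM employee_table e RIGHT OUTER JOIN department_table d ON e.dept_id = d.dept_id;
SELECT e.name, d.dept_name FROM employee_table e FULL OUTER JOIN department_table d ON e.dept_id = d.dept_id;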
Hive Optimized Joins
Reduce-Side Join
SET hive.auto.convert.join=false;
Syntax
SELECT * FROM table_1 u INNER JOIN table_2 l ON u.id = l.id;
Example
SELECT * FROM buck_users u INNER JOIN buck_locations l ON u.id = l.id;
Map Side Join
SET hive.auto.convert.join=true;
Syntax
SELECT * FROM table_1 u INNER JOIN table_2 l ON u.id = l.id;
Example
SELECT * FROM buck_users u INNER JOIN buck_locations l ON u.id = l.id;
Bucket Map Join
set hive.optimize.bucketmapjoin=true;
SET hive.auto.convert.join=true;
Syntax
SELECT * FROM table_1 u INNER JOIN table_2 l ON u.id = l.id;
Example
SELECT * FROM buck_users u INNER JOIN buck_locations l ON u.id = l.id;
Sorted Merge Bucket Map Join
set hive.enforce.sortmergebucketmapjoin=false;
set hive.auto.convert.sortmerge.join=true;
set hive.optimize.bucketmapjoin=true;
set hive.optimize.bucketmapjoin.sortedmerge=true;
SET hive.auto.convert.join=false;
Syntax
SELECT * FROM table_1 u INNER JOIN table_2 l ON u.id = l.id;
Example
SELECT * FROM buck_users u INNER JOIN buck_locations l ON u.id = l.id;