-- Create the database in the default warehouse location.
CREATE DATABASE IF NOT EXISTS myname;

-- Alternative: create the database with an explicit storage location.
-- LOCATION is the HDFS path where the data for the database will be stored.
-- Default is /user/hive/warehouse/.
CREATE DATABASE IF NOT EXISTS myname
LOCATION '/user/yanfei/hive';

-- Standalone example: remove the database again.
DROP DATABASE IF EXISTS myname;
-- List every database.
SHOW DATABASES;

-- List only the databases whose name matches the wildcard pattern.
SHOW DATABASES LIKE "d*";

-- Make myname the current database for the statements that follow.
USE myname;
1 2 3 4 5 6 7 8 9 10 11 12 13
-- Create the employees table in database myname, with column-level and
-- table-level comments plus custom table properties.
-- NOTE(review): FLOAT is an approximate type; DECIMAL may suit salary better.
-- Left unchanged here to keep the schema as written.
CREATE TABLE IF NOT EXISTS myname.employees (
    name   STRING COMMENT 'Employee name',
    salary FLOAT  COMMENT 'Employee salary'
)
COMMENT 'Description of the table'
TBLPROPERTIES (
    'creator'    = 'me',
    'created_at' = '2012-01-02 10:00:00'
);
-- List the tables of the current database.
SHOW TABLES;

-- Show the column definitions of the employees table.
DESCRIBE myname.employees;
-- Rename a table.
-- NOTE(review): the target name is identical to the source name, so this
-- looks like a placeholder; substitute the intended new table name.
ALTER TABLE employees RENAME TO employees;

-- Add an integer age column to the table.
ALTER TABLE employees ADD COLUMNS (age INT);
-- Remove the employees table. IF EXISTS keeps the statement from failing
-- when the table has already been dropped, so the example can be re-run.
DROP TABLE IF EXISTS employees;
1 2 3
-- The LIKE clause is optional in each statement below; omit it to list
-- everything instead of only the names matching the wildcard pattern.
SHOW FUNCTIONS LIKE "str*";
SHOW DATABASES LIKE "h*";
SHOW TABLES LIKE "m*";
1 2 3
-- Inspect the metadata of a database.
DESCRIBE DATABASE myname;

-- Inspect the columns of a table in the current database.
DESCRIBE employees;

-- Inspect a function (here, the function named like).
DESCRIBE FUNCTION like;
DML
假设现在有文件/home/usr/data/stocks.txt
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
-- Switch to the myname database so the unqualified table name resolves there.
USE myname;

-- External table over comma-delimited stock records located at /data/.
CREATE EXTERNAL TABLE IF NOT EXISTS stocks (
    symbol          STRING,
    ymd             STRING,
    price_open      FLOAT,
    price_high      FLOAT,
    price_low       FLOAT,
    price_close     FLOAT,
    volume          INT,
    price_adj_close FLOAT
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ','
LOCATION '/data/';
-- Load the stocks file into the stocks table.
-- LOCAL means the file is on the server's file system; without LOCAL,
-- the path refers to a file in HDFS.
LOAD DATA LOCAL INPATH '/home/usr/data/stocks.txt'
OVERWRITE INTO TABLE stocks;
By default, Hive stores table files under the /user/hive/warehouse directory on HDFS.
import datetime
import sys

# Streaming mapper: reads tab-separated (userid, movieid, rating, unixtime)
# records from stdin and writes each record back to stdout with the Unix
# timestamp replaced by its ISO weekday number (1 = Monday ... 7 = Sunday).
for line in sys.stdin:
    line = line.strip()
    userid, movieid, rating, unixtime = line.split('\t')
    # fromtimestamp() converts using the local time zone of the machine
    # running the script.
    weekday = datetime.datetime.fromtimestamp(float(unixtime)).isoweekday()
    print('\t'.join([userid, movieid, rating, str(weekday)]))
sort by 和 order by 的区别:order by 会对输入做全局排序,因此只有一个 reducer(多个 reducer 无法保证全局有序);只有一个 reducer 会导致当输入规模较大时需要较长的计算时间。sort by 不是全局排序,其在数据进入 reducer 前完成排序。因此,如果用 sort by 进行排序,并且设置 mapred.reduce.tasks>1,则 sort by 只保证每个 reducer 的输出有序,不保证全局有序。