-- Create the table that holds the raw access-log lines.
--
-- Every column is a STRING filled from one capture group of "input.regex",
-- in declaration order:
--   host, tmp, users  groups 1-3: three space-separated tokens
--   time              group 4: the text between the square brackets
--   request           group 5: first double-quoted field
--   status, size      groups 6-7: "-" or a run of digits
--   referer, agent    groups 8-9: second and third double-quoted fields
--
-- `time` is back-quoted because it is treated as a reserved word by newer
-- Hive releases; the quoting is harmless on older ones.
-- IF NOT EXISTS keeps the script re-runnable (the load below overwrites the
-- table contents anyway).
-- NOTE(review): the contrib RegexSerDe is presumably shipped in the
-- hive-contrib jar and may need an ADD JAR first -- confirm for your cluster.
CREATE TABLE IF NOT EXISTS logs (
  host STRING,
  tmp STRING,
  users STRING,
  `time` STRING,
  request STRING,
  status STRING,
  size STRING,
  referer STRING,
  agent STRING
)
ROW FORMAT SERDE 'org.apache.hadoop.hive.contrib.serde2.RegexSerDe'
WITH SERDEPROPERTIES (
  "input.regex" = "([^ ]*) ([^ ]*) ([^ ]*) \\[(.*)\\] \"(.*?)\" (-|[0-9]*) (-|[0-9]*) \"(.*?)\" \"(.*?)\"",
  "output.format.string" = "%1$s %2$s %3$s %4$s %5$s %6$s %7$s %8$s %9$s"
)
STORED AS TEXTFILE;

-- Import the log data into the logs table.
-- LOCAL reads the file from the local file system of the Hive client rather
-- than from HDFS; OVERWRITE replaces whatever the table already contains.
LOAD DATA LOCAL INPATH '/home/fang/Downloads/access.log'
OVERWRITE INTO TABLE logs;

-- Check the import by reading back at most 100 rows.
-- SELECT * is deliberate here: this is an ad-hoc inspection of every column
-- the regex produced, not a query that other code depends on.
SELECT *
FROM logs
LIMIT 100;

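-- (Web-page residue, not part of the script:) Leave a reply
--
-- (Web-page residue:) Your email address will not be published. Required fields are marked with *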