hive面试题及解题思路解析
2018-08-10 10:41:35      个评论

```现有这么一批数据，现要求出：

A,2015-01,5
A,2015-01,15
B,2015-01,5
A,2015-01,8
B,2015-01,25
A,2015-01,5
A,2015-02,4
A,2015-02,6
B,2015-02,10
B,2015-02,5
A,2015-03,16
A,2015-03,22
B,2015-03,23
B,2015-03,10
B,2015-03,11

A   2015-01     33          33        33
A   2015-02     33          43        10
A   2015-03     38          81        38
B   2015-01     30          30        30
B   2015-02     30          45                15
B   2015-03     44          89        44
```
```

data1.txt

A,2015-01,5
A,2015-01,15
B,2015-01,5
A,2015-01,8
B,2015-01,25
A,2015-01,5
A,2015-02,4
A,2015-02,6
B,2015-02,10
B,2015-02,5
A,2015-03,16
A,2015-03,22
B,2015-03,23
B,2015-03,10
B,2015-03,11

create database if not exists exercise;
use exercise;
drop table if exists exercise1;
create table if not exists exercise1(name string, month string, visit int)
row format delimited fields terminated by ",";
load data local inpath "/home/hadoop/data1.txt" into table exercise1;
select * from exercise1;```

```

A	2015-01	33
A	2015-02	10
A	2015-03	38

create table exercise1_step1 as select name,month, sum(visit) as visit from exercise1 group by name,month;
select * from exercise1_step1;   // 6 行 3 列```

```

create table exercise1_step2 as
select  a.name as namea, a.month as montha, a.visit as visita,
b.name as nameb, b.month as monthb, b.visit as visitb
from exercise1_step1 a join exercise1_step1 b on a.name = b.name;```
```
select * from exercise1_step2; ```

```

```

```
// 建表语句：
CREATE TABLE `course` (
`id` int(11) NOT NULL AUTO_INCREMENT PRIMARY KEY,
`sid` int(11) DEFAULT NULL,
`course` varchar(255) DEFAULT NULL,
`score` int(11) DEFAULT NULL
) ENGINE=InnoDB DEFAULT CHARSET=utf8;

// 插入数据
// 字段解释：id, 学号， 课程， 成绩
INSERT INTO `course` VALUES (1, 1, 'yuwen', 43);
INSERT INTO `course` VALUES (2, 1, 'shuxue', 55);
INSERT INTO `course` VALUES (3, 2, 'yuwen', 77);
INSERT INTO `course` VALUES (4, 2, 'shuxue', 88);
INSERT INTO `course` VALUES (5, 3, 'yuwen', 98);
INSERT INTO `course` VALUES (6, 3, 'shuxue', 65);

```

data2.txt

1	1	yuwen	43
2	1	shuxue	55
3	2	yuwen	77
4	2	shuxue	88
5	3	yuwen	98
6	3	shuxue	65
7	3	yingyu	88

create table if not exists course(id int, sid int, course string,score int)
row format delimited fields terminated by "\t";
load data local inpath "/home/hadoop/data2.txt" into table course;

select * from course;```

```

select sid,
case course when "yuwen" then score else 0 end as yuwen,
case course when "shuxue" then score else 0 end as shuxue,
case course when "yingyu" then score else 0 end as yingyu
from course;
```

```

select sid from (
select sid,
sum(case course when "yuwen" then score else 0 end) as yuwen,
sum(case course when "shuxue" then score else 0 end) as shuxue,
sum(case course when "yingyu" then score else 0 end) as yingyu
from course group by sid
) b where b.shuxue > b.yuwen;```

```

2014010114
2014010216
2014010317
2014010410
2014010506
2012010609
2012010732
2012010812
2012010919
2012011023
2001010116
2001010212
2001010310
2001010411
2001010529
2013010619
2013010722
2013010812
2013010929
2013011023
2008010105
2008010216
2008010337
2008010414
2008010516
2007010619
2007010712
2007010812
2007010999
2007011023
2010010114
2010010216
2010010317
2010010410
2010010506
2015010649
2015010722
2015010812
2015010999
2015011023

select substr(data,1,4),max(substr(data,9,2)) from table2 group by substr(data,1,4);

```

create database if not exists exercise;
use exercise;
drop table if exists exercise3;
create table exercise3(line string);
load data local inpath "/home/hadoop/data3.txt" into table exercise3;
select * from exercise3;```

```

select substring(line, 1, 4) as year,  max(substring(line, -2)) as max_temp
from exercise3 group  by substring(line, 1, 4);```

```

from exercise3 group by substring(line, 1, 4);

1、要么是group by后面的一个字段
2、要么是聚合函数的结果字段

1、使用TopN 的技巧可以解决

2、再次使用一次连接SQL既可以解决

select substring(b.line, 1, 8) as max_temp_date, a.max_temp
from exercise3 b join (
select substring(c.line, 1, 4) as year,  max(substring(c.line, -2)) as max_temp
from exercise3 c group  by substring(c.line, 1, 4)
) a
on a.year = substring(b.line, 1, 4)  and
a.max_temp = substring(b.line, -2);```