1.获取大数据集之后再执行需要索引的操作,一定要使用中间表!
drop table if EXISTS temp.csm_temp2; create table temp.csm_temp2 select acct_id,sum(csm) csm from temp.csm_temp GROUP BY acct_id; ALTER table temp.csm_temp2 add INDEX key1 (acct_id);-- 为中间表加索引 alter table temp.csm_temp2 modify column acct_id varchar(50); -- 修改索引列数据类型,使其与将要关联的列类型相同!2.join时左右两边关联的列一定要有索引,一定要有相同的数据类型!(否则出现隐式转换,相当于在一边加了函数,索引不起作用)
3.mysql不支持full join,要实现full join再两列求和,可以先取两个子集union all,再根据关联列分组
drop table if EXISTS temp.csm_temp; CREATE table temp.csm_temp select acct_id,integrated_csm csm from zhixiao_cash.feed_express_data where stdate like '201612%' and integrated_csm>0; INSERT into temp.csm_temp select acct_id,csm from zhixiao_cash.original_ad where stdate BETWEEN'2016-12-01'and'2016-12-31' and csm>0; ALTER table temp.csm_temp add INDEX key1 (acct_id); drop table if EXISTS temp.csm_temp2; create table temp.csm_temp2 select acct_id,sum(csm) csm from temp.csm_temp GROUP BY acct_id; alter table temp.csm_temp2 modify column acct_id varchar(50); select s1.acct_id,s1.csm,s2.manager_id,s2.dep_big,s2.dep_small FROM temp.csm_temp2 s1 left JOIN zhixiao_acct.acct_info201612 s2 on s1.acct_id=s2.acct_id -- 存在渠道数据,管理员id和部门为空