2013-12-02       个评论
我要投稿

# 第一步，建目标表

`建一张表，字段足够多，如，表名为BIG，有43个字段：`

```CREATE TABLE "BIG" (
"H0" VARchar(1),
"ID" VARchar(18),
"H02" VARchar(1),
"H031" VARchar(2),
"H032" VARchar(2),
"H041" VARchar(2),
"H042" VARchar(2),
"H051" VARchar(2),
"H052" VARchar(2),
"H061" VARchar(2),
"H062" VARchar(2),
"H071" VARchar(1),
"H072" VARchar(1),
"H081" VARchar(1),
"H082" VARchar(1),
"H09" VARchar(2),
"H10" VARchar(3),
"H11" VARchar(1),
"H12" VARchar(1),
"H13" VARchar(4),
"H14" VARchar(1),
"H15" VARchar(1),
"H16" VARchar(1),
"H17" VARchar(1),
"H18" VARchar(1),
"H19" VARchar(1),
"H20" VARchar(1),
"H21" VARchar(1),
"H22" VARchar(1),
"H23" VARchar(1),
"HA0" VARchar(1),
"HA1" VARchar(2),
"HA2" VARchar(2),
"HA3" VARchar(1),
"HA4" VARchar(1),
"HA5" VARchar(1),
"HA6" VARchar(1),
"HA7" VARchar(1),
"HA8" VARchar(1),
"HA9" VARchar(1),
"HA10" VARchar(3),
"HA11" VARchar(1),
"HA20" VARchar(2)
);```

# 2. 生成数据

```BEGIN
DECLARE i INT;
SET i = 1;
WHILE i<=1000000 LOOP
INSERT INTO "BIG" VALUES (
char( cast (rand() * 10 as int) + 48),
char( cast (rand() * 10 as int) + 48),
char( cast (rand() * 10 as int) + 48),
char( cast (rand() * 10 as int) + 48),
char( cast (rand() * 10 as int) + 48),
char( cast (rand() * 10 as int) + 48),
char( cast (rand() * 10 as int) + 48),
char( cast (rand() * 10 as int) + 48),
char( cast (rand() * 10 as int) + 48),
char( cast (rand() * 10 as int) + 48),
char( cast (rand() * 10 as int) + 48),
char( cast (rand() * 10 as int) + 48),
char( cast (rand() * 10 as int) + 48),
char( cast (rand() * 10 as int) + 48),
char( cast (rand() * 10 as int) + 48),
char( cast (rand() * 10 as int) + 48),
char( cast (rand() * 10 as int) + 48),
char( cast (rand() * 10 as int) + 48),
char( cast (rand() * 10 as int) + 48),
char( cast (rand() * 10 as int) + 48),
char( cast (rand() * 10 as int) + 48),
char( cast (rand() * 10 as int) + 48),
char( cast (rand() * 10 as int) + 48),
char( cast (rand() * 10 as int) + 48),
char( cast (rand() * 10 as int) + 48),
char( cast (rand() * 10 as int) + 48),
char( cast (rand() * 10 as int) + 48),
char( cast (rand() * 10 as int) + 48),
char( cast (rand() * 10 as int) + 48),
char( cast (rand() * 10 as int) + 48),
char( cast (rand() * 10 as int) + 48),
char( cast (rand() * 10 as int) + 48),
char( cast (rand() * 10 as int) + 48),
char( cast (rand() * 10 as int) + 48),
char( cast (rand() * 10 as int) + 48),
char( cast (rand() * 10 as int) + 48),
char( cast (rand() * 10 as int) + 48),
char( cast (rand() * 10 as int) + 48),
char( cast (rand() * 10 as int) + 48),
char( cast (rand() * 10 as int) + 48),
char( cast (rand() * 10 as int) + 48),
char( cast (rand() * 10 as int) + 48),
char( cast (rand() * 10 as int) + 48)
);
SET i = i + 1;
if ( i % 2000 = 0) then
COMMIT;
END IF;
END LOOP;
END;```
1000000条数据，几个小时能弄完。

```#include "stdafx.h"
#include <stdio.h>
#include <time.h>

int getN()
{
return rand() % 10;
}

static void gen_data(FILE* f, int n, int colCount)
{
// file : d:\work\demo\BIG.txt, default colCount is 43
for (int i=0; i<n; i++)
{
for (int j=0; j<colCount-1; j++)
{
fprintf(f, "%ld,", getN());
}
fprintf(f, "%ld\n", getN());
}

}

int main(int argc,char** argv)
{
srand( (unsigned)time( NULL ) );

printf("rand = %ld\n", getN());

long begin = (long) time(NULL);

FILE * f = fopen("d:\\asa120\\BIG.txt", "wt");
gen_data(f, 1000000, 43);
fclose(f);
printf("generate finished...\n");
printf("time consumed: %ld \n", (long)time(NULL) - begin);
return 0;
}```

# 3. 加载数据

INPUT INTO BIG from 'd:\\asa120\\BIG.txt' format ascii escapes on escape character '\\' delimited by ',' encoding 'GBK';实测速度比较慢，一秒钟大概也就300多条。文档中说了：

The LOAD TABLE statement adds rows into a table; it doesn't replace them.

Loading data using the LOAD TABLE statement (without the WITH ROW LOGGING and WITH CONTENT LOGGING options) is considerably faster than using the INPUT statement.

```LOAD TABLE BIG (H0',',
ID',',
H02',',
H031',',
H032',',
H041',',
H042',',
H051',',
H052',',
H061',',
H062',',
H071',',
H072',',
H081',',
H082',',
H09',',
H10',',
H11',',
H12',',
H13',',
H14',',
H15',',
H16',',
H17',',
H18',',
H19',',
H20',',
H21',',
H22',',
H23',',
HA0',',
HA1',',
HA2',',
HA3',',
HA4',',
HA5',',
HA6',',
HA7',',
HA8',',
HA9',',
HA10',',
HA11',',
HA20'\X0A')
from 'd:\\asa120\\BIG.txt'
ESCAPES OFF
QUOTES OFF
NOTIFY 100000
WITH CHECKPOINT ON```

'123','456'
'222','111'

input into abc from 'd:\\asa120\\abc.txt' format ascii escapes on escape character '\\' delimited by ',' encoding 'GBK';

```truncate table abc;

load table abc(col1 ',', col2 '0x0A') from 'd:\\asa120\\abc.txt' escapes off;```

# 4. 验证查询

```select ID,
sum(case when ID<>'0' then cnt end) c1,
sum(case when ha3='1'   then cnt end) c2,
sum(case when ha3='2'   then cnt end) c4,
sum(case when ha3='3'   then cnt end) c6,
sum(case when ha3='4'   then cnt end) c8
from (
select  substr(ID,1,6) ID,count(*)cnt,ha3
from BIG
group by id,ha3
) A
group by ID
order by ID```