今天无意中听到有同事在讨论,distinct和group by谁的速度会更快一点,意件不一,其实我也不知道那个好,下午有时间做了一下测试。
1,测试前的准备
-
-
mysql> CREATE TABLE `test_test` (
-
-> `id` int(11) NOT NULL auto_increment,
-
-> `num` int(11) NOT NULL default '0',
-
-> PRIMARY KEY (`id`)
-
-> ) ENGINE=MyISAM DEFAULT CHARSET=utf8 AUTO_INCREMENT=1 ;
-
Query OK, 0 rows affected (0.05 sec)
-
-
mysql> delimiter ||
-
-
-
mysql> create procedure p_test(pa int(11))
-
-> begin
-
->
-
-> declare max_num int(11) default 100000;
-
-> declare i int default 0;
-
-> declare rand_num int;
-
->
-
-> select count(id) into max_num from test_test;
-
->
-
-> while i < pa do
-
-> if max_num < 100000 then
-
-> select cast(rand()*100 as unsigned) into rand_num;
-
-> insert into test_test(num)values(rand_num);
-
-> end if;
-
-> set i = i +1;
-
-> end while;
-
-> end||
-
Query OK, 0 rows affected (0.00 sec)
-
-
mysql> call p_test(100000)||
-
Query OK, 1 row affected (5.66 sec)
-
-
mysql> delimiter ;
-
mysql> select count(id) from test_test;
-
+-----------+
-
| count(id) |
-
+-----------+
-
| 100000 |
-
+-----------+
-
1 row in set (0.00 sec)
-
-
mysql> show variables like "%pro%";
-
+---------------------------+-------+
-
| Variable_name | Value |
-
+---------------------------+-------+
-
| profiling | OFF |
-
| profiling_history_size | 15 |
-
| protocol_version | 10 |
-
| slave_compressed_protocol | OFF |
-
+---------------------------+-------+
-
4 rows in set (0.00 sec)
-
-
mysql> set profiling=1;
-
Query OK, 0 rows affected (0.00 sec)
2,测试
-
-
mysql> select distinct(num) from test_test;
-
mysql> select num from test_test group by num;
-
-
mysql> show profiles;
-
+----------+------------+-------------------------------------------+
-
| Query_ID | Duration | Query |
-
+----------+------------+-------------------------------------------+
-
| 1 | 0.07298225 | select distinct(num) from test_test |
-
| 2 | 0.07319975 | select num from test_test group by num |
-
| 3 | 0.07313525 | select num from test_test group by num |
-
| 4 | 0.07317725 | select distinct(num) from test_test |
-
| 5 | 0.07275200 | select distinct(num) from test_test |
-
| 6 | 0.07298600 | select num from test_test group by num |
-
| 7 | 0.07500700 | select num from test_test group by num |
-
| 8 | 0.07331325 | select distinct(num) from test_test |
-
| 9 | 0.57831575 | create index num_index on test_test (num) |
-
| 10 | 0.00243550 | select distinct(num) from test_test |
-
| 11 | 0.00121975 | select num from test_test group by num |
-
| 12 | 0.00116550 | select distinct(num) from test_test |
-
| 13 | 0.00107650 | select num from test_test group by num |
-
+----------+------------+-------------------------------------------+
-
13 rows in set (0.00 sec)
上面的1-8是4组数据,并且是没有加索引的,从中我们可以看出,distinct比group by 会好一点点
10-13是2组数据,是加了索引以后的,从中我们可以看出,group by 比distinct 会好一点点
一般情况,数据量比较大的表,关联字段都会加索引的,,并且加索引后检索时间只有以前的六分之一左右。
(责任编辑:IT) |