Там такого не сказано. Там сказано, что в большинстве случаев _запрос может считаться эквивалентным_ GROUP BY, поэтому можно применять соответствующие оптимизации.

Цитировать

1 спам

271

11 августа 2012 года

MrXaK

721 / / 31.12.2002

Вот это _запрос может считаться эквивалентным_ означает, что план выполнения запроса одинаковый.
Но, кстати, индекс на поле при DISTINCT должен стоять, иначе EXPLAIN будет уже разный.
Но в некоторых случаях DISTINCT именно заменяется на GROUP BY.

Лезем в исходники: в ./sql/sql_select.cc, начиная со строки 1201, есть несколько кусков, посвящённых оптимизации DISTINCT и GROUP BY.
1:

Свернуть исходник

Код:

/* Optimize distinct away if possible */

  {

    ORDER *org_order= order;

    order=remove_const(this, order,conds,1, &simple_order);

    if (thd->is_error())

    {

      error= 1;

      DBUG_PRINT("error",("Error from remove_const"));

      DBUG_RETURN(1);

    }

    /*

      If we are using ORDER BY NULL or ORDER BY const_expression,

      return result in any order (even if we are using a GROUP BY)

    */

    if (!order && org_order)

      skip_sort_order= 1;

  }

  /*

     Check if we can optimize away GROUP BY/DISTINCT.

     We can do that if there are no aggregate functions, the

     fields in DISTINCT clause (if present) and/or columns in GROUP BY

     (if present) contain direct references to all key parts of

     an unique index (in whatever order) and if the key parts of the

     unique index cannot contain NULLs.

     Note that the unique keys for DISTINCT and GROUP BY should not

     be the same (as long as they are unique).

     The FROM clause must contain a single non-constant table.

  */

  if (tables - const_tables == 1 && (group_list || select_distinct) &&

      !tmp_table_param.sum_func_count &&

      (!join_tab[const_tables].select ||

       !join_tab[const_tables].select->quick ||

       join_tab[const_tables].select->quick->get_type() != 

       QUICK_SELECT_I::QS_TYPE_GROUP_MIN_MAX))

  {

    if (group_list && rollup.state == ROLLUP::STATE_NONE &&

       list_contains_unique_index(join_tab[const_tables].table,

                                 find_field_in_order_list,

                                 (void *) group_list))

    {

      /*

        We have found that grouping can be removed since groups correspond to

        only one row anyway, but we still have to guarantee correct result

        order. The line below effectively rewrites the query from GROUP BY

        <fields> to ORDER BY <fields>. There are two exceptions:

        - if skip_sort_order is set (see above), then we can simply skip

          GROUP BY;

        - we can only rewrite ORDER BY if the ORDER BY fields are 'compatible'

          with the GROUP BY ones, i.e. either one is a prefix of another.

          We only check if the ORDER BY is a prefix of GROUP BY. In this case

          test_if_subpart() copies the ASC/DESC attributes from the original

          ORDER BY fields.

          If GROUP BY is a prefix of ORDER BY, then it is safe to leave

          'order' as is.

       */

      if (!order || test_if_subpart(group_list, order))

          order= skip_sort_order ? 0 : group_list;

      /*

        If we have an IGNORE INDEX FOR GROUP BY(fields) clause, this must be 

        rewritten to IGNORE INDEX FOR ORDER BY(fields).

      */

      join_tab->table->keys_in_use_for_order_by=

        join_tab->table->keys_in_use_for_group_by;

      group_list= 0;

      group= 0;

    }

    if (select_distinct &&

       list_contains_unique_index(join_tab[const_tables].table,

                                 find_field_in_item_list,

                                 (void *) &fields_list))

    {

      select_distinct= 0;

    }

  }

  if (group_list || tmp_table_param.sum_func_count)

  {

    if (! hidden_group_fields && rollup.state == ROLLUP::STATE_NONE)

      select_distinct=0;

  }

  else if (select_distinct && tables - const_tables == 1 &&

           rollup.state == ROLLUP::STATE_NONE)

  {

    /*

      We are only using one table. In this case we change DISTINCT to a

      GROUP BY query if:

      - The GROUP BY can be done through indexes (no sort) and the ORDER

        BY only uses selected fields.

    (In this case we can later optimize away GROUP BY and ORDER BY)

      - We are scanning the whole table without LIMIT

        This can happen if:

        - We are using CALC_FOUND_ROWS

        - We are using an ORDER BY that can't be optimized away.

      We don't want to use this optimization when we are using LIMIT

      because in this case we can just create a temporary table that

      holds LIMIT rows and stop when this table is full.

    */

    JOIN_TAB *tab= &join_tab[const_tables];

    bool all_order_fields_used;

    if (order)

      skip_sort_order= test_if_skip_sort_order(tab, order, select_limit, 1, 

        &tab->table->keys_in_use_for_order_by);

    if ((group_list=create_distinct_group(thd, select_lex->ref_pointer_array,

                                          order, fields_list, all_fields,

                          &all_order_fields_used)))

    {

      bool skip_group= (skip_sort_order &&

        test_if_skip_sort_order(tab, group_list, select_limit, 1, 

                                &tab->table->keys_in_use_for_group_by) != 0);

      count_field_types(select_lex, &tmp_table_param, all_fields, 0);

      if ((skip_group && all_order_fields_used) ||

      select_limit == HA_POS_ERROR ||

      (order && !skip_sort_order))

      {

    /*  Change DISTINCT to GROUP BY */

    select_distinct= 0;

    no_order= !order;

    if (all_order_fields_used)

    {

      if (order && skip_sort_order)

      {

        /*

          Force MySQL to read the table in sorted order to get result in

          ORDER BY order.

        */

        tmp_table_param.quick_group=0;

      }

      order=0;

        }

    group=1;                // For end_write_group

      }

      else

    group_list= 0;

    }

    else if (thd->is_fatal_error)           // End of memory

      DBUG_RETURN(1);

  }

По комментариям в коде это видно. Чуть ниже (со строки 1404) описано, когда будет создаваться временная таблица, а на строке 1625 — там ещё может возникать HAVING.

Цитировать

1 спам

277

13 августа 2012 года

arrjj

1.7K / / 26.01.2011

Ну вообще все оптимизации зависят от СУБД: где-то преобразовывается, где-то нет. Поэтому не нужно заставлять людей использовать GROUP BY там, где можно обойтись DISTINCT, — это всё же больше вопрос синтаксиса SQL, а не обработки запросов СУБД.

Цитировать

0 спам

271

13 августа 2012 года

MrXaK

721 / / 31.12.2002

Это, кстати, холиварная тема, но я считаю, что решение о смене СУБД — очень сильное, поэтому, пока такого решения нет, надо оптимизировать под конкретную СУБД. В данном случае в MySQL толку от DISTINCT нет, если он всё равно будет развёрнут в GROUP BY, а затем ещё раз оптимизирован. Более того, если нет индекса на поле, то DISTINCT вообще вызовет временную таблицу, а это всё-таки существенно. Но в целом вы правы: для другой СУБД использовать DISTINCT может быть оптимальнее.

Цитировать

Ваш аккаунт

Последние темы форума

Почтовая рассылка

Как вывести строки только с уникальным значением?

8 ответов