/* ------------------------------------------------------------------------- * * selfuncs.h * Selectivity functions and index cost estimation functions for * standard operators and index access methods. * * * Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * src/include/utils/selfuncs.h * * ------------------------------------------------------------------------- */ #ifndef SELFUNCS_H #define SELFUNCS_H #include "fmgr.h" #include "access/htup.h" #include "nodes/relation.h" #include "optimizer/nodegroups.h" #include "parser/parse_oper.h" #include "catalog/pg_operator.h" /* * Note: the default selectivity estimates are not chosen entirely at random. * We want them to be small enough to ensure that indexscans will be used if * available, for typical table densities of ~100 tuples/page. Thus, for * example, 0.01 is not quite small enough, since that makes it appear that * nearly all pages will be hit anyway. Also, since we sometimes estimate * eqsel as 1/num_distinct, we probably want DEFAULT_NUM_DISTINCT to equal * 1/DEFAULT_EQ_SEL. */ /* default selectivity estimate for equalities such as "A = b" */ #define DEFAULT_EQ_SEL 0.005 /* default selectivity estimate for inequalities such as "A < b" */ #define DEFAULT_INEQ_SEL 0.3333333333333333 /* default selectivity estimate for range inequalities "A > b AND A < c" */ #define DEFAULT_RANGE_INEQ_SEL 0.005 /* default selectivity estimate for pattern-match operators such as LIKE */ #define DEFAULT_MATCH_SEL 0.005 /* default number of distinct values in a table */ #define DEFAULT_NUM_DISTINCT 200 /* default number of rows */ #define DEFAULT_NUM_ROWS 10 /* default number of distinct values and biase for the special expression */ #define DEFAULT_SPECIAL_EXPR_DISTINCT 10 #define DEFAULT_SPECIAL_EXPR_BIASE (pow(u_sess->pgxc_cxt.NumDataNodes, (double)1 / 2) / u_sess->pgxc_cxt.NumDataNodes) /* default selectivity estimate for boolean、null、nan、infinite test nodes */ #define DEFAULT_UNK_SEL 0.005 #define DEFAULT_NOT_UNK_SEL (1.0 - DEFAULT_UNK_SEL) /* default selectivity estimate for neq anti join */ #define MIN_NEQ_ANTI_SEL 0.05 #define MAX_NEQ_SEMI_SEL (1.0 - MIN_NEQ_ANTI_SEL) /* If the selectivity is too high, we do not use POISSON to estimate the numbwe of distinct values */ #define SELECTIVITY_THRESHOLD_TO_USE_POISSON 0.95 /* Estimate local distinct of join or agg after filter or join for scattered distribution. */ #define NUM_DISTINCT_SELECTIVITY_FOR_POISSON(distinct, input_rows, selectivity) \ (double)((distinct) * (1 - exp(-((input_rows) * (selectivity) / (distinct))))) /* Estimate distinct from global to local for scattered distribution. */ #define NUM_DISTINCT_GTL_FOR_POISSON(gdistinct, input_rows, num_datanodes, dop) \ (double)(NUM_DISTINCT_SELECTIVITY_FOR_POISSON(gdistinct, input_rows, 1.0 / num_datanodes / (dop))) /* Estimate thread distinct from dn distinct num. */ #define NUM_PARALLEL_DISTINCT_GTL_FOR_POISSON(dn_distinct, dn_rows, dop) \ (double)(NUM_DISTINCT_SELECTIVITY_FOR_POISSON(dn_distinct, dn_rows, 1.0 / (dop))) /* * Clamp a computed probability estimate (which may suffer from roundoff or * estimation errors) to valid range. Argument must be a float variable. */ #define CLAMP_PROBABILITY(p) \ do { \ if (p < 0.0) \ p = 0.0; \ else if (p > 1.0) \ p = 1.0; \ } while (0) /* Return data from examine_variable and friends */ typedef struct VariableStatData { Node* var; /* the Var or expression tree */ RelOptInfo* rel; /* Relation, or NULL if not identifiable */ HeapTuple statsTuple; /* pg_statistic tuple, or NULL if none */ /* NB: if statsTuple!=NULL, it must be freed when caller is done */ void (*freefunc)(HeapTuple tuple); /* how to free statsTuple */ Oid vartype; /* exposed type of expression */ Oid atttype; /* type to pass to get_attstatsslot */ int32 atttypmod; /* typmod to pass to get_attstatsslot */ bool isunique; /* matches unique index or DISTINCT clause */ bool enablePossion; /* indentify we can use possion or not */ bool acl_ok; /* result of ACL check on table or column */ PlannerInfo *root; /* Planner info the var reference */ double numDistinct[2]; /* estimated numdistinct, 0: means unknown, [0]: local, [1]: global */ bool isEstimated; /* indicate that whether estimation have already been done */ PlannerInfo *baseRoot; /* Planner info of the baseVar */ Node *baseVar; /* base Var, owner of the statsTuple */ RelOptInfo *baseRel; /* rel of the baseVar */ bool needAdjust; /* true if need adjust on rel */ } VariableStatData; #define ReleaseVariableStats(vardata) \ do { \ if (HeapTupleIsValid((vardata).statsTuple)) \ (*(vardata).freefunc)((vardata).statsTuple); \ } while (0) typedef enum { Pattern_Type_Like, Pattern_Type_Like_IC, Pattern_Type_Regex, Pattern_Type_Regex_IC } Pattern_Type; typedef enum { Pattern_Prefix_None, Pattern_Prefix_Partial, Pattern_Prefix_Exact } Pattern_Prefix_Status; typedef enum { STATS_TYPE_GLOBAL, STATS_TYPE_LOCAL } STATS_EST_TYPE; /* * Helper routine for estimate_num_groups: add an item to a list of * GroupVarInfos, but only if it's not known equal to any of the existing * entries. */ typedef struct { Node* var; /* might be an expression, not just a Var */ RelOptInfo* rel; /* relation it belongs to */ double ndistinct; /* # distinct values */ bool isdefault; /* if estimated distinct value is default value */ bool es_is_used; /* true if extended statistic is used*/ Bitmapset* es_attnums; /* number of correlated attributes */ } GroupVarInfo; /* * genericcostestimate is a general-purpose estimator that can be used for * most index types. In some cases we use genericcostestimate as the base * code and then incorporate additional index-type-specific knowledge in * the type-specific calling function. To avoid code duplication, we make * genericcostestimate return a number of intermediate values as well as * its preliminary estimates of the output cost values. The GenericCosts * struct includes all these values. * * Callers should initialize all fields of GenericCosts to zero. In addition, * they can set numIndexTuples to some positive value if they have a better * than default way of estimating the number of leaf index tuples visited. */ typedef struct { /* These are the values the cost estimator must return to the planner */ Cost indexStartupCost; /* index-related startup cost */ Cost indexTotalCost; /* total index-related scan cost */ Selectivity indexSelectivity; /* selectivity of index */ double indexCorrelation; /* order correlation of index */ /* Intermediate values we obtain along the way */ double numIndexPages; /* number of leaf pages visited */ double numIndexTuples; /* number of leaf tuples visited */ double spc_random_page_cost; /* relevant random_page_cost value */ double num_sa_scans; /* # indexscans from ScalarArrayOpExprs */ } GenericCosts; extern void set_local_rel_size(PlannerInfo* root, RelOptInfo* rel); extern double get_join_ratio(VariableStatData* vardata, SpecialJoinInfo* sjinfo); extern double get_multiple_by_distkey(PlannerInfo* root, List* distkey, double rows); extern double estimate_agg_num_distinct(PlannerInfo* root, List* group_exprs, Plan* plan, const double* numGroups); extern double estimate_agg_num_distinct(PlannerInfo* root, List* group_exprs, Path* path, const double* numGroups); extern void output_noanalyze_rellist_to_log(int lev); extern void set_noanalyze_rellist(Oid relid, AttrNumber attid); extern double estimate_local_numdistinct(PlannerInfo* root, Node* hashkey, Path* path, SpecialJoinInfo* sjinfo, double* global_distinct, bool* isdefault, VariableStatData* vardata); extern void get_num_distinct(PlannerInfo* root, List* groupExprs, double local_rows, double global_rows, unsigned int num_datanodes, double* numdistinct, List** pgset = NULL); extern double get_local_rows(double global_rows, double multiple, bool replicate, unsigned int num_data_nodes); extern double get_global_rows(double local_rows, double multiple, unsigned int num_data_nodes); #define PATH_LOCAL_ROWS(path) \ get_local_rows( \ (path)->rows, (path)->multiple, IsLocatorReplicated((path)->locator_type), ng_get_dest_num_data_nodes(path)) #define PLAN_LOCAL_ROWS(plan) \ get_local_rows( \ (plan)->plan_rows, (plan)->multiple, (plan)->exec_type != EXEC_ON_DATANODES, ng_get_dest_num_data_nodes(plan)) #define RELOPTINFO_LOCAL_FIELD(root, rel, fldname) \ get_local_rows((rel)->fldname, \ (rel)->multiple, \ IsLocatorReplicated((rel)->locator_type), \ ng_get_dest_num_data_nodes((root), (rel))) #define IDXOPTINFO_LOCAL_FIELD(root, idx, fldname) \ get_local_rows((idx)->fldname, \ (idx)->rel->multiple, \ IsLocatorReplicated((idx)->rel->locator_type), \ ng_get_dest_num_data_nodes((root), (idx)->rel)) #ifndef ENABLE_MULTIPLE_NODES /* get probe for binary search*/ #define MID(low, high) (low + high) / 2 /* get previous bound index */ #define PREVIOUS_BOUND(i) i - 2 /* get previous bound index */ #define NEXT_BOUND(i) i + 1 /* The number of distincts allocated to each bucket must be >= 1.0 */ #define CHECK_DISTINCT_HIST(distinct) distinct < 1.0 ? 1.0 : distinct #endif /* Functions in selfuncs.c */ extern void examine_variable(PlannerInfo* root, Node* node, int varRelid, VariableStatData* vardata); extern bool statistic_proc_security_check(const VariableStatData *vardata, Oid func_oid); extern bool get_restriction_variable( PlannerInfo* root, List* args, int varRelid, VariableStatData* vardata, Node** other, bool* varonleft); extern void get_join_variables(PlannerInfo* root, List* args, SpecialJoinInfo* sjinfo, VariableStatData* vardata1, VariableStatData* vardata2, bool* join_is_reversed); extern double get_variable_numdistinct(VariableStatData* vardata, bool* isdefault, bool adjust_rows = true, double join_ratio = 1.0, SpecialJoinInfo* sjinfo = NULL, STATS_EST_TYPE eType = STATS_TYPE_GLOBAL, bool isJoinVar = false); extern double mcv_selectivity(VariableStatData* vardata, FmgrInfo* opproc, Datum constval, bool varonleft, double* sumcommonp, Oid equaloperator, bool* inmcv, double* lastcommonp = NULL); extern double histogram_selectivity(VariableStatData* vardata, FmgrInfo* opproc, Datum constval, bool varonleft, int min_hist_size, int n_skip, int* hist_size); extern Pattern_Prefix_Status pattern_fixed_prefix( Const* patt, Pattern_Type ptype, Oid collation, Const** prefix, Selectivity* rest_selec); extern Const* make_greater_string(const Const* str_const, FmgrInfo* ltproc, Oid collation); extern Datum eqsel(PG_FUNCTION_ARGS); extern Datum neqsel(PG_FUNCTION_ARGS); extern Datum scalarltsel(PG_FUNCTION_ARGS); extern float8 scalarltsel_internal(PlannerInfo* root, Oid opera, List* args, int varRelid); extern Datum scalargtsel(PG_FUNCTION_ARGS); extern Datum regexeqsel(PG_FUNCTION_ARGS); extern Datum icregexeqsel(PG_FUNCTION_ARGS); extern Datum likesel(PG_FUNCTION_ARGS); extern Datum iclikesel(PG_FUNCTION_ARGS); extern Datum regexnesel(PG_FUNCTION_ARGS); extern Datum icregexnesel(PG_FUNCTION_ARGS); extern Datum nlikesel(PG_FUNCTION_ARGS); extern Datum icnlikesel(PG_FUNCTION_ARGS); extern Datum eqjoinsel(PG_FUNCTION_ARGS); extern Datum neqjoinsel(PG_FUNCTION_ARGS); extern Datum scalarltjoinsel(PG_FUNCTION_ARGS); extern Datum scalargtjoinsel(PG_FUNCTION_ARGS); extern Datum regexeqjoinsel(PG_FUNCTION_ARGS); extern Datum icregexeqjoinsel(PG_FUNCTION_ARGS); extern Datum likejoinsel(PG_FUNCTION_ARGS); extern Datum iclikejoinsel(PG_FUNCTION_ARGS); extern Datum regexnejoinsel(PG_FUNCTION_ARGS); extern Datum icregexnejoinsel(PG_FUNCTION_ARGS); extern Datum nlikejoinsel(PG_FUNCTION_ARGS); extern Datum icnlikejoinsel(PG_FUNCTION_ARGS); extern Selectivity booltestsel( PlannerInfo* root, BoolTestType booltesttype, Node* arg, int varRelid, JoinType jointype, SpecialJoinInfo* sjinfo); extern Selectivity nulltestsel( PlannerInfo* root, NullTestType nulltesttype, Node* arg, int varRelid, JoinType jointype, SpecialJoinInfo* sjinfo); extern Selectivity nantestsel( PlannerInfo* root, NanTestType nantesttype, Node* arg, int varRelid, JoinType jointype, SpecialJoinInfo* sjinfo); extern Selectivity infinitetestsel( PlannerInfo* root, InfiniteTestType infinitetesttype, Node* arg, int varRelid, JoinType jointype, SpecialJoinInfo* sjinfo); extern Selectivity scalararraysel(PlannerInfo* root, ScalarArrayOpExpr* clause, bool is_join_clause, int varRelid, JoinType jointype, SpecialJoinInfo* sjinfo); extern int estimate_array_length(Node* arrayexpr); extern Selectivity rowcomparesel( PlannerInfo* root, RowCompareExpr* clause, int varRelid, JoinType jointype, SpecialJoinInfo* sjinfo); extern void mergejoinscansel(PlannerInfo* root, Node* clause, Oid opfamily, int strategy, bool nulls_first, Selectivity* leftstart, Selectivity* leftend, Selectivity* rightstart, Selectivity* rightend); extern double estimate_num_groups(PlannerInfo* root, List* groupExprs, double input_rows, unsigned int num_datanodes, STATS_EST_TYPE eType = STATS_TYPE_GLOBAL, List** pgset = NULL); extern Selectivity estimate_hash_bucketsize( PlannerInfo* root, Node* hashkey, double nbuckets, Path* inner_path, SpecialJoinInfo* sjinfo, double* distinctnum); extern Datum btcostestimate(PG_FUNCTION_ARGS); extern void btcostestimate_internal(PlannerInfo *root, IndexPath *path, double loop_count, Cost *indexStartupCost, Cost *indexTotalCost, Selectivity *indexSelectivity, double *indexCorrelation); extern Datum ubtcostestimate(PG_FUNCTION_ARGS); extern Datum hashcostestimate(PG_FUNCTION_ARGS); extern Datum gistcostestimate(PG_FUNCTION_ARGS); extern Datum spgcostestimate(PG_FUNCTION_ARGS); extern Datum gincostestimate(PG_FUNCTION_ARGS); extern Datum psortcostestimate(PG_FUNCTION_ARGS); /* Functions in array_selfuncs.c */ extern Selectivity scalararraysel_containment( PlannerInfo* root, Node* leftop, Node* rightop, Oid elemtype, bool isEquality, bool useOr, int varRelid); extern Datum arraycontsel(PG_FUNCTION_ARGS); extern Datum arraycontjoinsel(PG_FUNCTION_ARGS); /* the type for var data ratio we cached. */ typedef enum { RatioType_Filter, RatioType_Join } RatioType; /* var ratio structure for one relation after join with other relation or filter by self. */ typedef struct VarRatio { RatioType ratiotype; /* filter ratio or join ratio. */ Node* var; /* the var of local rel in restriction clause. */ double ratio; /* identify joinratio or filterratio if after compute join selectivity, others it means selectivity of filter. */ Relids joinrelids; /* the joinrel relids. */ } VarRatio; typedef struct VarEqRatio { Var* var; /* the var of local rel in restriction clause. */ double ratio; /* identify joinratio or filterratio if after compute join selectivity, others it means selectivity of filter. */ Relids joinrelids; /* the joinrel relids. */ } VarEqRatio; extern void set_varratio_after_calc_selectivity( VariableStatData* vardata, RatioType type, double ratio, SpecialJoinInfo* sjinfo); extern double get_windowagg_selectivity(PlannerInfo* root, WindowClause* wc, WindowFunc* wfunc, List* partitionExprs, int32 constval, double tuples, unsigned int num_datanodes); extern bool contain_single_col_stat(List* stat_list); extern double convert_timevalue_to_scalar(Datum value, Oid typid); extern void genericcostestimate(PlannerInfo* root, IndexPath* path, double loop_count, double numIndexTuples, Cost* indexStartupCost, Cost* indexTotalCost, Selectivity* indexSelectivity, double* indexCorrelation); #endif /* SELFUNCS_H */