diff --git a/cocci/palloc_array.cocci b/cocci/palloc_array.cocci new file mode 100644 index 000000000000..aeeab74c3a9d --- /dev/null +++ b/cocci/palloc_array.cocci @@ -0,0 +1,157 @@ +// Since PG16 there are array versions of common palloc operations, so +// we can use those instead. +// +// We ignore cases where we have a anonymous struct and also when the +// type of the variable being assigned to is different from the +// inferred type. +// +// Options: --no-includes --include-headers + +virtual patch +virtual report +virtual context + +// These rules (soN) are needed to rewrite types of the form +// sizeof(T[C]) to C * sizeof(T) since Cocci cannot (currently) handle +// it. +@initialize:python@ +@@ +import re + +CRE = re.compile(r'(.*)\s+\[\s+(\d+)\s+\]$') + +def is_array_type(s): + mre = CRE.match(s) + return (mre is not None) + +@so1 depends on patch@ +type T : script:python() { is_array_type(T) }; +@@ +palloc(sizeof(T)) + +@script:python so2 depends on patch@ +T << so1.T; +T2; +E; +@@ +mre = CRE.match(T) +coccinelle.T2 = cocci.make_type(mre.group(1)) +coccinelle.E = cocci.make_expr(mre.group(2)) + +@depends on patch@ +type so1.T; +type so2.T2; +expression so2.E; +@@ +- palloc(sizeof(T)) ++ palloc(E * sizeof(T2)) + +@r1 depends on report || context@ +type T !~ "^struct {"; +expression E; +position p; +idexpression T *I; +identifier alloc = {palloc0, palloc}; +@@ +* I = alloc@p(E * sizeof(T)) + +@script:python depends on report@ +p << r1.p; +alloc << r1.alloc; +@@ +coccilib.report.print_report(p[0], f"this {alloc} can be replaced with {alloc}_array") + +@depends on patch@ +type T !~ "^struct {"; +expression E; +T *P; +idexpression T* I; +constant C; +identifier alloc = {palloc0, palloc}; +fresh identifier alloc_array = alloc ## "_array"; +@@ +( +- I = (T*) alloc(E * sizeof( \( *P \| P[C] \) )) ++ I = alloc_array(T, E) +| +- I = (T*) alloc(E * sizeof(T)) ++ I = alloc_array(T, E) +| +- I = alloc(E * sizeof( \( *P \| P[C] \) )) ++ I = alloc_array(T, E) +| +- I = alloc(E * sizeof(T)) ++ I = alloc_array(T, E) +) + +@r3 depends on report || context@ +type T !~ "^struct {"; +expression E; +idexpression T *P; +idexpression T *I; +position p; +@@ +* I = repalloc@p(P, E * sizeof(T)) + +@script:python depends on report@ +p << r3.p; +@@ +coccilib.report.print_report(p[0], "this repalloc can be replaced with repalloc_array") + +@depends on patch@ +type T !~ "^struct {"; +expression E; +idexpression T *P1; +idexpression T *P2; +idexpression T *I; +constant C; +@@ +( +- I = (T*) repalloc(P1, E * sizeof( \( *P2 \| P2[C] \) )) ++ I = repalloc_array(P1, T, E) +| +- I = (T*) repalloc(P1, E * sizeof(T)) ++ I = repalloc_array(P1, T, E) +| +- I = repalloc(P1, E * sizeof( \( *P2 \| P2[C] \) )) ++ I = repalloc_array(P1, T, E) +| +- I = repalloc(P1, E * sizeof(T)) ++ I = repalloc_array(P1, T, E) +) + +@r4 depends on report || context@ +type T !~ "^struct {"; +position p; +idexpression T* I; +identifier alloc = {palloc, palloc0}; +@@ +* I = alloc@p(sizeof(T)) + +@script:python depends on report@ +p << r4.p; +alloc << r4.alloc; +@@ +coccilib.report.print_report(p[0], f"this {alloc} can be replaced with {alloc}_object") + +@depends on patch@ +type T !~ "^struct {"; +T* P; +idexpression T *I; +constant C; +identifier alloc = {palloc, palloc0}; +fresh identifier alloc_object = alloc ## "_object"; +@@ +( +- I = (T*) alloc(sizeof( \( *P \| P[C] \) )) ++ I = alloc_object(T) +| +- I = (T*) alloc(sizeof(T)) ++ I = alloc_object(T) +| +- I = alloc(sizeof( \( *P \| P[C] \) )) ++ I = alloc_object(T) +| +- I = alloc(sizeof(T)) ++ I = alloc_object(T) +) diff --git a/cocci/palloc_sizeof.cocci b/cocci/palloc_sizeof.cocci new file mode 100644 index 000000000000..5f8593c2687d --- /dev/null +++ b/cocci/palloc_sizeof.cocci @@ -0,0 +1,49 @@ +virtual report +virtual context +virtual patch + +@initialize:python@ +@@ +import re + +CONST_CRE = re.compile(r'\bconst\b') + +def is_simple_type(s): + return s != 'void' and not CONST_CRE.search(s) + +@r1 depends on report || context@ +type T1 : script:python () { is_simple_type(T1) }; +idexpression T1 *I; +type T2 != T1; +position p; +expression E; +identifier func = {palloc, palloc0}; +@@ +( +* I = func@p(sizeof(T2)) +| +* I = func@p(E * sizeof(T2)) +) + +@script:python depends on report@ +T1 << r1.T1; +T2 << r1.T2; +I << r1.I; +p << r1.p; +@@ +coccilib.report.print_report(p[0], f"'{I}' has type '{T1}*' but 'sizeof({T2})' is used to allocate memory") + +@depends on patch@ +type T1 : script:python () { is_simple_type(T1) }; +idexpression T1 *I; +type T2 != T1; +expression E; +identifier func = {palloc, palloc0}; +@@ +( +- I = func(sizeof(T2)) ++ I = func(sizeof(T1)) +| +- I = func(E * sizeof(T2)) ++ I = func(E * sizeof(T1)) +) diff --git a/cocci/use_pg_cmp.cocci b/cocci/use_pg_cmp.cocci new file mode 100644 index 000000000000..8a258e61e5d1 --- /dev/null +++ b/cocci/use_pg_cmp.cocci @@ -0,0 +1,125 @@ +// Find cases where we can use the new pg_cmp_* functions. +// +// Copyright 2025 Mats Kindahl, Timescale. +// +// Options: --no-includes --include-headers + +virtual report +virtual context +virtual patch + +@initialize:python@ +@@ + +import re + +TYPMAP = { + 'BlockNumber': 'pg_cmp_u32', + 'ForkNumber': 'pg_cmp_s32', + 'OffsetNumber': 'pg_cmp_s16', + 'int': 'pg_cmp_s32', + 'int16': 'pg_cmp_s16', + 'int32': 'pg_cmp_s32', + 'uint16': 'pg_cmp_u16', + 'uint32': 'pg_cmp_u32', + 'unsigned int': 'pg_cmp_u32', +} + +def is_valid(expr): + return not re.search(r'DatumGet[A-Za-z]+', expr) + +@r1e depends on context || report expression@ +type TypeName : script:python() { TypeName in TYPMAP }; +position pos; +TypeName lhs : script:python() { is_valid(lhs) }; +TypeName rhs : script:python() { is_valid(rhs) }; +@@ +* lhs@pos < rhs ? -1 : lhs > rhs ? 1 : 0 + +@script:python depends on report@ +lhs << r1e.lhs; +rhs << r1e.rhs; +pos << r1e.pos; +@@ +coccilib.report.print_report(pos[0], f"conditional checks between '{lhs}' and '{rhs}' can be replaced with a PostgreSQL comparison function") + +@r1 depends on context || report@ +type TypeName : script:python() { TypeName in TYPMAP }; +position pos; +TypeName lhs : script:python() { is_valid(lhs) }; +TypeName rhs : script:python() { is_valid(rhs) }; +@@ +( +* if@pos (lhs < rhs) return -1; else if (lhs > rhs) return 1; return 0; +| +* if@pos (lhs < rhs) return -1; else if (lhs > rhs) return 1; else return 0; +| +* if@pos (lhs < rhs) return -1; if (lhs > rhs) return 1; return 0; +| +* if@pos (lhs > rhs) return 1; if (lhs < rhs) return -1; return 0; +| +* if@pos (lhs == rhs) return 0; if (lhs > rhs) return 1; return -1; +| +* if@pos (lhs == rhs) return 0; return lhs > rhs ? 1 : -1; +| +* if@pos (lhs == rhs) return 0; return lhs < rhs ? -1 : 1; +) + +@script:python depends on report@ +lhs << r1.lhs; +rhs << r1.rhs; +pos << r1.pos; +@@ +coccilib.report.print_report(pos[0], f"conditional checks between '{lhs}' and '{rhs}' can be replaced with a PostgreSQL comparison function") + +@expr_repl depends on patch expression@ +type TypeName : script:python() { TypeName in TYPMAP }; +fresh identifier cmp = script:python(TypeName) { TYPMAP[TypeName] }; +TypeName lhs : script:python() { is_valid(lhs) }; +TypeName rhs : script:python() { is_valid(rhs) }; +@@ +- lhs < rhs ? -1 : lhs > rhs ? 1 : 0 ++ cmp(lhs,rhs) + +@stmt_repl depends on patch@ +type TypeName : script:python() { TypeName in TYPMAP }; +fresh identifier cmp = script:python(TypeName) { TYPMAP[TypeName] }; +TypeName lhs : script:python() { is_valid(lhs) }; +TypeName rhs : script:python() { is_valid(rhs) }; +@@ +( +- if (lhs < rhs) return -1; if (lhs > rhs) return 1; return 0; ++ return cmp(lhs,rhs); +| +- if (lhs < rhs) return -1; else if (lhs > rhs) return 1; return 0; ++ return cmp(lhs,rhs); +| +- if (lhs < rhs) return -1; else if (lhs > rhs) return 1; else return 0; ++ return cmp(lhs,rhs); +| +- if (lhs > rhs) return 1; if (lhs < rhs) return -1; return 0; ++ return cmp(lhs,rhs); +| +- if (lhs > rhs) return 1; else if (lhs < rhs) return -1; return 0; ++ return cmp(lhs,rhs); +| +- if (lhs == rhs) return 0; if (lhs > rhs) return 1; return -1; ++ return cmp(lhs,rhs); +| +- if (lhs == rhs) return 0; return lhs > rhs ? 1 : -1; ++ return cmp(lhs,rhs); +| +- if (lhs == rhs) return 0; return lhs < rhs ? -1 : 1; ++ return cmp(lhs,rhs); +) + +// Add an include if there were none and we had to do some +// replacements +@has_include depends on patch@ +@@ + #include "common/int.h" + +@depends on patch && !has_include && (stmt_repl || expr_repl)@ +@@ + #include ... ++ #include "common/int.h" diff --git a/cocci/use_stringinfodata.cocci b/cocci/use_stringinfodata.cocci new file mode 100644 index 000000000000..4186027f8c92 --- /dev/null +++ b/cocci/use_stringinfodata.cocci @@ -0,0 +1,155 @@ +// Replace uses of StringInfo with StringInfoData where the info is +// dynamically allocated but (optionally) freed at the end of the +// block. This will avoid one dynamic allocation that otherwise have +// to be dealt with. +// +// For example, this code: +// +// StringInfo info = makeStringInfo(); +// ... +// appendStringInfo(info, ...); +// ... +// return do_stuff(..., info->data, ...); +// +// Can be replaced with: +// +// StringInfoData info; +// initStringInfo(&info); +// ... +// appendStringInfo(&info, ...); +// ... +// return do_stuff(..., info.data, ...); + +virtual report +virtual context +virtual patch + +// This rule captures the position of the makeStringInfo() and bases +// all changes around that. It matches the case that we should *not* +// replace, that is, those that either (1) return the pointer or (2) +// assign the pointer to a variable or (3) assign a variable to the +// pointer. +// +// The first two cases are matched because they could potentially leak +// the pointer outside the function, for some expressions, but the +// last one is just a convenience. +// +// If we replace this, the resulting change will result in a value +// copy of a structure, which might not be optimal, so we do not do a +// replacement. +@id1 exists@ +typedef StringInfo; +local idexpression StringInfo info; +position pos; +expression E; +@@ + info@pos = makeStringInfo() + ... +( + return info; +| + info = E +| + E = info +) + +@r1 depends on !patch disable decl_init exists@ +identifier info, fld; +position dpos, pos != id1.pos; +@@ +( +* StringInfo@dpos info; + ... +* info@pos = makeStringInfo(); +| +* StringInfo@dpos info@pos = makeStringInfo(); +) +<... +( +* \(pfree\|destroyStringInfo\)(info); +| +* info->fld +| +* *info +| +* info +) +...> + +@script:python depends on report@ +info << r1.info; +dpos << r1.dpos; +@@ +coccilib.report.print_report(dpos[0], f"Variable '{info}' of type StringInfo can be defined using StringInfoData") + +@depends on patch disable decl_init exists@ +identifier info, fld; +position pos != id1.pos; +@@ +- StringInfo info; ++ StringInfoData info; + ... +- info@pos = makeStringInfo(); ++ initStringInfo(&info); +<... +( +- \(destroyStringInfo\|pfree\)(info); +| + info +- ->fld ++ .fld +| +- *info ++ info +| +- info ++ &info +) +...> + +// Here we repeat the matching of the "bad case" since we cannot +// inherit over modifications +@id2 exists@ +typedef StringInfo; +local idexpression StringInfo info; +position pos; +expression E; +@@ + info@pos = makeStringInfo() + ... +( + return info; +| + info = E +| + E = info +) + +@depends on patch exists@ +identifier info, fld; +position pos != id2.pos; +statement S, S1; +@@ +- StringInfo info@pos = makeStringInfo(); ++ StringInfoData info; + ... when != S +( +<... +( +- \(destroyStringInfo\|pfree\)(info); +| + info +- ->fld ++ .fld +| +- *info ++ info +| +- info ++ &info +) +...> +& ++ initStringInfo(&info); + S1 +) diff --git a/configure b/configure index 30d949c3c466..6b5be789cc97 100755 --- a/configure +++ b/configure @@ -775,6 +775,9 @@ enable_coverage GENHTML LCOV GCOV +enable_coccicheck +SPFLAGS +SPATCH enable_debug enable_rpath default_port @@ -842,6 +845,7 @@ with_pgport enable_rpath enable_debug enable_profiling +enable_coccicheck enable_coverage enable_dtrace enable_tap_tests @@ -1540,6 +1544,7 @@ Optional Features: executables --enable-debug build with debugging symbols (-g) --enable-profiling build with profiling enabled + --enable-coccicheck enable Coccinelle checks (requires spatch) --enable-coverage build with coverage testing instrumentation --enable-dtrace build with DTrace support --enable-tap-tests enable TAP tests (requires Perl and IPC::Run) @@ -3341,6 +3346,91 @@ fi +# +# --enable-coccicheck enables Coccinelle check target "coccicheck" +# + + +# Check whether --enable-coccicheck was given. +if test "${enable_coccicheck+set}" = set; then : + enableval=$enable_coccicheck; + case $enableval in + yes) + if test -z "$SPATCH"; then + for ac_prog in spatch +do + # Extract the first word of "$ac_prog", so it can be a program name with args. +set dummy $ac_prog; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_path_SPATCH+:} false; then : + $as_echo_n "(cached) " >&6 +else + case $SPATCH in + [\\/]* | ?:[\\/]*) + ac_cv_path_SPATCH="$SPATCH" # Let the user override the test with a path. + ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_path_SPATCH="$as_dir/$ac_word$ac_exec_ext" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + + ;; +esac +fi +SPATCH=$ac_cv_path_SPATCH +if test -n "$SPATCH"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $SPATCH" >&5 +$as_echo "$SPATCH" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + test -n "$SPATCH" && break +done + +else + # Report the value of SPATCH in configure's output in all cases. + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for SPATCH" >&5 +$as_echo_n "checking for SPATCH... " >&6; } + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $SPATCH" >&5 +$as_echo "$SPATCH" >&6; } +fi + +if test -z "$SPATCH"; then + as_fn_error $? "spatch not found" "$LINENO" 5 +fi + + ;; + no) + : + ;; + *) + as_fn_error $? "no argument expected for --enable-coccicheck option" "$LINENO" 5 + ;; + esac + +else + enable_coccicheck=no + +fi + + + + # # --enable-coverage enables generation of code coverage metrics with gcov # @@ -15133,7 +15223,7 @@ else We can't simply define LARGE_OFF_T to be 9223372036854775807, since some C++ compilers masquerading as C compilers incorrectly reject 9223372036854775807. */ -#define LARGE_OFF_T (((off_t) 1 << 62) - 1 + ((off_t) 1 << 62)) +#define LARGE_OFF_T ((((off_t) 1 << 31) << 31) - 1 + (((off_t) 1 << 31) << 31)) int off_t_is_large[(LARGE_OFF_T % 2147483629 == 721 && LARGE_OFF_T % 2147483647 == 1) ? 1 : -1]; @@ -15179,7 +15269,7 @@ else We can't simply define LARGE_OFF_T to be 9223372036854775807, since some C++ compilers masquerading as C compilers incorrectly reject 9223372036854775807. */ -#define LARGE_OFF_T (((off_t) 1 << 62) - 1 + ((off_t) 1 << 62)) +#define LARGE_OFF_T ((((off_t) 1 << 31) << 31) - 1 + (((off_t) 1 << 31) << 31)) int off_t_is_large[(LARGE_OFF_T % 2147483629 == 721 && LARGE_OFF_T % 2147483647 == 1) ? 1 : -1]; @@ -15203,7 +15293,7 @@ rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext We can't simply define LARGE_OFF_T to be 9223372036854775807, since some C++ compilers masquerading as C compilers incorrectly reject 9223372036854775807. */ -#define LARGE_OFF_T (((off_t) 1 << 62) - 1 + ((off_t) 1 << 62)) +#define LARGE_OFF_T ((((off_t) 1 << 31) << 31) - 1 + (((off_t) 1 << 31) << 31)) int off_t_is_large[(LARGE_OFF_T % 2147483629 == 721 && LARGE_OFF_T % 2147483647 == 1) ? 1 : -1]; @@ -15248,7 +15338,7 @@ else We can't simply define LARGE_OFF_T to be 9223372036854775807, since some C++ compilers masquerading as C compilers incorrectly reject 9223372036854775807. */ -#define LARGE_OFF_T (((off_t) 1 << 62) - 1 + ((off_t) 1 << 62)) +#define LARGE_OFF_T ((((off_t) 1 << 31) << 31) - 1 + (((off_t) 1 << 31) << 31)) int off_t_is_large[(LARGE_OFF_T % 2147483629 == 721 && LARGE_OFF_T % 2147483647 == 1) ? 1 : -1]; @@ -15272,7 +15362,7 @@ rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext We can't simply define LARGE_OFF_T to be 9223372036854775807, since some C++ compilers masquerading as C compilers incorrectly reject 9223372036854775807. */ -#define LARGE_OFF_T (((off_t) 1 << 62) - 1 + ((off_t) 1 << 62)) +#define LARGE_OFF_T ((((off_t) 1 << 31) << 31) - 1 + (((off_t) 1 << 31) << 31)) int off_t_is_large[(LARGE_OFF_T % 2147483629 == 721 && LARGE_OFF_T % 2147483647 == 1) ? 1 : -1]; diff --git a/configure.ac b/configure.ac index 25cdfcf65af7..15db95813b00 100644 --- a/configure.ac +++ b/configure.ac @@ -199,6 +199,18 @@ AC_SUBST(enable_debug) PGAC_ARG_BOOL(enable, profiling, no, [build with profiling enabled ]) +# +# --enable-coccicheck enables Coccinelle check target "coccicheck" +# +PGAC_ARG_BOOL(enable, coccicheck, no, + [enable Coccinelle checks (requires spatch)], +[PGAC_PATH_PROGS(SPATCH, spatch) +if test -z "$SPATCH"; then + AC_MSG_ERROR([spatch not found]) +fi +AC_SUBST(SPFLAGS)]) +AC_SUBST(enable_coccicheck) + # # --enable-coverage enables generation of code coverage metrics with gcov # diff --git a/meson.build b/meson.build index b8da4966297d..7fca3a87a110 100644 --- a/meson.build +++ b/meson.build @@ -348,6 +348,7 @@ missing = find_program('config/missing', native: true) cp = find_program('cp', required: false, native: true) xmllint_bin = find_program(get_option('XMLLINT'), native: true, required: false) xsltproc_bin = find_program(get_option('XSLTPROC'), native: true, required: false) +spatch = find_program(get_option('SPATCH'), native: true, required: false) bison_flags = [] if bison.found() @@ -1649,6 +1650,34 @@ else endif +############################################################### +# Option: Coccinelle checks +############################################################### + +coccicheck_opt = get_option('coccicheck') +coccicheck_dep = not_found_dep +if not coccicheck_opt.disabled() + if spatch.found() + coccicheck_dep = declare_dependency() + elif coccicheck_opt.enabled() + error('missing required tools (spatch needed) for Coccinelle checks') + endif +endif + +if coccicheck_opt.enabled() + coccicheck_modes = ['context', 'report', 'patch'] + foreach mode : coccicheck_modes + run_target('coccicheck-' + mode, + command: [python, files('src/tools/coccicheck.py'), + '--mode', mode, + '--spatch', spatch, + '--patchdir', '@SOURCE_ROOT@', + '@SOURCE_ROOT@/cocci/**/*.cocci', + '@SOURCE_ROOT@/src', + '@SOURCE_ROOT@/contrib', + ]) + endforeach +endif ############################################################### # Compiler tests @@ -3866,6 +3895,7 @@ if meson.version().version_compare('>=0.57') { 'bison': '@0@ @1@'.format(bison.full_path(), bison_version), 'dtrace': dtrace, + 'spatch': spatch, 'flex': '@0@ @1@'.format(flex.full_path(), flex_version), }, section: 'Programs', diff --git a/meson_options.txt b/meson_options.txt index dd7126da3a73..57cd046bdfb8 100644 --- a/meson_options.txt +++ b/meson_options.txt @@ -43,6 +43,9 @@ option('cassert', type: 'boolean', value: false, option('tap_tests', type: 'feature', value: 'auto', description: 'Enable TAP tests') +option('coccicheck', type: 'feature', value: 'auto', + description: 'Enable Coccinelle checks') + option('injection_points', type: 'boolean', value: false, description: 'Enable injection points') @@ -52,7 +55,6 @@ option('PG_TEST_EXTRA', type: 'string', value: '', option('PG_GIT_REVISION', type: 'string', value: 'HEAD', description: 'git revision to be packaged by pgdist target') - # Compilation options option('extra_include_dirs', type: 'array', value: [], @@ -198,6 +200,9 @@ option('PYTHON', type: 'array', value: ['python3', 'python'], option('SED', type: 'string', value: 'gsed', description: 'Path to sed binary') +option('SPATCH', type: 'string', value: 'spatch', + description: 'Path to spatch binary, used for SmPL patches') + option('STRIP', type: 'string', value: 'strip', description: 'Path to strip binary, used for PGXS emulation') diff --git a/src/Makefile.global.in b/src/Makefile.global.in index cce29a37ac50..7066319b4392 100644 --- a/src/Makefile.global.in +++ b/src/Makefile.global.in @@ -19,7 +19,7 @@ # # Meta configuration -standard_targets = all install installdirs uninstall clean distclean coverage check checkprep installcheck init-po update-po +standard_targets = all install installdirs uninstall clean distclean coccicheck coverage check checkprep installcheck init-po update-po # these targets should recurse even into subdirectories not being built: standard_always_targets = clean distclean @@ -207,6 +207,7 @@ enable_rpath = @enable_rpath@ enable_nls = @enable_nls@ enable_debug = @enable_debug@ enable_dtrace = @enable_dtrace@ +enable_coccicheck = @enable_coccicheck@ enable_coverage = @enable_coverage@ enable_injection_points = @enable_injection_points@ enable_tap_tests = @enable_tap_tests@ @@ -383,7 +384,7 @@ CLDR_VERSION = 45 # If a particular subdirectory knows this isn't needed in itself or its # children, it can set NO_GENERATED_HEADERS. -all install check installcheck: submake-generated-headers +all install check installcheck coccicheck: submake-generated-headers .PHONY: submake-generated-headers @@ -532,6 +533,11 @@ FOP = @FOP@ XMLLINT = @XMLLINT@ XSLTPROC = @XSLTPROC@ +# Coccinelle + +SPATCH = @SPATCH@ +SPFLAGS = @SPFLAGS@ + # Code coverage GCOV = @GCOV@ @@ -1002,6 +1008,20 @@ endif # nls.mk endif # enable_nls +########################################################################## +# +# Coccinelle checks +# + +ifeq ($(enable_coccicheck), yes) +coccicheck_py = $(top_srcdir)/src/tools/coccicheck.py +coccicheck = SPATCH=$(SPATCH) SPFLAGS=$(SPFLAGS) $(PYTHON) $(coccicheck_py) + +.PHONY: coccicheck +coccicheck: + $(coccicheck) --mode=$(MODE) 'cocci/**/*.cocci' $(top_srcdir) +endif # enable_coccicheck + ########################################################################## # # Coverage diff --git a/src/makefiles/meson.build b/src/makefiles/meson.build index 46d8da070e82..489a86d351fb 100644 --- a/src/makefiles/meson.build +++ b/src/makefiles/meson.build @@ -57,6 +57,7 @@ pgxs_kv = { 'enable_injection_points': get_option('injection_points') ? 'yes' : 'no', 'enable_tap_tests': tap_tests_enabled ? 'yes' : 'no', 'enable_debug': get_option('debug') ? 'yes' : 'no', + 'enable_coccicheck': spatch.found() ? 'yes' : 'no', 'enable_coverage': 'no', 'enable_dtrace': dtrace.found() ? 'yes' : 'no', @@ -151,6 +152,7 @@ pgxs_bins = { 'TAR': tar, 'ZSTD': program_zstd, 'DTRACE': dtrace, + 'SPATCH': spatch, } pgxs_empty = [ @@ -166,6 +168,10 @@ pgxs_empty = [ 'DBTOEPUB', 'FOP', + # Coccinelle is not supported by pgxs + 'SPATCH', + 'SPFLAGS', + # supporting coverage for pgxs-in-meson build doesn't seem worth it 'GENHTML', 'LCOV', diff --git a/src/makefiles/pgxs.mk b/src/makefiles/pgxs.mk index 0de3737e789b..144459dccd2f 100644 --- a/src/makefiles/pgxs.mk +++ b/src/makefiles/pgxs.mk @@ -95,6 +95,9 @@ endif ifeq ($(FLEX),) FLEX = flex endif +ifeq ($(SPATCH),) +SPATCH = spatch +endif endif # PGXS diff --git a/src/tools/coccicheck.py b/src/tools/coccicheck.py new file mode 100755 index 000000000000..838f8184c547 --- /dev/null +++ b/src/tools/coccicheck.py @@ -0,0 +1,185 @@ +#!/usr/bin/env python3 + +"""Run Coccinelle on a set of files and directories. + +This is a re-written version of the Linux ``coccicheck`` script. + +Coccicheck can run in two different modes (the original have four +different modes): + +- *patch*: patch files using the cocci file. + +- *report*: report will report any improvements that this script can + make, but not show any patch. + +- *context*: show the context where the patch can be applied. + +The program will take a single cocci file and call spatch(1) with a +set of paths that can be either files or directories. + +When starting, the cocci file will be parsed and any lines containing +"Options:" or "Requires:" will be treated specially. + +- Lines containing "Options:" will have a list of options to add to + the call of the spatch(1) program. These options will be added last. + +- Lines containing "Requires:" can contain a version of spatch(1) that + is required for this cocci file. If the version requirements are not + satisfied, the file will not be used. + +When calling spatch(1), it will set the virtual rules "patch", +"report", or "context" and the cocci file can use these to act +differently depending on the mode. + +The following environment variables can be set: + +SPATCH: Path to spatch program. This will be used if no path is + passed using the option --spatch. + +SPFLAGS: Extra flags to use when calling spatch. These will be added + last. + +MODE: Mode to use. It will be used if no --mode is passed to + coccicheck.py. + +""" + +import argparse +import os +import sys +import subprocess +import re + +from pathlib import PurePath, Path +from packaging import version + +VERSION_CRE = re.compile( + r'spatch version (\S+) compiled with OCaml version (\S+)' +) + + +def parse_metadata(cocci_file): + """Parse metadata in Cocci file.""" + metadata = {} + with open(cocci_file) as fh: + for line in fh: + mre = re.match(r'(Options|Requires):(.*)', line, re.IGNORECASE) + if mre: + metadata[mre.group(1).lower()] = mre.group(2) + return metadata + + +def get_config(args): + """Compute configuration information.""" + # Figure out spatch version. We just need to read the first line + config = {} + cmd = [args.spatch, '--version'] + with subprocess.Popen(cmd, stdout=subprocess.PIPE, text=True) as proc: + for line in proc.stdout: + mre = VERSION_CRE.match(line) + if mre: + config['spatch_version'] = mre.group(1) + break + return config + + +def run_spatch(cocci_file, args, config, env): + """Run coccinelle on the provided file.""" + if args.verbose > 1: + print("processing cocci file", cocci_file) + spatch_version = config['spatch_version'] + metadata = parse_metadata(cocci_file) + + # Check that we have a valid version + if 'required' in metadata: + required_version = version.parse(metadata['required']) + if required_version < spatch_version: + print( + f'Skipping SmPL patch {cocci_file}: ' + f'requires {required_version} (had {spatch_version})' + ) + return + + command = [ + args.spatch, + "-D", args.mode, + "--cocci-file", cocci_file, + "--very-quiet", + ] + + if 'options' in metadata: + command.append(metadata['options']) + if args.mode == 'report': + command.append('--no-show-diff') + if args.patchdir: + command.extend(['--patch', args.patchdir]) + if args.jobs: + command.extend(['--jobs', args.jobs]) + if args.spflags: + command.append(args.spflags) + + for path in args.path: + subprocess.run(command + [path], env=env, check=True) + + +def coccinelle(args, config, env): + """Run coccinelle on all files matching the provided pattern.""" + root = '/' if PurePath(args.cocci).is_absolute() else '.' + count = 0 + for cocci_file in Path(root).glob(args.cocci): + count += 1 + run_spatch(cocci_file, args, config, env) + return count + + +def main(argv): + """Run coccicheck.""" + parser = argparse.ArgumentParser() + parser.add_argument('--verbose', '-v', action='count', default=0) + parser.add_argument('--spatch', type=PurePath, metavar='SPATCH', + default=os.environ.get('SPATCH'), + help=('Path to spatch binary. Defaults to ' + 'value of environment variable SPATCH.')) + parser.add_argument('--spflags', type=PurePath, + metavar='SPFLAGS', + default=os.environ.get('SPFLAGS', None), + help=('Flags to pass to spatch call. Defaults ' + 'to value of enviroment variable SPFLAGS.')) + parser.add_argument('--mode', choices=['patch', 'report', 'context'], + default=os.environ.get('MODE', 'report'), + help=('Mode to use for coccinelle. Defaults to ' + 'value of environment variable MODE.')) + parser.add_argument('--jobs', default=os.environ.get('JOBS', None), + help=('Number of jobs to use for spatch. Defaults to ' + 'value of environment variable JOBS.')) + parser.add_argument('--include', '-I', type=PurePath, + metavar='DIR', + help='Extra include directories.') + parser.add_argument('--patchdir', type=PurePath, metavar='DIR', + help=('Path for which patch should be created ' + 'relative to.')) + parser.add_argument('cocci', metavar='pattern', + help='Pattern for Cocci files to use.') + parser.add_argument('path', nargs='+', type=PurePath, + help='Directory or source path to process.') + + args = parser.parse_args(argv) + + if args.verbose > 1: + print("arguments:", args) + + if args.spatch is None: + parser.error('spatch is part of the Coccinelle project and is ' + 'available at http://coccinelle.lip6.fr/') + + if coccinelle(args, get_config(args), os.environ) == 0: + parser.error(f'no coccinelle files found matching {args.cocci}') + + +if __name__ == '__main__': + try: + main(sys.argv[1:]) + except KeyboardInterrupt: + print("Execution aborted") + except Exception as exc: + print(exc)