From 0670e4923c52e328baeed306bb3c2f26031e2038 Mon Sep 17 00:00:00 2001 From: Mats Kindahl Date: Sun, 29 Dec 2024 19:35:58 +0100 Subject: [PATCH 1/7] Add initial coccicheck script The coccicheck.py script can be used to run several semantics patches on a source tree to either generate a report, see the context of the modification (what lines that requires changes), or generate a patch to correct an issue. usage: coccicheck.py [-h] [--verbose] [--spatch SPATCH] [--spflags SPFLAGS] [--mode {patch,report,context}] [--jobs JOBS] [--include DIR] [--patchdir DIR] pattern path [path ...] positional arguments: pattern Pattern for Cocci files to use. path Directory or source path to process. options: -h, --help show this help message and exit --verbose, -v --spatch SPATCH Path to spatch binary. Defaults to value of environment variable SPATCH. --spflags SPFLAGS Flags to pass to spatch call. Defaults to value of enviroment variable SPFLAGS. --mode {patch,report,context} Mode to use for coccinelle. Defaults to value of environment variable MODE. --jobs JOBS Number of jobs to use for spatch. Defaults to value of environment variable JOBS. --include DIR, -I DIR Extra include directories. --patchdir DIR Path for which patch should be created relative to. --- src/tools/coccicheck.py | 185 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 185 insertions(+) create mode 100755 src/tools/coccicheck.py diff --git a/src/tools/coccicheck.py b/src/tools/coccicheck.py new file mode 100755 index 000000000000..838f8184c547 --- /dev/null +++ b/src/tools/coccicheck.py @@ -0,0 +1,185 @@ +#!/usr/bin/env python3 + +"""Run Coccinelle on a set of files and directories. + +This is a re-written version of the Linux ``coccicheck`` script. + +Coccicheck can run in two different modes (the original have four +different modes): + +- *patch*: patch files using the cocci file. + +- *report*: report will report any improvements that this script can + make, but not show any patch. + +- *context*: show the context where the patch can be applied. + +The program will take a single cocci file and call spatch(1) with a +set of paths that can be either files or directories. + +When starting, the cocci file will be parsed and any lines containing +"Options:" or "Requires:" will be treated specially. + +- Lines containing "Options:" will have a list of options to add to + the call of the spatch(1) program. These options will be added last. + +- Lines containing "Requires:" can contain a version of spatch(1) that + is required for this cocci file. If the version requirements are not + satisfied, the file will not be used. + +When calling spatch(1), it will set the virtual rules "patch", +"report", or "context" and the cocci file can use these to act +differently depending on the mode. + +The following environment variables can be set: + +SPATCH: Path to spatch program. This will be used if no path is + passed using the option --spatch. + +SPFLAGS: Extra flags to use when calling spatch. These will be added + last. + +MODE: Mode to use. It will be used if no --mode is passed to + coccicheck.py. + +""" + +import argparse +import os +import sys +import subprocess +import re + +from pathlib import PurePath, Path +from packaging import version + +VERSION_CRE = re.compile( + r'spatch version (\S+) compiled with OCaml version (\S+)' +) + + +def parse_metadata(cocci_file): + """Parse metadata in Cocci file.""" + metadata = {} + with open(cocci_file) as fh: + for line in fh: + mre = re.match(r'(Options|Requires):(.*)', line, re.IGNORECASE) + if mre: + metadata[mre.group(1).lower()] = mre.group(2) + return metadata + + +def get_config(args): + """Compute configuration information.""" + # Figure out spatch version. We just need to read the first line + config = {} + cmd = [args.spatch, '--version'] + with subprocess.Popen(cmd, stdout=subprocess.PIPE, text=True) as proc: + for line in proc.stdout: + mre = VERSION_CRE.match(line) + if mre: + config['spatch_version'] = mre.group(1) + break + return config + + +def run_spatch(cocci_file, args, config, env): + """Run coccinelle on the provided file.""" + if args.verbose > 1: + print("processing cocci file", cocci_file) + spatch_version = config['spatch_version'] + metadata = parse_metadata(cocci_file) + + # Check that we have a valid version + if 'required' in metadata: + required_version = version.parse(metadata['required']) + if required_version < spatch_version: + print( + f'Skipping SmPL patch {cocci_file}: ' + f'requires {required_version} (had {spatch_version})' + ) + return + + command = [ + args.spatch, + "-D", args.mode, + "--cocci-file", cocci_file, + "--very-quiet", + ] + + if 'options' in metadata: + command.append(metadata['options']) + if args.mode == 'report': + command.append('--no-show-diff') + if args.patchdir: + command.extend(['--patch', args.patchdir]) + if args.jobs: + command.extend(['--jobs', args.jobs]) + if args.spflags: + command.append(args.spflags) + + for path in args.path: + subprocess.run(command + [path], env=env, check=True) + + +def coccinelle(args, config, env): + """Run coccinelle on all files matching the provided pattern.""" + root = '/' if PurePath(args.cocci).is_absolute() else '.' + count = 0 + for cocci_file in Path(root).glob(args.cocci): + count += 1 + run_spatch(cocci_file, args, config, env) + return count + + +def main(argv): + """Run coccicheck.""" + parser = argparse.ArgumentParser() + parser.add_argument('--verbose', '-v', action='count', default=0) + parser.add_argument('--spatch', type=PurePath, metavar='SPATCH', + default=os.environ.get('SPATCH'), + help=('Path to spatch binary. Defaults to ' + 'value of environment variable SPATCH.')) + parser.add_argument('--spflags', type=PurePath, + metavar='SPFLAGS', + default=os.environ.get('SPFLAGS', None), + help=('Flags to pass to spatch call. Defaults ' + 'to value of enviroment variable SPFLAGS.')) + parser.add_argument('--mode', choices=['patch', 'report', 'context'], + default=os.environ.get('MODE', 'report'), + help=('Mode to use for coccinelle. Defaults to ' + 'value of environment variable MODE.')) + parser.add_argument('--jobs', default=os.environ.get('JOBS', None), + help=('Number of jobs to use for spatch. Defaults to ' + 'value of environment variable JOBS.')) + parser.add_argument('--include', '-I', type=PurePath, + metavar='DIR', + help='Extra include directories.') + parser.add_argument('--patchdir', type=PurePath, metavar='DIR', + help=('Path for which patch should be created ' + 'relative to.')) + parser.add_argument('cocci', metavar='pattern', + help='Pattern for Cocci files to use.') + parser.add_argument('path', nargs='+', type=PurePath, + help='Directory or source path to process.') + + args = parser.parse_args(argv) + + if args.verbose > 1: + print("arguments:", args) + + if args.spatch is None: + parser.error('spatch is part of the Coccinelle project and is ' + 'available at http://coccinelle.lip6.fr/') + + if coccinelle(args, get_config(args), os.environ) == 0: + parser.error(f'no coccinelle files found matching {args.cocci}') + + +if __name__ == '__main__': + try: + main(sys.argv[1:]) + except KeyboardInterrupt: + print("Execution aborted") + except Exception as exc: + print(exc) From c1f715592bed24a8d1952c8f945c2cf2d74dc890 Mon Sep 17 00:00:00 2001 From: Mats Kindahl Date: Mon, 30 Dec 2024 19:58:07 +0100 Subject: [PATCH 2/7] Create coccicheck target for autoconf This adds a coccicheck target for the autoconf-based build system. The coccicheck target accepts one parameter MODE, which can be either "patch", "report", or "context". The "patch" mode will generate a patch that can be applied to the source tree, the "report" mode will generate a list of file locations with information about what can be changed, and the "context" mode will just highlight the line that will be affected by the semantic patch. The following will generate a patch and apply it to the source code tree: make coccicheck MODE=patch | patch -p1 --- configure | 100 ++++++++++++++++++++++++++++++++++++++--- configure.ac | 12 +++++ src/Makefile.global.in | 24 +++++++++- src/makefiles/pgxs.mk | 3 ++ 4 files changed, 132 insertions(+), 7 deletions(-) diff --git a/configure b/configure index 30d949c3c466..6b5be789cc97 100755 --- a/configure +++ b/configure @@ -775,6 +775,9 @@ enable_coverage GENHTML LCOV GCOV +enable_coccicheck +SPFLAGS +SPATCH enable_debug enable_rpath default_port @@ -842,6 +845,7 @@ with_pgport enable_rpath enable_debug enable_profiling +enable_coccicheck enable_coverage enable_dtrace enable_tap_tests @@ -1540,6 +1544,7 @@ Optional Features: executables --enable-debug build with debugging symbols (-g) --enable-profiling build with profiling enabled + --enable-coccicheck enable Coccinelle checks (requires spatch) --enable-coverage build with coverage testing instrumentation --enable-dtrace build with DTrace support --enable-tap-tests enable TAP tests (requires Perl and IPC::Run) @@ -3341,6 +3346,91 @@ fi +# +# --enable-coccicheck enables Coccinelle check target "coccicheck" +# + + +# Check whether --enable-coccicheck was given. +if test "${enable_coccicheck+set}" = set; then : + enableval=$enable_coccicheck; + case $enableval in + yes) + if test -z "$SPATCH"; then + for ac_prog in spatch +do + # Extract the first word of "$ac_prog", so it can be a program name with args. +set dummy $ac_prog; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_path_SPATCH+:} false; then : + $as_echo_n "(cached) " >&6 +else + case $SPATCH in + [\\/]* | ?:[\\/]*) + ac_cv_path_SPATCH="$SPATCH" # Let the user override the test with a path. + ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_path_SPATCH="$as_dir/$ac_word$ac_exec_ext" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + + ;; +esac +fi +SPATCH=$ac_cv_path_SPATCH +if test -n "$SPATCH"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $SPATCH" >&5 +$as_echo "$SPATCH" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + test -n "$SPATCH" && break +done + +else + # Report the value of SPATCH in configure's output in all cases. + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for SPATCH" >&5 +$as_echo_n "checking for SPATCH... " >&6; } + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $SPATCH" >&5 +$as_echo "$SPATCH" >&6; } +fi + +if test -z "$SPATCH"; then + as_fn_error $? "spatch not found" "$LINENO" 5 +fi + + ;; + no) + : + ;; + *) + as_fn_error $? "no argument expected for --enable-coccicheck option" "$LINENO" 5 + ;; + esac + +else + enable_coccicheck=no + +fi + + + + # # --enable-coverage enables generation of code coverage metrics with gcov # @@ -15133,7 +15223,7 @@ else We can't simply define LARGE_OFF_T to be 9223372036854775807, since some C++ compilers masquerading as C compilers incorrectly reject 9223372036854775807. */ -#define LARGE_OFF_T (((off_t) 1 << 62) - 1 + ((off_t) 1 << 62)) +#define LARGE_OFF_T ((((off_t) 1 << 31) << 31) - 1 + (((off_t) 1 << 31) << 31)) int off_t_is_large[(LARGE_OFF_T % 2147483629 == 721 && LARGE_OFF_T % 2147483647 == 1) ? 1 : -1]; @@ -15179,7 +15269,7 @@ else We can't simply define LARGE_OFF_T to be 9223372036854775807, since some C++ compilers masquerading as C compilers incorrectly reject 9223372036854775807. */ -#define LARGE_OFF_T (((off_t) 1 << 62) - 1 + ((off_t) 1 << 62)) +#define LARGE_OFF_T ((((off_t) 1 << 31) << 31) - 1 + (((off_t) 1 << 31) << 31)) int off_t_is_large[(LARGE_OFF_T % 2147483629 == 721 && LARGE_OFF_T % 2147483647 == 1) ? 1 : -1]; @@ -15203,7 +15293,7 @@ rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext We can't simply define LARGE_OFF_T to be 9223372036854775807, since some C++ compilers masquerading as C compilers incorrectly reject 9223372036854775807. */ -#define LARGE_OFF_T (((off_t) 1 << 62) - 1 + ((off_t) 1 << 62)) +#define LARGE_OFF_T ((((off_t) 1 << 31) << 31) - 1 + (((off_t) 1 << 31) << 31)) int off_t_is_large[(LARGE_OFF_T % 2147483629 == 721 && LARGE_OFF_T % 2147483647 == 1) ? 1 : -1]; @@ -15248,7 +15338,7 @@ else We can't simply define LARGE_OFF_T to be 9223372036854775807, since some C++ compilers masquerading as C compilers incorrectly reject 9223372036854775807. */ -#define LARGE_OFF_T (((off_t) 1 << 62) - 1 + ((off_t) 1 << 62)) +#define LARGE_OFF_T ((((off_t) 1 << 31) << 31) - 1 + (((off_t) 1 << 31) << 31)) int off_t_is_large[(LARGE_OFF_T % 2147483629 == 721 && LARGE_OFF_T % 2147483647 == 1) ? 1 : -1]; @@ -15272,7 +15362,7 @@ rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext We can't simply define LARGE_OFF_T to be 9223372036854775807, since some C++ compilers masquerading as C compilers incorrectly reject 9223372036854775807. */ -#define LARGE_OFF_T (((off_t) 1 << 62) - 1 + ((off_t) 1 << 62)) +#define LARGE_OFF_T ((((off_t) 1 << 31) << 31) - 1 + (((off_t) 1 << 31) << 31)) int off_t_is_large[(LARGE_OFF_T % 2147483629 == 721 && LARGE_OFF_T % 2147483647 == 1) ? 1 : -1]; diff --git a/configure.ac b/configure.ac index 25cdfcf65af7..15db95813b00 100644 --- a/configure.ac +++ b/configure.ac @@ -199,6 +199,18 @@ AC_SUBST(enable_debug) PGAC_ARG_BOOL(enable, profiling, no, [build with profiling enabled ]) +# +# --enable-coccicheck enables Coccinelle check target "coccicheck" +# +PGAC_ARG_BOOL(enable, coccicheck, no, + [enable Coccinelle checks (requires spatch)], +[PGAC_PATH_PROGS(SPATCH, spatch) +if test -z "$SPATCH"; then + AC_MSG_ERROR([spatch not found]) +fi +AC_SUBST(SPFLAGS)]) +AC_SUBST(enable_coccicheck) + # # --enable-coverage enables generation of code coverage metrics with gcov # diff --git a/src/Makefile.global.in b/src/Makefile.global.in index cce29a37ac50..7066319b4392 100644 --- a/src/Makefile.global.in +++ b/src/Makefile.global.in @@ -19,7 +19,7 @@ # # Meta configuration -standard_targets = all install installdirs uninstall clean distclean coverage check checkprep installcheck init-po update-po +standard_targets = all install installdirs uninstall clean distclean coccicheck coverage check checkprep installcheck init-po update-po # these targets should recurse even into subdirectories not being built: standard_always_targets = clean distclean @@ -207,6 +207,7 @@ enable_rpath = @enable_rpath@ enable_nls = @enable_nls@ enable_debug = @enable_debug@ enable_dtrace = @enable_dtrace@ +enable_coccicheck = @enable_coccicheck@ enable_coverage = @enable_coverage@ enable_injection_points = @enable_injection_points@ enable_tap_tests = @enable_tap_tests@ @@ -383,7 +384,7 @@ CLDR_VERSION = 45 # If a particular subdirectory knows this isn't needed in itself or its # children, it can set NO_GENERATED_HEADERS. -all install check installcheck: submake-generated-headers +all install check installcheck coccicheck: submake-generated-headers .PHONY: submake-generated-headers @@ -532,6 +533,11 @@ FOP = @FOP@ XMLLINT = @XMLLINT@ XSLTPROC = @XSLTPROC@ +# Coccinelle + +SPATCH = @SPATCH@ +SPFLAGS = @SPFLAGS@ + # Code coverage GCOV = @GCOV@ @@ -1002,6 +1008,20 @@ endif # nls.mk endif # enable_nls +########################################################################## +# +# Coccinelle checks +# + +ifeq ($(enable_coccicheck), yes) +coccicheck_py = $(top_srcdir)/src/tools/coccicheck.py +coccicheck = SPATCH=$(SPATCH) SPFLAGS=$(SPFLAGS) $(PYTHON) $(coccicheck_py) + +.PHONY: coccicheck +coccicheck: + $(coccicheck) --mode=$(MODE) 'cocci/**/*.cocci' $(top_srcdir) +endif # enable_coccicheck + ########################################################################## # # Coverage diff --git a/src/makefiles/pgxs.mk b/src/makefiles/pgxs.mk index 0de3737e789b..144459dccd2f 100644 --- a/src/makefiles/pgxs.mk +++ b/src/makefiles/pgxs.mk @@ -95,6 +95,9 @@ endif ifeq ($(FLEX),) FLEX = flex endif +ifeq ($(SPATCH),) +SPATCH = spatch +endif endif # PGXS From 18bef14c7efe31a86142563d45c1543312f0d0b0 Mon Sep 17 00:00:00 2001 From: Mats Kindahl Date: Wed, 1 Jan 2025 14:15:51 +0100 Subject: [PATCH 3/7] Add meson build for coccicheck This commit adds a run target `coccicheck` to meson build files. Since ninja does not accept parameters the same way make does, there are three run targets defined---"coccicheck-patch", "coccicheck-report", and "coccicheck-context"---that you can use to generate a patch, get a report, and get the context respectively. For example, to patch the tree from the "build" subdirectory created by the meson run: ninja coccicheck-patch | patch -d .. -p1 --- meson.build | 30 ++++++++++++++++++++++++++++++ meson_options.txt | 7 ++++++- src/makefiles/meson.build | 6 ++++++ 3 files changed, 42 insertions(+), 1 deletion(-) diff --git a/meson.build b/meson.build index b8da4966297d..7fca3a87a110 100644 --- a/meson.build +++ b/meson.build @@ -348,6 +348,7 @@ missing = find_program('config/missing', native: true) cp = find_program('cp', required: false, native: true) xmllint_bin = find_program(get_option('XMLLINT'), native: true, required: false) xsltproc_bin = find_program(get_option('XSLTPROC'), native: true, required: false) +spatch = find_program(get_option('SPATCH'), native: true, required: false) bison_flags = [] if bison.found() @@ -1649,6 +1650,34 @@ else endif +############################################################### +# Option: Coccinelle checks +############################################################### + +coccicheck_opt = get_option('coccicheck') +coccicheck_dep = not_found_dep +if not coccicheck_opt.disabled() + if spatch.found() + coccicheck_dep = declare_dependency() + elif coccicheck_opt.enabled() + error('missing required tools (spatch needed) for Coccinelle checks') + endif +endif + +if coccicheck_opt.enabled() + coccicheck_modes = ['context', 'report', 'patch'] + foreach mode : coccicheck_modes + run_target('coccicheck-' + mode, + command: [python, files('src/tools/coccicheck.py'), + '--mode', mode, + '--spatch', spatch, + '--patchdir', '@SOURCE_ROOT@', + '@SOURCE_ROOT@/cocci/**/*.cocci', + '@SOURCE_ROOT@/src', + '@SOURCE_ROOT@/contrib', + ]) + endforeach +endif ############################################################### # Compiler tests @@ -3866,6 +3895,7 @@ if meson.version().version_compare('>=0.57') { 'bison': '@0@ @1@'.format(bison.full_path(), bison_version), 'dtrace': dtrace, + 'spatch': spatch, 'flex': '@0@ @1@'.format(flex.full_path(), flex_version), }, section: 'Programs', diff --git a/meson_options.txt b/meson_options.txt index dd7126da3a73..57cd046bdfb8 100644 --- a/meson_options.txt +++ b/meson_options.txt @@ -43,6 +43,9 @@ option('cassert', type: 'boolean', value: false, option('tap_tests', type: 'feature', value: 'auto', description: 'Enable TAP tests') +option('coccicheck', type: 'feature', value: 'auto', + description: 'Enable Coccinelle checks') + option('injection_points', type: 'boolean', value: false, description: 'Enable injection points') @@ -52,7 +55,6 @@ option('PG_TEST_EXTRA', type: 'string', value: '', option('PG_GIT_REVISION', type: 'string', value: 'HEAD', description: 'git revision to be packaged by pgdist target') - # Compilation options option('extra_include_dirs', type: 'array', value: [], @@ -198,6 +200,9 @@ option('PYTHON', type: 'array', value: ['python3', 'python'], option('SED', type: 'string', value: 'gsed', description: 'Path to sed binary') +option('SPATCH', type: 'string', value: 'spatch', + description: 'Path to spatch binary, used for SmPL patches') + option('STRIP', type: 'string', value: 'strip', description: 'Path to strip binary, used for PGXS emulation') diff --git a/src/makefiles/meson.build b/src/makefiles/meson.build index 46d8da070e82..489a86d351fb 100644 --- a/src/makefiles/meson.build +++ b/src/makefiles/meson.build @@ -57,6 +57,7 @@ pgxs_kv = { 'enable_injection_points': get_option('injection_points') ? 'yes' : 'no', 'enable_tap_tests': tap_tests_enabled ? 'yes' : 'no', 'enable_debug': get_option('debug') ? 'yes' : 'no', + 'enable_coccicheck': spatch.found() ? 'yes' : 'no', 'enable_coverage': 'no', 'enable_dtrace': dtrace.found() ? 'yes' : 'no', @@ -151,6 +152,7 @@ pgxs_bins = { 'TAR': tar, 'ZSTD': program_zstd, 'DTRACE': dtrace, + 'SPATCH': spatch, } pgxs_empty = [ @@ -166,6 +168,10 @@ pgxs_empty = [ 'DBTOEPUB', 'FOP', + # Coccinelle is not supported by pgxs + 'SPATCH', + 'SPFLAGS', + # supporting coverage for pgxs-in-meson build doesn't seem worth it 'GENHTML', 'LCOV', From cebab18604993ac79a819f23458e8c0f0e13a7fe Mon Sep 17 00:00:00 2001 From: Mats Kindahl Date: Sun, 5 Jan 2025 19:26:47 +0100 Subject: [PATCH 4/7] Semantic patch for sizeof() using palloc() If palloc() is used to allocate elements of type T it should be assigned to a variable of type T* or risk indexes out of bounds. This semantic patch checks that allocations to variables of type T* are using sizeof(T) when allocating memory using palloc(). --- cocci/palloc_sizeof.cocci | 49 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) create mode 100644 cocci/palloc_sizeof.cocci diff --git a/cocci/palloc_sizeof.cocci b/cocci/palloc_sizeof.cocci new file mode 100644 index 000000000000..5f8593c2687d --- /dev/null +++ b/cocci/palloc_sizeof.cocci @@ -0,0 +1,49 @@ +virtual report +virtual context +virtual patch + +@initialize:python@ +@@ +import re + +CONST_CRE = re.compile(r'\bconst\b') + +def is_simple_type(s): + return s != 'void' and not CONST_CRE.search(s) + +@r1 depends on report || context@ +type T1 : script:python () { is_simple_type(T1) }; +idexpression T1 *I; +type T2 != T1; +position p; +expression E; +identifier func = {palloc, palloc0}; +@@ +( +* I = func@p(sizeof(T2)) +| +* I = func@p(E * sizeof(T2)) +) + +@script:python depends on report@ +T1 << r1.T1; +T2 << r1.T2; +I << r1.I; +p << r1.p; +@@ +coccilib.report.print_report(p[0], f"'{I}' has type '{T1}*' but 'sizeof({T2})' is used to allocate memory") + +@depends on patch@ +type T1 : script:python () { is_simple_type(T1) }; +idexpression T1 *I; +type T2 != T1; +expression E; +identifier func = {palloc, palloc0}; +@@ +( +- I = func(sizeof(T2)) ++ I = func(sizeof(T1)) +| +- I = func(E * sizeof(T2)) ++ I = func(E * sizeof(T1)) +) From c7f0fa4c1cda689b829802149dda908550d1aa62 Mon Sep 17 00:00:00 2001 From: Mats Kindahl Date: Sun, 29 Dec 2024 20:23:25 +0100 Subject: [PATCH 5/7] Semantic patch for palloc_array and palloc_object Macros were added to the palloc API in commit 2016055a92f to improve type-safety, but very few instances were replaced. This adds a cocci script to do that replacement. The semantic patch deliberately do not replace instances where the type of the variable and the type used in the macro does not match. --- cocci/palloc_array.cocci | 157 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 157 insertions(+) create mode 100644 cocci/palloc_array.cocci diff --git a/cocci/palloc_array.cocci b/cocci/palloc_array.cocci new file mode 100644 index 000000000000..aeeab74c3a9d --- /dev/null +++ b/cocci/palloc_array.cocci @@ -0,0 +1,157 @@ +// Since PG16 there are array versions of common palloc operations, so +// we can use those instead. +// +// We ignore cases where we have a anonymous struct and also when the +// type of the variable being assigned to is different from the +// inferred type. +// +// Options: --no-includes --include-headers + +virtual patch +virtual report +virtual context + +// These rules (soN) are needed to rewrite types of the form +// sizeof(T[C]) to C * sizeof(T) since Cocci cannot (currently) handle +// it. +@initialize:python@ +@@ +import re + +CRE = re.compile(r'(.*)\s+\[\s+(\d+)\s+\]$') + +def is_array_type(s): + mre = CRE.match(s) + return (mre is not None) + +@so1 depends on patch@ +type T : script:python() { is_array_type(T) }; +@@ +palloc(sizeof(T)) + +@script:python so2 depends on patch@ +T << so1.T; +T2; +E; +@@ +mre = CRE.match(T) +coccinelle.T2 = cocci.make_type(mre.group(1)) +coccinelle.E = cocci.make_expr(mre.group(2)) + +@depends on patch@ +type so1.T; +type so2.T2; +expression so2.E; +@@ +- palloc(sizeof(T)) ++ palloc(E * sizeof(T2)) + +@r1 depends on report || context@ +type T !~ "^struct {"; +expression E; +position p; +idexpression T *I; +identifier alloc = {palloc0, palloc}; +@@ +* I = alloc@p(E * sizeof(T)) + +@script:python depends on report@ +p << r1.p; +alloc << r1.alloc; +@@ +coccilib.report.print_report(p[0], f"this {alloc} can be replaced with {alloc}_array") + +@depends on patch@ +type T !~ "^struct {"; +expression E; +T *P; +idexpression T* I; +constant C; +identifier alloc = {palloc0, palloc}; +fresh identifier alloc_array = alloc ## "_array"; +@@ +( +- I = (T*) alloc(E * sizeof( \( *P \| P[C] \) )) ++ I = alloc_array(T, E) +| +- I = (T*) alloc(E * sizeof(T)) ++ I = alloc_array(T, E) +| +- I = alloc(E * sizeof( \( *P \| P[C] \) )) ++ I = alloc_array(T, E) +| +- I = alloc(E * sizeof(T)) ++ I = alloc_array(T, E) +) + +@r3 depends on report || context@ +type T !~ "^struct {"; +expression E; +idexpression T *P; +idexpression T *I; +position p; +@@ +* I = repalloc@p(P, E * sizeof(T)) + +@script:python depends on report@ +p << r3.p; +@@ +coccilib.report.print_report(p[0], "this repalloc can be replaced with repalloc_array") + +@depends on patch@ +type T !~ "^struct {"; +expression E; +idexpression T *P1; +idexpression T *P2; +idexpression T *I; +constant C; +@@ +( +- I = (T*) repalloc(P1, E * sizeof( \( *P2 \| P2[C] \) )) ++ I = repalloc_array(P1, T, E) +| +- I = (T*) repalloc(P1, E * sizeof(T)) ++ I = repalloc_array(P1, T, E) +| +- I = repalloc(P1, E * sizeof( \( *P2 \| P2[C] \) )) ++ I = repalloc_array(P1, T, E) +| +- I = repalloc(P1, E * sizeof(T)) ++ I = repalloc_array(P1, T, E) +) + +@r4 depends on report || context@ +type T !~ "^struct {"; +position p; +idexpression T* I; +identifier alloc = {palloc, palloc0}; +@@ +* I = alloc@p(sizeof(T)) + +@script:python depends on report@ +p << r4.p; +alloc << r4.alloc; +@@ +coccilib.report.print_report(p[0], f"this {alloc} can be replaced with {alloc}_object") + +@depends on patch@ +type T !~ "^struct {"; +T* P; +idexpression T *I; +constant C; +identifier alloc = {palloc, palloc0}; +fresh identifier alloc_object = alloc ## "_object"; +@@ +( +- I = (T*) alloc(sizeof( \( *P \| P[C] \) )) ++ I = alloc_object(T) +| +- I = (T*) alloc(sizeof(T)) ++ I = alloc_object(T) +| +- I = alloc(sizeof( \( *P \| P[C] \) )) ++ I = alloc_object(T) +| +- I = alloc(sizeof(T)) ++ I = alloc_object(T) +) From 6a556480c91f4b99655322a03cdd584e16afb445 Mon Sep 17 00:00:00 2001 From: Mats Kindahl Date: Thu, 23 Jan 2025 02:46:14 +0100 Subject: [PATCH 6/7] Semantic patch for pg_cmp_* functions In commit 3b42bdb4716 and 6b80394781c overflow-safe comparison functions where introduced, but they are not widely used. This semantic patch identifies some of the more common cases and replaces them with calls to the corresponding pg_cmp_* function. --- cocci/use_pg_cmp.cocci | 125 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 125 insertions(+) create mode 100644 cocci/use_pg_cmp.cocci diff --git a/cocci/use_pg_cmp.cocci b/cocci/use_pg_cmp.cocci new file mode 100644 index 000000000000..8a258e61e5d1 --- /dev/null +++ b/cocci/use_pg_cmp.cocci @@ -0,0 +1,125 @@ +// Find cases where we can use the new pg_cmp_* functions. +// +// Copyright 2025 Mats Kindahl, Timescale. +// +// Options: --no-includes --include-headers + +virtual report +virtual context +virtual patch + +@initialize:python@ +@@ + +import re + +TYPMAP = { + 'BlockNumber': 'pg_cmp_u32', + 'ForkNumber': 'pg_cmp_s32', + 'OffsetNumber': 'pg_cmp_s16', + 'int': 'pg_cmp_s32', + 'int16': 'pg_cmp_s16', + 'int32': 'pg_cmp_s32', + 'uint16': 'pg_cmp_u16', + 'uint32': 'pg_cmp_u32', + 'unsigned int': 'pg_cmp_u32', +} + +def is_valid(expr): + return not re.search(r'DatumGet[A-Za-z]+', expr) + +@r1e depends on context || report expression@ +type TypeName : script:python() { TypeName in TYPMAP }; +position pos; +TypeName lhs : script:python() { is_valid(lhs) }; +TypeName rhs : script:python() { is_valid(rhs) }; +@@ +* lhs@pos < rhs ? -1 : lhs > rhs ? 1 : 0 + +@script:python depends on report@ +lhs << r1e.lhs; +rhs << r1e.rhs; +pos << r1e.pos; +@@ +coccilib.report.print_report(pos[0], f"conditional checks between '{lhs}' and '{rhs}' can be replaced with a PostgreSQL comparison function") + +@r1 depends on context || report@ +type TypeName : script:python() { TypeName in TYPMAP }; +position pos; +TypeName lhs : script:python() { is_valid(lhs) }; +TypeName rhs : script:python() { is_valid(rhs) }; +@@ +( +* if@pos (lhs < rhs) return -1; else if (lhs > rhs) return 1; return 0; +| +* if@pos (lhs < rhs) return -1; else if (lhs > rhs) return 1; else return 0; +| +* if@pos (lhs < rhs) return -1; if (lhs > rhs) return 1; return 0; +| +* if@pos (lhs > rhs) return 1; if (lhs < rhs) return -1; return 0; +| +* if@pos (lhs == rhs) return 0; if (lhs > rhs) return 1; return -1; +| +* if@pos (lhs == rhs) return 0; return lhs > rhs ? 1 : -1; +| +* if@pos (lhs == rhs) return 0; return lhs < rhs ? -1 : 1; +) + +@script:python depends on report@ +lhs << r1.lhs; +rhs << r1.rhs; +pos << r1.pos; +@@ +coccilib.report.print_report(pos[0], f"conditional checks between '{lhs}' and '{rhs}' can be replaced with a PostgreSQL comparison function") + +@expr_repl depends on patch expression@ +type TypeName : script:python() { TypeName in TYPMAP }; +fresh identifier cmp = script:python(TypeName) { TYPMAP[TypeName] }; +TypeName lhs : script:python() { is_valid(lhs) }; +TypeName rhs : script:python() { is_valid(rhs) }; +@@ +- lhs < rhs ? -1 : lhs > rhs ? 1 : 0 ++ cmp(lhs,rhs) + +@stmt_repl depends on patch@ +type TypeName : script:python() { TypeName in TYPMAP }; +fresh identifier cmp = script:python(TypeName) { TYPMAP[TypeName] }; +TypeName lhs : script:python() { is_valid(lhs) }; +TypeName rhs : script:python() { is_valid(rhs) }; +@@ +( +- if (lhs < rhs) return -1; if (lhs > rhs) return 1; return 0; ++ return cmp(lhs,rhs); +| +- if (lhs < rhs) return -1; else if (lhs > rhs) return 1; return 0; ++ return cmp(lhs,rhs); +| +- if (lhs < rhs) return -1; else if (lhs > rhs) return 1; else return 0; ++ return cmp(lhs,rhs); +| +- if (lhs > rhs) return 1; if (lhs < rhs) return -1; return 0; ++ return cmp(lhs,rhs); +| +- if (lhs > rhs) return 1; else if (lhs < rhs) return -1; return 0; ++ return cmp(lhs,rhs); +| +- if (lhs == rhs) return 0; if (lhs > rhs) return 1; return -1; ++ return cmp(lhs,rhs); +| +- if (lhs == rhs) return 0; return lhs > rhs ? 1 : -1; ++ return cmp(lhs,rhs); +| +- if (lhs == rhs) return 0; return lhs < rhs ? -1 : 1; ++ return cmp(lhs,rhs); +) + +// Add an include if there were none and we had to do some +// replacements +@has_include depends on patch@ +@@ + #include "common/int.h" + +@depends on patch && !has_include && (stmt_repl || expr_repl)@ +@@ + #include ... ++ #include "common/int.h" From 3f1ded6f5b0ac2d0c772d2527fe9538b5f84848e Mon Sep 17 00:00:00 2001 From: Mats Kindahl Date: Tue, 28 Jan 2025 14:09:41 +0100 Subject: [PATCH 7/7] Semantic patch to use stack-allocated StringInfoData This semantic patch replace uses of StringInfo with StringInfoData where the info is dynamically allocated but (optionally) freed at the end of the block. This will avoid one dynamic allocation that otherwise have to be dealt with. For example, this code: StringInfo info = makeStringInfo(); ... appendStringInfo(info, ...); ... return do_stuff(..., info->data, ...); Can be replaced with: StringInfoData info; initStringInfo(&info); ... appendStringInfo(&info, ...); ... return do_stuff(..., info.data, ...); It does not do a replacement in these cases: - If the variable is assigned to an expression. In this case, the pointer can "leak" outside the function either through a global variable or a parameter assignment. - If an assignment is done to the expression. This cannot leak the data, but could mean a value-assignment of a structure, so we avoid this case. - If the pointer is returned. --- cocci/use_stringinfodata.cocci | 155 +++++++++++++++++++++++++++++++++ 1 file changed, 155 insertions(+) create mode 100644 cocci/use_stringinfodata.cocci diff --git a/cocci/use_stringinfodata.cocci b/cocci/use_stringinfodata.cocci new file mode 100644 index 000000000000..4186027f8c92 --- /dev/null +++ b/cocci/use_stringinfodata.cocci @@ -0,0 +1,155 @@ +// Replace uses of StringInfo with StringInfoData where the info is +// dynamically allocated but (optionally) freed at the end of the +// block. This will avoid one dynamic allocation that otherwise have +// to be dealt with. +// +// For example, this code: +// +// StringInfo info = makeStringInfo(); +// ... +// appendStringInfo(info, ...); +// ... +// return do_stuff(..., info->data, ...); +// +// Can be replaced with: +// +// StringInfoData info; +// initStringInfo(&info); +// ... +// appendStringInfo(&info, ...); +// ... +// return do_stuff(..., info.data, ...); + +virtual report +virtual context +virtual patch + +// This rule captures the position of the makeStringInfo() and bases +// all changes around that. It matches the case that we should *not* +// replace, that is, those that either (1) return the pointer or (2) +// assign the pointer to a variable or (3) assign a variable to the +// pointer. +// +// The first two cases are matched because they could potentially leak +// the pointer outside the function, for some expressions, but the +// last one is just a convenience. +// +// If we replace this, the resulting change will result in a value +// copy of a structure, which might not be optimal, so we do not do a +// replacement. +@id1 exists@ +typedef StringInfo; +local idexpression StringInfo info; +position pos; +expression E; +@@ + info@pos = makeStringInfo() + ... +( + return info; +| + info = E +| + E = info +) + +@r1 depends on !patch disable decl_init exists@ +identifier info, fld; +position dpos, pos != id1.pos; +@@ +( +* StringInfo@dpos info; + ... +* info@pos = makeStringInfo(); +| +* StringInfo@dpos info@pos = makeStringInfo(); +) +<... +( +* \(pfree\|destroyStringInfo\)(info); +| +* info->fld +| +* *info +| +* info +) +...> + +@script:python depends on report@ +info << r1.info; +dpos << r1.dpos; +@@ +coccilib.report.print_report(dpos[0], f"Variable '{info}' of type StringInfo can be defined using StringInfoData") + +@depends on patch disable decl_init exists@ +identifier info, fld; +position pos != id1.pos; +@@ +- StringInfo info; ++ StringInfoData info; + ... +- info@pos = makeStringInfo(); ++ initStringInfo(&info); +<... +( +- \(destroyStringInfo\|pfree\)(info); +| + info +- ->fld ++ .fld +| +- *info ++ info +| +- info ++ &info +) +...> + +// Here we repeat the matching of the "bad case" since we cannot +// inherit over modifications +@id2 exists@ +typedef StringInfo; +local idexpression StringInfo info; +position pos; +expression E; +@@ + info@pos = makeStringInfo() + ... +( + return info; +| + info = E +| + E = info +) + +@depends on patch exists@ +identifier info, fld; +position pos != id2.pos; +statement S, S1; +@@ +- StringInfo info@pos = makeStringInfo(); ++ StringInfoData info; + ... when != S +( +<... +( +- \(destroyStringInfo\|pfree\)(info); +| + info +- ->fld ++ .fld +| +- *info ++ info +| +- info ++ &info +) +...> +& ++ initStringInfo(&info); + S1 +)