#!/usr/bin/env python3
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

"""Helps find commits to cherrypick into a release branch.

Usage:
  pick_doc_commits.py --main=origin/main --release=origin/release/5.5

It will find commits on the main branch that are not on the release branch, and
filter them down to the docs-only commits that should be cherrypicked. It will
also print the commits that were filtered out.

This tool will not actually modify the git repo, it will only print the commands
to run.

Must be run from inside the repo, ideally after a recent `git pull`. Does not
care which branch is currently checked out.
"""

import argparse
import datetime
import re
import subprocess
import sys
import textwrap
from typing import List


# The script will print extra info when this is > 0, and more at higher levels.
# Controlled by the --verbose flag.
verbosity = 0


def debug_log(message: str) -> None:
    """Prints a message to stderr if verbosity is greater than zero."""
    if verbosity > 0:
        sys.stderr.write(f"VERBOSE: {message}\n")


def run_git(command: List[str]) -> List[str]:
    """Runs a git command and returns its stdout as a list of lines.

    Prints the command and its output to debug_log() if verbosity is greater
    than 1.

    Args:
        command: The args to pass to `git`, without the leading `git` itself.

    Returns:
        A list of the non-empty lines printed to stdout, with surrounding
        whitespace stripped.

    Raises:
        Exception: The command could not be started or exited with a non-zero
            status.
    """
    if verbosity > 1:
        # Higher verbosity required
        debug_log("Running command: 'git " + " ".join(command) + "'")

    # Only the process launch is guarded, so that the non-zero-exit error raised
    # below is not caught and wrapped a second time.
    try:
        result = subprocess.run(["git", *command], capture_output=True, text=True)
    except (OSError, ValueError) as e:
        raise Exception(f"Error running command '{command}': {e}") from e

    if result.returncode != 0:
        raise Exception(f"Error running command '{command}':\n{result.stderr}")

    # Remove empty and whitespace-only lines.
    lines = [line.strip() for line in result.stdout.split("\n") if line.strip()]

    if verbosity > 1:
        debug_log("-----BEGIN GIT OUTPUT-----")
        for line in lines:
            debug_log(line)
        debug_log("-----END GIT OUTPUT-----")

    return lines


class Commit:
    """A git commit hash and its one-line message."""

    def __init__(self, hash: str, message: str = ""):
        """Creates a new Commit with the given hash.

        Args:
            hash: The hexadecimal hash of the commit.
            message: The one-line summary of the commit. If empty, this method
                will ask git for the commit message.
        """
        self.hash = hash.strip()
        if not message:
            # Ask git for the commit message.
            lines = run_git(["log", "-1", "--pretty=%s", self.hash])
            # Should just be one line, but could be zero.
            message = " ".join(lines)
        self.message = message.strip()

    @staticmethod
    def from_line(line: str) -> "Commit":
        """Creates a Commit from a string of the form '<hash> [<message>]'.

        Args:
            line: The hash, optionally followed by a space and the one-line
                commit message.

        Returns:
            The parsed Commit. If the line carries no message, the Commit
            constructor asks git for it.

        Raises:
            ValueError: The line contains no hash.
        """
        parts = line.split(" ", maxsplit=1)
        parts = [part.strip() for part in parts if part.strip()]
        # Raise explicitly rather than assert: asserts are stripped under -O,
        # which would turn a malformed line into an IndexError below.
        if not parts:
            raise ValueError(f"Expected at least one part in line '{line}'")
        return Commit(hash=parts[0], message=parts[1] if len(parts) > 1 else "")

    def __repr__(self):
        return f"Commit('{self.hash[:8]}', '{self.message}')"

    def __str__(self):
        return f"{self.hash[:8]} {self.message}"


def is_doc_only_commit(commit: Commit) -> bool:
    """Returns True if the commit only touched "documentation files".

    Args:
        commit: The commit whose modified files are inspected via
            `git diff-tree`.

    Returns:
        True if every file reported for the commit is a documentation file.
        A commit for which git reports no files also counts as doc-only.
    """

    def is_doc_file(path: str) -> bool:
        """Returns true if the path is considered to be a "documentation file"."""
        return (
            # Everything under docs, regardless of the file type.
            path.startswith("docs/")
            # Any markdown or RST file in the repo.
            or path.endswith((".md", ".rst"))
        )

    # The first line is the full hash, and the rest are the files modified by
    # the commit, relative to the root of the repo.
    lines = run_git(["diff-tree", "--name-only", "-r", commit.hash])
    all_files = frozenset(lines[1:])
    doc_files = frozenset(filter(is_doc_file, all_files))
    non_doc_files = all_files - doc_files
    is_doc_only = all_files == doc_files

    if verbosity > 0 and not is_doc_only:
        # non_doc_files is guaranteed non-empty here, so indexing is safe.
        debug_log(
            f"{repr(commit)} touches {len(non_doc_files)} non-doc files, "
            + f"like '{sorted(non_doc_files)[0]}'."
        )

    return is_doc_only


def print_wrapped(text: str, width: int = 80) -> None:
    """Print text wrapped to fit within the given width.

    Indents additional lines by four spaces.

    Args:
        text: The text to wrap and print to stdout.
        width: The maximum width of each printed line, including the indent of
            continuation lines.
    """
    # Wrap at `width - 4` so continuation lines still fit in `width` once the
    # four-space indent is prepended.
    print("\n    ".join(textwrap.wrap(text, width=width - 4, break_on_hyphens=False)))


def parse_args() -> argparse.Namespace:
    """Parses the command-line flags from sys.argv.

    Returns:
        A namespace with `main`, `release`, and `verbose` attributes.
    """
    parser = argparse.ArgumentParser(
        description="Prints differences between git branches."
    )
    parser.add_argument(
        "--main",
        default="origin/main",
        type=str,
        help="The name of the main (source) branch to pick commits from.",
    )
    parser.add_argument(
        "--release",
        # There is no sensible default release branch; without this flag the
        # script would pass None to git and fail with an opaque error.
        required=True,
        type=str,
        help="The name of the release (destination) branch to pick commits onto, "
        + "ideally with the 'origin/' prefix",
    )
    parser.add_argument(
        "-v",
        "--verbose",
        action="count",
        default=0,
        help="Log extra output. Specify more times (-vv) for more output.",
    )
    return parser.parse_args()


def main() -> None:
    """Prints the doc-only commits to cherry-pick and the commands to do so."""
    args = parse_args()
    main_branch = args.main
    release_branch = args.release

    global verbosity
    verbosity = args.verbose

    # Returns a list of hashes that are on the main branch but not the release
    # branch. Each hash is preceded by `+ ` if the commit has not been cherry
    # picked onto the release branch, or `- ` if it has.
    cherry_lines = run_git(["cherry", release_branch, main_branch])

    print_wrapped(
        f"Commits on '{main_branch}' that have already been cherry-picked into '{release_branch}':"
    )
    if not cherry_lines:
        print("- ")
    candidate_commits = []
    for line in cherry_lines:
        # Drop the two-character `+ ` / `- ` marker before parsing.
        commit = Commit.from_line(line[2:])
        if line.startswith("+ "):
            candidate_commits.append(commit)
        elif line.startswith("- "):
            print(f"- {commit}")
    print("")

    # Filter out and print the commits that touch non-documentation files.
    print_wrapped(
        f"Will not pick these commits on '{main_branch}' that touch non-documentation files:"
    )
    if not candidate_commits:
        print("- ")
    doc_only_commits = []
    for commit in candidate_commits:
        if is_doc_only_commit(commit):
            doc_only_commits.append(commit)
        else:
            print(f"- {commit}")
    print("")

    # Print the commits to cherry-pick.
    print_wrapped(
        f"Remaining '{main_branch}' commits that touch only documentation files; "
        + f"will be cherry-picked into '{release_branch}':"
    )
    if not doc_only_commits:
        print("- ")
    for commit in doc_only_commits:
        print(f"- {commit}")
    print("")

    # Print instructions for cherry-picking the commits.
    if doc_only_commits:
        # Recommend a unique branch name. Uses an aware UTC timestamp because
        # datetime.utcnow() is deprecated; the formatted value is the same.
        suffix = datetime.datetime.now(datetime.timezone.utc).strftime("%Y%m%d%H%M")
        branch_name = "cherrypick-" + release_branch.replace("/", "-") + "-" + suffix

        print("Cherry pick by running the commands:")
        print("```")
        print(f"git checkout {release_branch}")
        # Split lines with backslashes to make long lists more legible but
        # still copy-pasteable.
        print(
            "git cherry-pick \\\n "
            + " \\\n ".join(commit.hash for commit in doc_only_commits)
        )
        print(f"git checkout -b {branch_name}")
        print("```")
        print("")
        print("To verify that this worked, re-run this script with the arguments:")
        print("```")
        print(f"--main={main_branch} --release={branch_name}")
        print("```")
        print("It should show no doc-only commits to cherry-pick.")
        print("")
        print(f"Then, push {branch_name} to GitHub:")
        print("```")
        print(f"git push --set-upstream origin {branch_name}")
        print("```")
        print("")
        # Remove "origin/" if present since it won't appear in the GitHub UI.
        github_branch = re.sub("^origin/", "", release_branch)
        print_wrapped(
            "When creating the PR, remember to set the 'into' branch to be "
            + f"'{github_branch}'."
        )
    else:
        print_wrapped(
            "It looks like there are no doc-only commits "
            + f"on '{main_branch}' to cherry-pick into '{release_branch}'."
        )


if __name__ == "__main__":
    main()