#!/usr/bin/env python3
"""
Python script for building documentation.

To build the docs you must have all optional dependencies for pandas
installed. See the installation instructions for a list of these.

Usage
-----
    $ python make.py clean
    $ python make.py html
    $ python make.py latex
"""

import argparse
import csv
import html
import importlib
import os
import pathlib
import shutil
import subprocess
import sys
import webbrowser

import docutils
import docutils.frontend
import docutils.nodes
import docutils.parsers.rst
import docutils.utils

DOC_PATH = os.path.dirname(os.path.abspath(__file__))
SOURCE_PATH = os.path.join(DOC_PATH, "source")
BUILD_PATH = os.path.join(DOC_PATH, "build")
REDIRECTS_FILE = os.path.join(DOC_PATH, "redirects.csv")


class DocBuilder:
    """
    Class to wrap the different commands of this script.

    All public methods of this class can be called as parameters of the
    script.
    """

    def __init__(
        self,
        num_jobs="auto",
        include_api=True,
        whatsnew=False,
        single_doc=None,
        verbosity=0,
        warnings_are_errors=False,
        no_browser=False,
    ) -> None:
        """
        Store the build options and export ``SPHINX_PATTERN``.

        Parameters
        ----------
        num_jobs : str or int, default "auto"
            Value passed to ``sphinx-build -j``. A falsy value omits ``-j``.
        include_api : bool, default True
            When False, ``SPHINX_PATTERN`` is set to ``"-api"`` and redirects
            under ``reference``/``generated`` are skipped.
        whatsnew : bool, default False
            When True (and no single doc is given and the API is included),
            ``SPHINX_PATTERN`` is set to ``"whatsnew"``.
        single_doc : str, optional
            A ``.rst``/``.ipynb`` path relative to the source folder, or a
            dotted pandas object name such as ``pandas.DataFrame.head``.
        verbosity : int, default 0
            Number of ``v`` flags passed to ``sphinx-build``.
        warnings_are_errors : bool, default False
            Pass ``-W --keep-going`` to ``sphinx-build``.
        no_browser : bool, default False
            Do not open a browser tab after a successful build.
        """
        self.num_jobs = num_jobs
        self.include_api = include_api
        self.whatsnew = whatsnew
        self.verbosity = verbosity
        self.warnings_are_errors = warnings_are_errors
        self.no_browser = no_browser

        if single_doc:
            single_doc = self._process_single_doc(single_doc)
            os.environ["SPHINX_PATTERN"] = single_doc
        elif not include_api:
            os.environ["SPHINX_PATTERN"] = "-api"
        elif whatsnew:
            os.environ["SPHINX_PATTERN"] = "whatsnew"

        self.single_doc_html = None
        if single_doc:
            if single_doc.endswith((".rst", ".ipynb")):
                # Both source formats map to "<same path>.html"; a notebook
                # must not be mistaken for an API object name.
                self.single_doc_html = os.path.splitext(single_doc)[0] + ".html"
            else:
                self.single_doc_html = f"reference/api/pandas.{single_doc}.html"

    def _process_single_doc(self, single_doc):
        """
        Validate the value provided for --single.

        The value must be a path to an existing .rst/.ipynb file (relative to
        the source folder), or a pandas object that can be imported. For
        example, categorical.rst or pandas.DataFrame.head.

        Parameters
        ----------
        single_doc : str
            Raw value of the ``--single`` option.

        Returns
        -------
        str
            The unchanged path for a file, or the object name with the
            leading ``"pandas."`` removed (e.g. ``DataFrame.head``).

        Raises
        ------
        FileNotFoundError
            If a .rst/.ipynb path does not exist under the source folder.
        ImportError
            If the dotted name cannot be resolved on the pandas module.
        ValueError
            If the value is neither a doc file nor a ``pandas.`` name.
        """
        extension = os.path.splitext(single_doc)[1]
        if extension in (".rst", ".ipynb"):
            if os.path.exists(os.path.join(SOURCE_PATH, single_doc)):
                return single_doc
            raise FileNotFoundError(f"File {single_doc} not found")

        if single_doc.startswith("pandas."):
            try:
                # `pandas` is injected into the module globals by main().
                obj = pandas  # noqa: F821
                # Skip the leading "pandas" component: the walk already
                # starts at the pandas module itself.
                for name in single_doc.split(".")[1:]:
                    obj = getattr(obj, name)
            except AttributeError as err:
                raise ImportError(f"Could not import {single_doc}") from err
            return single_doc[len("pandas.") :]

        raise ValueError(
            f"--single={single_doc} not understood. "
            "Value should be a valid path to a .rst or .ipynb file, "
            "or a valid pandas object "
            "(e.g. categorical.rst or pandas.DataFrame.head)"
        )

    @staticmethod
    def _run_os(*args) -> None:
        """
        Execute a command as a OS terminal.

        Parameters
        ----------
        *args : list of str
            Command and parameters to be executed

        Raises
        ------
        subprocess.CalledProcessError
            If the command exits with a non-zero status.

        Examples
        --------
        >>> DocBuilder()._run_os("python", "--version")
        """
        subprocess.check_call(args, stdout=sys.stdout, stderr=sys.stderr)

    def _sphinx_build(self, kind: str):
        """
        Call sphinx to build documentation.

        Attribute `num_jobs` from the class is used.

        Parameters
        ----------
        kind : {'html', 'latex', 'linkcheck'}

        Returns
        -------
        int
            Exit status of the ``sphinx-build`` process.

        Raises
        ------
        ValueError
            If `kind` is not one of the supported builders.

        Examples
        --------
        >>> DocBuilder(num_jobs=4)._sphinx_build("html")
        """
        if kind not in ("html", "latex", "linkcheck"):
            raise ValueError(f"kind must be html, latex or linkcheck, not {kind}")

        cmd = ["sphinx-build", "-b", kind]
        if self.num_jobs:
            # Every argv entry must be a string; num_jobs may be an int.
            cmd += ["-j", str(self.num_jobs)]
        if self.warnings_are_errors:
            cmd += ["-W", "--keep-going"]
        if self.verbosity:
            cmd.append(f"-{'v' * self.verbosity}")
        cmd += [
            "-d",
            os.path.join(BUILD_PATH, "doctrees"),
            SOURCE_PATH,
            os.path.join(BUILD_PATH, kind),
        ]
        return subprocess.call(cmd)

    def _open_browser(self, single_doc_html) -> None:
        """
        Open a browser tab showing a page of the built HTML documentation.

        Parameters
        ----------
        single_doc_html : str
            Path of the page relative to the ``build/html`` directory.
        """
        # os.path.join("file://", DOC_PATH, ...) would drop the scheme because
        # DOC_PATH is absolute, so build a proper file URI instead.
        page = pathlib.Path(DOC_PATH, "build", "html", single_doc_html)
        webbrowser.open(page.as_uri(), new=2)

    def _get_page_title(self, page):
        """
        Open the rst file `page` and extract its title.

        Parameters
        ----------
        page : str
            Path of the page relative to the source folder, without the
            ``.rst`` extension.

        Returns
        -------
        str
            Text of the title of the first section in the document.
        """
        fname = os.path.join(SOURCE_PATH, f"{page}.rst")
        doc = docutils.utils.new_document(
            "",
            docutils.frontend.get_default_settings(docutils.parsers.rst.Parser),
        )
        with open(fname, encoding="utf-8") as rst_fd:
            data = rst_fd.read()

        parser = docutils.parsers.rst.Parser()
        # do not generate any warning when parsing the rst
        with open(os.devnull, "a", encoding="utf-8") as null_fd:
            doc.reporter.stream = null_fd
            parser.parse(data, doc)

        # next() raises StopIteration when there is no section or title; the
        # caller treats any exception as "title unavailable".
        section = next(
            node for node in doc.children if isinstance(node, docutils.nodes.section)
        )
        title = next(
            node for node in section.children if isinstance(node, docutils.nodes.title)
        )
        return title.astext()

    def _add_redirects(self) -> None:
        """
        Create in the build directory an html file with a redirect,
        for every row in REDIRECTS_FILE.

        Each row holds the old page path and the new page path (both without
        extension). Empty rows, rows whose first cell starts with ``#`` and
        rows without a target column are skipped.
        """
        html_path = os.path.join(BUILD_PATH, "html")
        skipped_without_api = (
            os.path.join(html_path, "reference"),
            os.path.join(html_path, "generated"),
        )

        with open(REDIRECTS_FILE, encoding="utf-8") as mapping_fd:
            reader = csv.reader(mapping_fd)
            for row in reader:
                if not row or row[0].strip().startswith("#"):
                    continue
                if len(row) < 2:
                    # No target to redirect to.
                    continue

                path = os.path.join(html_path, *row[0].split("/")) + ".html"

                if not self.include_api and any(
                    prefix in path for prefix in skipped_without_api
                ):
                    continue

                try:
                    title = self._get_page_title(row[1])
                except Exception:
                    # the file can be an ipynb and not an rst, or docutils
                    # may not be able to read the rst because it has some
                    # sphinx specific stuff
                    title = "this page"

                target = html.escape(f"{row[1]}.html", quote=True)
                page = f"""\
<html>
    <head>
        <meta http-equiv="refresh" content="0;URL={target}"/>
    </head>
    <body>
        <p>
            The page has been moved to <a href="{target}">{html.escape(title)}</a>
        </p>
    </body>
</html>"""

                # The old location may live in a directory sphinx never made.
                os.makedirs(os.path.dirname(path), exist_ok=True)
                with open(path, "w", encoding="utf-8") as moved_page_fd:
                    moved_page_fd.write(page)

    def html(self):
        """
        Build HTML documentation.

        Returns
        -------
        int
            Exit status of ``sphinx-build``.
        """
        ret_code = self._sphinx_build("html")
        zip_fname = os.path.join(BUILD_PATH, "html", "pandas.zip")
        if os.path.exists(zip_fname):
            os.remove(zip_fname)  # noqa: TID251

        if ret_code == 0:
            if self.single_doc_html is not None:
                if not self.no_browser:
                    self._open_browser(self.single_doc_html)
            else:
                self._add_redirects()
                if self.whatsnew and not self.no_browser:
                    self._open_browser(os.path.join("whatsnew", "index.html"))

        return ret_code

    def latex(self, force=False):
        """
        Build PDF documentation.

        Parameters
        ----------
        force : bool, default False
            Run ``pdflatex`` three times in non-stop mode instead of ``make``
            and then exit asking the user to check the resulting PDF.

        Returns
        -------
        int or None
            Exit status of ``sphinx-build``; None on Windows, where nothing
            is built.
        """
        if sys.platform == "win32":
            sys.stderr.write("latex build has not been tested on windows\n")
        else:
            ret_code = self._sphinx_build("latex")
            os.chdir(os.path.join(BUILD_PATH, "latex"))
            if force:
                for _ in range(3):
                    self._run_os("pdflatex", "-interaction=nonstopmode", "pandas.tex")
                raise SystemExit(
                    'You should check the file "build/latex/pandas.pdf" for problems.'
                )
            self._run_os("make")
            return ret_code

    def latex_forced(self):
        """
        Build PDF documentation with retries to find missing references.
        """
        return self.latex(force=True)

    @staticmethod
    def clean() -> None:
        """
        Clean documentation generated files.
        """
        shutil.rmtree(BUILD_PATH, ignore_errors=True)
        shutil.rmtree(os.path.join(SOURCE_PATH, "reference", "api"), ignore_errors=True)

    def zip_html(self) -> None:
        """
        Compress HTML documentation into a zip file.
        """
        dirname = os.path.join(BUILD_PATH, "html")
        zip_fname = os.path.join(dirname, "pandas.zip")
        if os.path.exists(zip_fname):
            os.remove(zip_fname)  # noqa: TID251
        fnames = os.listdir(dirname)
        # zip is run from inside the html folder so archive paths are relative.
        os.chdir(dirname)
        self._run_os("zip", zip_fname, "-r", "-q", *fnames)

    def linkcheck(self):
        """
        Check for broken links in the documentation.
        """
        return self._sphinx_build("linkcheck")


def main():
    """
    Parse the command line and run the requested DocBuilder command.

    Returns
    -------
    int or None
        Whatever the selected command returns; used as the exit status.

    Raises
    ------
    ValueError
        If the requested command is not a public method of DocBuilder.
    """
    cmds = [method for method in dir(DocBuilder) if not method.startswith("_")]

    joined = ",".join(cmds)
    argparser = argparse.ArgumentParser(
        description="pandas documentation builder", epilog=f"Commands: {joined}"
    )

    joined = ", ".join(cmds)
    argparser.add_argument(
        "command", nargs="?", default="html", help=f"command to run: {joined}"
    )
    argparser.add_argument(
        "--num-jobs", default="auto", help="number of jobs used by sphinx-build"
    )
    argparser.add_argument(
        "--no-api", default=False, help="omit api and autosummary", action="store_true"
    )
    argparser.add_argument(
        "--whatsnew",
        default=False,
        help="only build whatsnew (and api for links)",
        action="store_true",
    )
    argparser.add_argument(
        "--single",
        metavar="FILENAME",
        type=str,
        default=None,
        help=(
            "filename (relative to the 'source' folder) of section or method name to "
            "compile, e.g. 'development/contributing.rst', "
            "'pandas.DataFrame.join'"
        ),
    )
    argparser.add_argument(
        "--python-path", type=str, default=os.path.dirname(DOC_PATH), help="path"
    )
    argparser.add_argument(
        "-v",
        action="count",
        dest="verbosity",
        default=0,
        help=(
            "increase verbosity (can be repeated), passed to the sphinx build command"
        ),
    )
    argparser.add_argument(
        "--warnings-are-errors",
        "-W",
        action="store_true",
        help="fail if warnings are raised",
    )
    argparser.add_argument(
        "--no-browser",
        help="Don't open browser",
        default=False,
        action="store_true",
    )
    args = argparser.parse_args()

    if args.command not in cmds:
        raise ValueError(f"Unknown command {args.command}. Available options: {joined}")

    # Below we update both os.environ and sys.path. The former is inherited by
    # child processes (namely Sphinx) so that they import pandas from
    # `python_path`. The latter is used to resolve the import within this
    # module, injecting pandas into the global namespace.
    os.environ["PYTHONPATH"] = args.python_path
    sys.path.insert(0, args.python_path)
    globals()["pandas"] = importlib.import_module("pandas")

    # Set the matplotlib backend to the non-interactive Agg backend for all
    # child processes.
    os.environ["MPLBACKEND"] = "module://matplotlib.backends.backend_agg"

    builder = DocBuilder(
        args.num_jobs,
        not args.no_api,
        args.whatsnew,
        args.single,
        args.verbosity,
        args.warnings_are_errors,
        args.no_browser,
    )
    return getattr(builder, args.command)()


if __name__ == "__main__":
    sys.exit(main())