blight.tool

Encapsulations of the tools supported by blight.

View Source

   1"""
   2Encapsulations of the tools supported by blight.
   3"""
   4
   5import itertools
   6import json
   7import logging
   8import os
   9import re
  10import shlex
  11import subprocess
  12from pathlib import Path
  13from typing import Any, Dict, List, Optional, Tuple
  14
  15from blight import util
  16from blight.constants import COMPILER_FLAG_INJECTION_VARIABLES
  17from blight.enums import (
  18    BlightTool,
  19    BuildTool,
  20    CodeModel,
  21    CompilerFamily,
  22    CompilerStage,
  23    Lang,
  24    OptLevel,
  25    Std,
  26)
  27from blight.exceptions import BlightError, BuildError, SkipRun
  28from blight.protocols import (
  29    CanonicalizedArgsProtocol,
  30    IndexedUndefinesProtocol,
  31    LangProtocol,
  32)
  33from blight.util import json_helper
  34
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)


# Depth bound used by `ResponseFileMixin._expand_response_file`: once the nesting
# level reaches this value, the response file is not expanded any further.
RESPONSE_FILE_RECURSION_LIMIT = 64
"""
Response files can contain further `@file` arguments, because of course they can.

Neither clang nor GCC is explicit in their documentation about their recursion limits,
if they have any. We choose an arbitrary limit here.
"""
  45
  46
  47class Tool:
  48    """
  49    Represents a generic tool wrapped by blight.
  50
  51    Every `Tool` has two views of its supplied arguments:
  52
  53    * An "effective" view, provided by `Tool.args`
  54    * A "canonicalized" view, provided by `Tool.canonicalized_args`
  55
  56    The "effective" view is used to invoke the underlying wrapped tool. It should
  57    never differ from the original arguments supplied to the invocation, **except**
  58    for when a user configures an action that **intentionally** modifies the
  59    arguments.
  60
  61    The "canonicalized" view is used to model the behavior of the underlying wrapped
  62    tool. Specific `Tool` subclasses may specialize the canonicalized view to improve
  63    modeling fidelity. For example, tools that support the `@file` syntax (see
  64    `ResponseFileMixin`) for expanding arguments may augment `canonicalized_args`
  65    to reflect a fully expanded and normalized version of the original arguments.
  66
  67    The "canonicalized" view always derives directly from the "effective" view:
  68    any modifications made to the "effective" arguments by an action will be
  69    propagated to the "canonicalized" arguments.
  70
  71    `Tool` instances cannot be created directory; a specific subclass must be used.
  72    """
  73
  74    @classmethod
  75    def build_tool(cls) -> BuildTool:
  76        """
  77        Returns the `BuildTool` enum associated with this `Tool`.
  78        """
  79        return BuildTool(cls.__name__)
  80
  81    @classmethod
  82    def blight_tool(cls) -> BlightTool:
  83        """
  84        Returns the `BlightTool` enum associated with this `Tool`.
  85        """
  86        return cls.build_tool().blight_tool
  87
  88    @classmethod
  89    def wrapped_tool(cls) -> str:
  90        """
  91        Returns the executable name or path of the tool that this blight tool wraps.
  92        """
  93        wrapped_tool = os.getenv(cls.blight_tool().env)
  94        if wrapped_tool is None:
  95            raise BlightError(f"No wrapped tool found for {cls.build_tool()}")
  96        return wrapped_tool
  97
  98    def __init__(self, args: List[str]) -> None:
  99        if self.__class__ == Tool:
 100            raise NotImplementedError(f"can't instantiate {self.__class__.__name__} directly")
 101        self._args = args
 102        self._canonicalized_args = args.copy()
 103        self._env = self._fixup_env()
 104        self._cwd = Path(os.getcwd()).resolve()
 105        self._actions = util.load_actions()
 106        self._skip_run = False
 107        self._action_results: Dict[str, Optional[Dict[str, Any]]] = {}
 108        self._journal_path = os.getenv("BLIGHT_JOURNAL_PATH")
 109
 110    def _fixup_env(self) -> Dict[str, str]:
 111        """
 112        Fixes up `os.environ` to remove any references to blight's swizzled paths,
 113        if any are present.
 114        """
 115        env = dict(os.environ)
 116        env["PATH"] = util.unswizzled_path()
 117        return env
 118
 119    def _before_run(self) -> None:
 120        for action in self._actions:
 121            try:
 122                action._before_run(self)
 123            except SkipRun:
 124                self._skip_run = True
 125
 126    def _after_run(self) -> None:
 127        for action in self._actions:
 128            action._after_run(self, run_skipped=self._skip_run)
 129
 130            if self.is_journaling():
 131                self._action_results[action.__class__.__name__] = action.result
 132
 133    def _commit_journal(self) -> None:
 134        if self.is_journaling():
 135            with util.flock_append(self._journal_path) as io:  # type: ignore
 136                json.dump(self._action_results, io, default=json_helper)
 137                # NOTE(ww): `json.dump` doesn't do this for us.
 138                io.write("\n")
 139
 140    def run(self) -> None:
 141        """
 142        Runs the wrapped tool with the original arguments.
 143        """
 144        self._before_run()
 145
 146        if not self._skip_run:
 147            status = subprocess.run([self.wrapped_tool(), *self.args], env=self._env)
 148            if status.returncode != 0:
 149                raise BuildError(
 150                    f"{self.wrapped_tool()} exited with status code {status.returncode}"
 151                )
 152
 153        self._after_run()
 154
 155        self._commit_journal()
 156
 157    def is_journaling(self) -> bool:
 158        """
 159        Returns:
 160            `True` if this `Tool` is in "journaling" mode.
 161        """
 162        return self._journal_path is not None
 163
 164    def asdict(self) -> Dict[str, Any]:
 165        """
 166        Returns:
 167            A dictionary representation of this tool
 168        """
 169
 170        return {
 171            "name": self.__class__.__name__,
 172            "wrapped_tool": self.wrapped_tool(),
 173            "args": self.args,
 174            "canonicalized_args": self.canonicalized_args,
 175            "cwd": str(self._cwd),
 176            "env": self._env,
 177        }
 178
 179    @property
 180    def args(self) -> List[str]:
 181        return self._args
 182
 183    @args.setter
 184    def args(self, args_: List[str]) -> None:
 185        self._args = args_
 186
 187        # NOTE(ww): Modifying the effective arguments also propagates
 188        # those changes to the canonicalized arguments. This shouldn't be a problem,
 189        # since mixins that specialize `canonicalized_args` call
 190        # `super.canonicalized_args` to get the most recent copy.
 191        self._canonicalized_args = args_.copy()
 192
 193    @property
 194    def canonicalized_args(self) -> List[str]:
 195        # NOTE(ww): `canonicalized_args` doesn't need an explicit setter property,
 196        # since all specializations of it are expected to modify the underlying
 197        # list.
 198        return self._canonicalized_args
 199
 200    @property
 201    def cwd(self) -> Path:
 202        """
 203        Returns the directory that this tool was run in.
 204        """
 205        return self._cwd
 206
 207    @property
 208    def inputs(self) -> List[str]:
 209        """
 210        Returns all explicit "inputs" to the tool. "Inputs" is subjectively
 211        defined to be the "main" inputs to a tool, i.e. source files and **not**
 212        additional files that *may* be passed in via options.
 213
 214        Tools may further refine the behavior of this property
 215        by overriding it with their own, more specific behavior.
 216
 217        **NOTE**: This property, more so than others, relies on heuristics.
 218
 219        Returns:
 220            A list of `str`s, representing the tool's inputs.
 221        """
 222
 223        # Our strategy here is as follows:
 224        # * Filter out any arguments that begin with "-" or "@" and
 225        #   aren't *just" "-" (since that indicates stdin).
 226        # * Then, look for arguments that are files in the tool's current
 227        #   directory.
 228        inputs = []
 229        for idx, arg in enumerate(self.canonicalized_args):
 230            if arg.startswith("-") or arg.startswith("@"):
 231                if arg == "-":
 232                    inputs.append(arg)
 233                continue
 234
 235            candidate = Path(arg)
 236            if not candidate.is_file() and not (self.cwd / candidate).is_file():
 237                # NOTE(ww): pathlib's is_file returns False for device files, e.g. /dev/stdin.
 238                # It would be perverse for a build system to use these, but maybe worth
 239                # handling.
 240                continue
 241
 242            # Annoying edge cases: most other flags that take filenames do so in
 243            # -flag=filename form, but -aux-info does it without the "=".
 244            # Similarly, we need to make sure not to catch an output flag's
 245            # argument here.
 246            if idx == 0 or self.canonicalized_args[idx - 1] not in ["-aux-info", "-o"]:
 247                inputs.append(arg)
 248
 249        return inputs
 250
 251    @property
 252    def outputs(self) -> List[str]:
 253        """
 254        Returns all "outputs" produced by the tool. "Outputs" is subjectively
 255        defined to be the "main" products of a tool, i.e. results of a particular
 256        stage or invocation and **not** any incidental or metadata files that
 257        might otherwise be created in the process.
 258
 259        Tools may further refine the behavior of this mixin-supplied property
 260        by overriding it with their own, more specific behavior.
 261
 262        Returns:
 263            A list of `str`, each of which is an output
 264        """
 265
 266        o_flag_index = util.rindex_prefix(self.canonicalized_args, "-o")
 267        if o_flag_index is None:
 268            return []
 269
 270        if self.canonicalized_args[o_flag_index] == "-o":
 271            return [self.canonicalized_args[o_flag_index + 1]]
 272
 273        # NOTE(ww): Outputs like -ofoo. Gross, but valid according to GCC.
 274        return [self.canonicalized_args[o_flag_index][2:]]
 275
 276
 277class LangMixin:
 278    """
 279    A mixin for tools that have a "language" component, i.e.
 280    those that change their behavior based on the language that they're used with.
 281    """
 282
 283    @property
 284    def lang(self: CanonicalizedArgsProtocol) -> Lang:
 285        """
 286        Returns:
 287            A `blight.enums.Lang` value representing the tool's language
 288        """
 289        logger.warning(
 290            "this API might not do what you expect; see: https://github.com/trailofbits/blight/issues/43493"
 291        )
 292
 293        x_lang_map = {"c": Lang.C, "c-header": Lang.C, "c++": Lang.Cxx, "c++-header": Lang.Cxx}
 294
 295        # First, check for `-x lang`. This overrides the language determined by
 296        # the frontend's binary name (e.g. `g++`).
 297        x_flag_index = util.rindex_prefix(self.canonicalized_args, "-x")
 298        if x_flag_index is not None:
 299            if self.canonicalized_args[x_flag_index] == "-x":
 300                # TODO(ww): Maybe bounds check.
 301                x_lang = self.canonicalized_args[x_flag_index + 1]
 302            else:
 303                # NOTE(ww): -xc and -xc++ both work, at least on GCC.
 304                x_lang = self.canonicalized_args[x_flag_index][2:]
 305            return x_lang_map.get(x_lang, Lang.Unknown)
 306
 307        # No `-x lang` means that we're operating in the frontend's default mode.
 308        if self.__class__ == CC:
 309            return Lang.C
 310        elif self.__class__ == CXX:
 311            return Lang.Cxx
 312        else:
 313            logger.debug(f"unknown default language mode for {self.__class__.__name__}")
 314            return Lang.Unknown
 315
 316
 317class StdMixin(LangMixin):
 318    """
 319    A mixin for tools that have a "standard" component, i.e.
 320    those that change their behavior based on a particular language standard.
 321    """
 322
 323    @property
 324    def std(self: LangProtocol) -> Std:
 325        """
 326        Returns:
 327            A `blight.enums.Std` value representing the tool's standard
 328        """
 329
 330        # First, a special case: if -ansi is present, we're in
 331        # C89 mode for C code and C++03 mode for C++ code.
 332        if "-ansi" in self.canonicalized_args:
 333            if self.lang == Lang.C:
 334                return Std.C89
 335            elif self.lang == Lang.Cxx:
 336                return Std.Cxx03
 337            else:
 338                logger.debug(f"-ansi passed but unknown language: {self.lang}")
 339                return Std.Unknown
 340
 341        # Experimentally, both GCC and clang respect the last -std=XXX flag passed.
 342        # See: https://stackoverflow.com/questions/40563269/passing-multiple-std-switches-to-g
 343        std_flag_index = util.rindex_prefix(self.canonicalized_args, "-std=")
 344
 345        # No -std=XXX flags? The tool is operating in its default standard mode,
 346        # which is determined by its language.
 347        if std_flag_index is None:
 348            if self.lang == Lang.C:
 349                return Std.GnuUnknown
 350            elif self.lang == Lang.Cxx:
 351                return Std.GnuxxUnknown
 352            else:
 353                logger.debug(f"no -std= flag and unknown language: {self.lang}")
 354                return Std.Unknown
 355
 356        last_std_flag = self.canonicalized_args[std_flag_index]
 357        std_flag_map = {
 358            # C89 flags.
 359            "-std=c89": Std.C89,
 360            "-std=c90": Std.C89,
 361            "-std=iso9899:1990": Std.C89,
 362            # C94 flags.
 363            "-std=iso9899:199409": Std.C94,
 364            # C99 flags.
 365            "-std=c99": Std.C99,
 366            "-std=c9x": Std.C99,
 367            "-std=iso9899:1999": Std.C99,
 368            "-std=iso9899:199x": Std.C99,
 369            # C11 flags.
 370            "-std=c11": Std.C11,
 371            "-std=c1x": Std.C11,
 372            "-std=iso9899:2011": Std.C11,
 373            # C17 flags.
 374            "-std=c17": Std.C17,
 375            "-std=c18": Std.C17,
 376            "-std=iso9899:2017": Std.C17,
 377            "-std=iso9899:2018": Std.C17,
 378            # C20 (presumptive) flags.
 379            "-std=c2x": Std.C2x,
 380            # GNU89 flags.
 381            "-std=gnu89": Std.Gnu89,
 382            "-std=gnu90": Std.Gnu89,
 383            # GNU99 flags.
 384            "-std=gnu99": Std.Gnu99,
 385            "-std=gnu9x": Std.Gnu99,
 386            # GNU11 flags.
 387            "-std=gnu11": Std.Gnu11,
 388            "-std=gnu1x": Std.Gnu11,
 389            # GNU17 flags.
 390            "-std=gnu17": Std.Gnu17,
 391            "-std=gnu18": Std.Gnu17,
 392            # GNU20 (presumptive) flags.
 393            "-std=gnu2x": Std.Gnu2x,
 394            # C++03 flags.
 395            # NOTE(ww): Both gcc and clang treat C++98 mode as C++03 mode.
 396            "-std=c++98": Std.Cxx03,
 397            "-std=c++03": Std.Cxx03,
 398            # C++11 flags.
 399            "-std=c++11": Std.Cxx11,
 400            "-std=c++0x": Std.Cxx11,
 401            # C++14 flags.
 402            "-std=c++14": Std.Cxx14,
 403            "-std=c++1y": Std.Cxx14,
 404            # C++17 flags.
 405            "-std=c++17": Std.Cxx17,
 406            "-std=c++1z": Std.Cxx17,
 407            # C++20 (presumptive) flags.
 408            "-std=c++2a": Std.Cxx2a,
 409            "-std=c++20": Std.Cxx2a,
 410            # GNU++03 flags.
 411            "-std=gnu++98": Std.Gnuxx03,
 412            "-std=gnu++03": Std.Gnuxx03,
 413            # GNU++11 flags.
 414            "-std=gnu++11": Std.Gnuxx11,
 415            "-std=gnu++0x": Std.Gnuxx11,
 416            # GNU++14 flags.
 417            "-std=gnu++14": Std.Gnuxx14,
 418            "-std=gnu++1y": Std.Gnuxx14,
 419            # GNU++17 flags.
 420            "-std=gnu++17": Std.Gnuxx17,
 421            "-std=gnu++1z": Std.Gnuxx17,
 422            # GNU++20 (presumptive) flags.
 423            "-std=gnu++2a": Std.Gnuxx2a,
 424            "-std=gnu++20": Std.Gnuxx2a,
 425        }
 426
 427        std = std_flag_map.get(last_std_flag)
 428        if std is not None:
 429            return std
 430
 431        # If we've made it here, then we've reached a -std=XXX flag that we
 432        # don't know yet. Make an effort to guess at it.
 433        std_name = last_std_flag.split("=")[1]
 434        if std_name.startswith("c++"):
 435            logger.debug(f"partially unrecognized c++ std: {last_std_flag}")
 436            return Std.CxxUnknown
 437        elif std_name.startswith("gnu++"):
 438            logger.debug(f"partially unrecognized gnu++ std: {last_std_flag}")
 439            return Std.GnuxxUnknown
 440        elif std_name.startswith("gnu"):
 441            logger.debug(f"partially unrecognized gnu c std: {last_std_flag}")
 442            return Std.GnuUnknown
 443        elif std_name.startswith("c") or std_name.startswith("iso9899"):
 444            logger.debug(f"partially unrecognized c std: {last_std_flag}")
 445            return Std.CUnknown
 446
 447        logger.debug(f"completely unrecognized -std= flag: {last_std_flag}")
 448        return Std.Unknown
 449
 450
 451class OptMixin:
 452    """
 453    A mixin for tools that have an optimization level.
 454    """
 455
 456    @property
 457    def opt(self: CanonicalizedArgsProtocol) -> OptLevel:
 458        """
 459        Returns:
 460            A `blight.enums.OptLevel` value representing the optimization level
 461        """
 462
 463        opt_flag_map = {
 464            "-O0": OptLevel.O0,
 465            "-O": OptLevel.O1,
 466            "-O1": OptLevel.O1,
 467            "-O2": OptLevel.O2,
 468            "-O3": OptLevel.O3,
 469            "-Ofast": OptLevel.OFast,
 470            "-Os": OptLevel.OSize,
 471            "-Oz": OptLevel.OSizeZ,
 472            "-Og": OptLevel.ODebug,
 473        }
 474
 475        # The last optimization flag takes precedence, so iterate over the arguments
 476        # in reverse order.
 477        for arg in reversed(self.canonicalized_args):
 478            opt = opt_flag_map.get(arg)
 479            if opt is not None:
 480                return opt
 481
 482            if not arg.startswith("-O"):
 483                continue
 484
 485            # Special case: -O4 and above are currently equivalent to -O3 in
 486            # GCC and Clang. Identify these and map them to -O3.
 487            if re.fullmatch(r"^-O[1-9]\d*$", arg):
 488                return OptLevel.O3
 489
 490            # Otherwise: We've found an argument that looks like -Osomething,
 491            # but we don't know what it is. Treat it as an unknown.
 492            logger.debug(f"unknown optimization level: {arg}")
 493            return OptLevel.Unknown
 494
 495        # If we've made it here, then the arguments don't mention an explicit
 496        # optimization level. Both GCC and Clang use -O0 by default, so return that here.
 497        return OptLevel.O0
 498
 499
 500class ResponseFileMixin:
 501    """
 502    A mixin for tools that support the `@file` syntax for adding command-line arguments
 503    via an input file.
 504
 505    These appear to originate from Windows and are called "response files" there, hence
 506    the name of this mixin.
 507    """
 508
 509    def _expand_response_file(
 510        self, response_file: Path, working_dir: Path, level: int
 511    ) -> List[str]:
 512        if level >= RESPONSE_FILE_RECURSION_LIMIT:
 513            logger.debug(f"recursion limit exceeded: {response_file} in {working_dir}")
 514            return []
 515
 516        # Non-absolute response files are resolved relative to `working_dir`, which
 517        # begins at the CWD initially and changes to the parent directory of the
 518        # including file for nested response files.
 519        if not response_file.is_absolute():
 520            response_file = working_dir / response_file
 521
 522        if not response_file.is_file():
 523            logger.debug(f"response file {response_file} does not exist")
 524            # TODO(ww): Instead of returning empty here, maybe return `@response_file`?
 525            return []
 526
 527        args = shlex.split(response_file.read_text())
 528        response_files = [(idx, arg) for (idx, arg) in enumerate(args) if arg.startswith("@")]
 529        for idx, nested_rf in response_files:
 530            args = util.insert_items_at_idx(
 531                args,
 532                idx,
 533                self._expand_response_file(
 534                    Path(nested_rf[1:]), response_file.parent.resolve(), level + 1
 535                ),
 536            )
 537
 538        return args
 539
 540    @property
 541    def canonicalized_args(self) -> List[str]:
 542        """
 543        Overrides the behavior of `Tool.canonicalized_args`, expanding any response file arguments
 544        in a depth-first manner.
 545        """
 546
 547        # NOTE(ww): This method badly needs some typechecking TLC.
 548        # The `super()` call to `canonicalized_args` probably needs to be handled
 549        # with a `self: CanonicalizedArgsProtocol` hint, but that causes other problems
 550        # related to mypy's ability to see `_expand_response_file`.
 551
 552        response_files = [
 553            (idx, arg)
 554            for (idx, arg) in enumerate(super().canonicalized_args)  # type: ignore
 555            if arg.startswith("@")
 556        ]
 557        expanded_args = super().canonicalized_args  # type: ignore
 558        for idx, response_file in response_files:
 559            expanded_args = util.insert_items_at_idx(
 560                expanded_args,
 561                idx,
 562                self._expand_response_file(Path(response_file[1:]), self.cwd, 0),  # type: ignore
 563            )
 564
 565        self._canonicalized_args = expanded_args
 566        return self._canonicalized_args  # type: ignore[no-any-return]
 567
 568
 569class DefinesMixin:
 570    """
 571    A mixin for tools that support the `-Dname[=value]` and `-Uname` syntaxes for defining
 572    and undefining C preprocessor macros.
 573    """
 574
 575    @property
 576    def indexed_undefines(self: IndexedUndefinesProtocol) -> Dict[str, int]:
 577        """
 578        Returns a dictionary of indices for undefined macros. This is used in
 579        `defines` to ensure that we don't incorrectly report a subsequently undefined
 580        macro as defined. Only the rightmost index of each undefined macro is saved.
 581
 582        Returns:
 583            A dict of `name: index` for each undefined macro.
 584        """
 585        indexed_undefines = {}
 586        for idx, arg in enumerate(self.canonicalized_args):
 587            if not arg.startswith("-U"):
 588                continue
 589
 590            # Both `-Uname` and `-U name` work in GCC and Clang.
 591            undefine = self.canonicalized_args[idx + 1] if arg == "-U" else arg[2:]
 592
 593            indexed_undefines[undefine] = idx
 594
 595        return indexed_undefines
 596
 597    @property
 598    def defines(self: IndexedUndefinesProtocol) -> List[Tuple[str, str]]:
 599        """
 600        The list of **effective** defines for this tool invocation. An "effective"
 601        define is one that is not canceled out by a subsequent undefine.
 602
 603        Returns:
 604            A list of tuples of (name, value) for each effectively defined macro.
 605        """
 606        defines = []
 607        for idx, arg in enumerate(self.canonicalized_args):
 608            if not arg.startswith("-D"):
 609                continue
 610
 611            # Both `-Dname[=value]` and `-D name[=value]` work in GCC and Clang.
 612            define = self.canonicalized_args[idx + 1] if arg == "-D" else arg[2:]
 613
 614            components = define.split("=", 1)
 615            name = components[0]
 616
 617            # NOTE(ww): 1 is the default macro value.
 618            # It's actually an integer at the preprocessor level, but we model everything
 619            # as strings here to avoid complicating things.
 620            value = "1" if len(components) == 1 else components[1]
 621
 622            # Is this macro subsequently undefined? If so, don't include it in
 623            # the defines list.
 624            if self.indexed_undefines.get(name, -1) > idx:
 625                continue
 626
 627            defines.append((name, value))
 628
 629        return defines
 630
 631
 632class CodeModelMixin:
 633    """
 634    A mixin for tools that support the `-mcmodel=MODEL` syntax for declaring their
 635    code model.
 636    """
 637
 638    @property
 639    def code_model(self: CanonicalizedArgsProtocol) -> CodeModel:
 640        """
 641        Returns:
 642            A `blight.enums.CodeModel` value representing the tool's code model
 643        """
 644        code_model_map = {
 645            "-mcmodel=small": CodeModel.Small,
 646            "-mcmodel=medlow": CodeModel.Small,
 647            "-mcmodel=medium": CodeModel.Medium,
 648            "-mcmodel=medany": CodeModel.Medium,
 649            "-mcmodel=large": CodeModel.Large,
 650            "-mcmodel=kernel": CodeModel.Kernel,
 651        }
 652
 653        # NOTE(ww): Both Clang and GCC seem to default to the "small" code model
 654        # when none is specified, at least on x86-64. But this might not be consistent
 655        # across architectures, so maybe we should return `CodeModel.Unknown` here
 656        # instead.
 657        code_model = util.ritem_prefix(self.canonicalized_args, "-mcmodel=")
 658        if code_model is None:
 659            return CodeModel.Small
 660
 661        return code_model_map.get(code_model, CodeModel.Unknown)
 662
 663
 664class LinkSearchMixin:
 665    """
 666    A mixin for tools that support the `-Lpath` and `-llib` syntaxes for specifying
 667    library paths and libraries, respectively.
 668    """
 669
 670    @property
 671    def explicit_library_search_paths(self: CanonicalizedArgsProtocol) -> List[Path]:
 672        """
 673        Returns a list of library search paths that are explicitly specified in
 674        the tool's invocation. Semantically, these paths are (normally) given
 675        priority over all other search paths.
 676
 677        NOTE: This is **not** the same as the complete list of library search paths,
 678        which is tool-specific and host-dependent.
 679        """
 680
 681        shorts = util.collect_option_values(self.canonicalized_args, "-L")
 682        longs = util.collect_option_values(
 683            self.canonicalized_args, "--library-path", style=util.OptionValueStyle.EqualOrSpace
 684        )
 685
 686        sorted_values = sorted(itertools.chain(shorts, longs), key=lambda v: v[0])
 687
 688        return [(self.cwd / value[1]).resolve() for value in sorted_values]
 689
 690    @property
 691    def library_names(self: CanonicalizedArgsProtocol) -> List[str]:
 692        """
 693        Returns a list of library names (without suffixes) for libraries that
 694        are explicitly specified in the tool's invocation.
 695
 696        NOTE: This list does not include any libraries that are
 697        listed as "inputs" to the tool rather than as linkage specifications.
 698        """
 699
 700        shorts = util.collect_option_values(self.canonicalized_args, "-l")
 701        longs = util.collect_option_values(
 702            self.canonicalized_args, "--library", style=util.OptionValueStyle.EqualOrSpace
 703        )
 704
 705        sorted_values = sorted(itertools.chain(shorts, longs), key=lambda v: v[0])
 706
 707        return [f"lib{value[1]}" for value in sorted_values]
 708
 709
 710# NOTE(ww): The funny mixin order here (`ResponseFileMixin` before `Tool`) and elsewhere
 711# is because Python defines its class hierarchy from right to left. `ResponseFileMixin`
 712# therefore needs to come first in order to properly override `canonicalized_args`.
 713class CompilerTool(
 714    LinkSearchMixin, ResponseFileMixin, Tool, StdMixin, OptMixin, DefinesMixin, CodeModelMixin
 715):
 716    """
 717    Represents a generic (C or C++) compiler frontend.
 718
 719    Like `Tool`, `CompilerTool` cannot be instantiated directly.
 720    """
 721
 722    def __init__(self, args: List[str]) -> None:
 723        if self.__class__ == CompilerTool:
 724            raise NotImplementedError(f"can't instantiate {self.__class__.__name__} directly")
 725
 726        super().__init__(args)
 727
 728        # #40 and #41: These should be handled in an overridden implementation
 729        # of `canonicalized_args`.
 730        injection_vars = COMPILER_FLAG_INJECTION_VARIABLES & self._env.keys()
 731        if injection_vars:
 732            logger.warning(f"not tracking compiler's own instrumentation: {injection_vars}")
 733
 734    @property
 735    def family(self) -> CompilerFamily:
 736        """
 737        Returns:
 738            A `blight.enums.CompilerFamily` value representing the "family" of compilers
 739            that this tool belongs to.
 740        """
 741
 742        # NOTE(ww): Both GCC and Clang support -### as an alias for -v, but
 743        # with additional guarantees around argument quoting. Do other families support it?
 744
 745        result = subprocess.run([self.wrapped_tool(), "-###"], capture_output=True)
 746
 747        # If the command exited with an error, we're likely dealing with a frontend
 748        # that doesn't understand `-###`.
 749        if result.returncode != 0:
 750            logger.warning("compiler fingerprint failed: frontend didn't recognize -###?")
 751            # ...but even still, we can infer a bit from the error message.
 752            if b"tcc: error" in result.stderr:
 753                return CompilerFamily.Tcc
 754            else:
 755                return CompilerFamily.Unknown
 756
 757        # We expect the relevant parts of `-###` on stderr. The lack of any output
 758        # again suggests that the frontend doesn't understand the flag.
 759        if not result.stderr:
 760            logger.warning("compiler fingerprint failed: frontend didn't produce output for -###?")
 761            return CompilerFamily.Unknown
 762
 763        # Finally, we do some silly substring checks.
 764        # TODO(ww): Better heuristics here?
 765        if b"Apple clang version" in result.stderr:
 766            return CompilerFamily.AppleLlvm
 767        elif b"clang version" in result.stderr:
 768            return CompilerFamily.MainlineLlvm
 769        elif b"gcc version" in result.stderr:
 770            return CompilerFamily.Gcc
 771        else:
 772            return CompilerFamily.Unknown
 773
 774    @property
 775    def stage(self) -> CompilerStage:
 776        """
 777        Returns:
 778            A `blight.enums.CompilerStage` value representing the stage that this tool is on
 779        """
 780
 781        # TODO(ww): Refactor this entire method. Both GCC and Clang can actually
 782        # run multiple stages per invocation, e.g. `-x c foo.c -x c++ bar.cpp`,
 783        # so we should model this as "stages" instead. This, in turn, will require
 784        # us to reevaluate our output guesswork below.
 785
 786        if len(self.canonicalized_args) == 0:
 787            return CompilerStage.Unknown
 788
 789        stage_flag_map = {
 790            # NOTE(ww): See the TODO in CompilerStage.
 791            "-v": CompilerStage.Unknown,
 792            "-###": CompilerStage.Unknown,
 793            "-E": CompilerStage.Preprocess,
 794            "-fsyntax-only": CompilerStage.SyntaxOnly,
 795            "-S": CompilerStage.Assemble,
 796            "-c": CompilerStage.CompileObject,
 797        }
 798
 799        for flag, stage in stage_flag_map.items():
 800            if flag in self.canonicalized_args:
 801                return stage
 802
 803        # TODO(ww): Handle header precompilation here. GCC doesn't seem to
 804        # consider this a real "stage", but it's different enough from every
 805        # other stage to warrant special treatment.
 806
 807        # No explicit stage flag? Both gcc and clang treat this as
 808        # "run all stages", so we do too.
 809        return CompilerStage.AllStages
 810
 811    @property
 812    def outputs(self) -> List[str]:
 813        """
 814        Specializes `Tool.outputs` for compiler tools.
 815        """
 816        outputs = super().outputs
 817        if outputs != []:
 818            return outputs
 819
 820        # Without an explicit `-o outfile`, the default output name(s)
 821        # depends on the compiler's stage.
 822        if self.stage == CompilerStage.Preprocess:
 823            # NOTE(ww): The preprocessor stage emits to stdout, but returning "-" as
 824            # a sentinel for that is very meh. If only Python had Rust-style enums.
 825            return ["-"]
 826        elif self.stage == CompilerStage.Assemble:
 827            # NOTE(ww): Outputs are created relative to the current working directory,
 828            # not relative to their input. We return them as relative paths to
 829            # indicate this (maybe we should just fully resolve them?)
 830            return [Path(input_).with_suffix(".s").name for input_ in self.inputs]
 831        elif self.stage == CompilerStage.CompileObject:
 832            return [Path(input_).with_suffix(".o").name for input_ in self.inputs]
 833        elif self.stage == CompilerStage.AllStages:
 834            # NOTE(ww): This will be wrong when we're doing header precompilation;
 835            # see the TODO in `stage`.
 836            return ["a.out"]
 837        else:
 838            return []
 839
 840    def asdict(self) -> Dict[str, Any]:
 841        return {
 842            **super().asdict(),
 843            "lang": self.lang.name,
 844            "std": self.std.name,
 845            "stage": self.stage.name,
 846            "opt": self.opt.name,
 847        }
 848
 849
 850class CC(CompilerTool):
 851    """
 852    A specialization of `CompilerTool` for the C compiler frontend.
 853    """
 854
 855    def __repr__(self) -> str:
 856        return f"<CC {self.wrapped_tool()} {self.lang} {self.std} {self.stage}>"
 857
 858
 859class CXX(CompilerTool):
 860    """
 861    A specialization of `CompilerTool` for the C++ compiler frontend.
 862    """
 863
 864    def __repr__(self) -> str:
 865        return f"<CXX {self.wrapped_tool()} {self.lang} {self.std} {self.stage}>"
 866
 867
 868class CPP(Tool, StdMixin, DefinesMixin):
 869    """
 870    Represents the C preprocessor tool.
 871    """
 872
 873    def __repr__(self) -> str:
 874        return f"<CPP {self.wrapped_tool()} {self.lang} {self.std}>"
 875
 876    def asdict(self) -> Dict[str, Any]:
 877        return {**super().asdict(), "lang": self.lang.name, "std": self.std.name}
 878
 879
 880class LD(LinkSearchMixin, ResponseFileMixin, Tool):
 881    """
 882    Represents the linker.
 883    """
 884
 885    @property
 886    def outputs(self) -> List[str]:
 887        """
 888        Specializes `Tool.outputs` for the linker.
 889        """
 890
 891        outputs = super().outputs
 892        if outputs != []:
 893            return outputs
 894
 895        # The GNU linker additionally supports --output=OUTFILE and
 896        # --output OUTFILE. Handle them here.
 897        output_flag_index = util.rindex_prefix(self.canonicalized_args, "--output")
 898        if output_flag_index is None:
 899            return ["a.out"]
 900
 901        # Split option form.
 902        if self.canonicalized_args[output_flag_index] == "--output":
 903            return [self.canonicalized_args[output_flag_index + 1]]
 904
 905        # Assignment form.
 906        return [self.canonicalized_args[output_flag_index].split("=")[1]]
 907
 908    def __repr__(self) -> str:
 909        return f"<LD {self.wrapped_tool()}>"
 910
 911
 912class AS(ResponseFileMixin, Tool):
 913    """
 914    Represents the assembler.
 915    """
 916
 917    def __repr__(self) -> str:
 918        return f"<AS {self.wrapped_tool()}>"
 919
 920
 921class AR(ResponseFileMixin, Tool):
 922    """
 923    Represents the archiver.
 924    """
 925
 926    @property
 927    def outputs(self) -> List[str]:
 928        """
 929        Specializes `Tool.outputs` for the archiver.
 930        """
 931
 932        # TODO(ww): This doesn't support `ar x`, which explodes the archive
 933        # (i.e., treats it as input) instead of treats it as output.
 934        # It would be pretty strange for a build system to do this, but it's
 935        # probably something we should detect at the very least.
 936
 937        # TODO(ww): We also don't support `ar t`, which queries the given
 938        # archive to provide a table listing of its contents.
 939
 940        # NOTE(ww): `ar`'s POSIX and GNU CLIs are annoyingly complicated.
 941        # We save ourselves some pain by scanning from left-to-right, looking
 942        # for the first argument that looks like an archive output
 943        # (since the archiver only ever produces one output at a time).
 944        for arg in self.canonicalized_args:
 945            if arg.startswith("-"):
 946                continue
 947
 948            maybe_archive_suffixes = Path(arg).suffixes
 949            if len(maybe_archive_suffixes) > 0 and maybe_archive_suffixes[0] == ".a":
 950                return [arg]
 951
 952        logger.debug("couldn't infer output for archiver")
 953        return []
 954
 955    def __repr__(self) -> str:
 956        return f"<AR {self.wrapped_tool()}>"
 957
 958
 959class STRIP(ResponseFileMixin, Tool):
 960    """
 961    Represents the stripping tool.
 962    """
 963
 964    def __repr__(self) -> str:
 965        return f"<STRIP {self.wrapped_tool()}>"
 966
 967
 968class INSTALL(Tool):
 969    """
 970    Represents the install tool.
 971    """
 972
 973    def _install_parser(self) -> util.ArgumentParser:
 974        parser = util.ArgumentParser(
 975            prog=self.build_tool().value, add_help=False, allow_abbrev=False
 976        )
 977
 978        def add_flag(short: str, dest: str, **kwargs: Any) -> None:
 979            parser.add_argument(short, action="store_true", dest=dest, **kwargs)
 980
 981        add_flag("-b", "overwrite")
 982        add_flag("-C", "copy_no_mtime")
 983        add_flag("-c", "copy", default=True)
 984        add_flag("-d", "directory_mode")
 985        add_flag("-M", "disable_mmap")
 986        add_flag("-p", "preserve_mtime")
 987        add_flag("-S", "safe_copy")
 988        add_flag("-s", "exec_strip")
 989        add_flag("-v", "verbose")
 990        parser.add_argument("-f", dest="flags")
 991        parser.add_argument("-g", dest="group")
 992        parser.add_argument("-m", dest="mode")
 993        parser.add_argument("-o", dest="owner")
 994        parser.add_argument("trailing", nargs="+", default=[])
 995
 996        return parser
 997
 998    def __init__(self, args: List[str]) -> None:
 999        super().__init__(args)
1000        self._parser = self._install_parser()
1001
1002        try:
1003            (self._matches, self._unknown) = self._parser.parse_known_args(args)
1004        except ValueError as e:
1005            logger.error(f"argparse error: {e}")
1006            self._matches = self._parser.default_namespace()
1007            self._unknown = args
1008
1009    @property
1010    def directory_mode(self) -> bool:
1011        """
1012        Returns whether this `install` invocation is in "directory mode," i.e.
1013        is creating directories instead of installing files.
1014        """
1015        return self._matches.directory_mode  # type: ignore[no-any-return]
1016
1017    @property
1018    def inputs(self) -> List[str]:
1019        """
1020        Specializes `Tool.inputs` for the install tool.
1021        """
1022
1023        # Directory mode: all positionals are new directories, i.e. outputs.
1024        if self.directory_mode:
1025            return []
1026
1027        # `install` requires at least two positionals outside of directory mode,
1028        # so this probably indicates an unknown GNUism like `--help`.
1029        if len(self._matches.trailing) < 2:
1030            logger.debug(f"install called with no positionals (hint: unknown args: {self._unknown}")
1031            return []
1032
1033        # Otherwise, we're either installing one file to another or we're
1034        # installing multiple files to a directory. Test the last positional
1035        # to determine which mode we're in.
1036        maybe_dir = self._cwd / self._matches.trailing[-1]
1037        if maybe_dir.is_dir():
1038            return self._matches.trailing[0:-1]  # type: ignore[no-any-return]
1039        else:
1040            return [self._matches.trailing[0]]
1041
1042    @property
1043    def outputs(self) -> List[str]:
1044        """
1045        Specializes `Tool.outputs` for the install tool.
1046        """
1047
1048        # Directory mode: treat created directories as outputs.
1049        if self.directory_mode:
1050            return self._matches.trailing  # type: ignore[no-any-return]
1051
1052        # `install` requires at least two positionals outside of directory mode,
1053        # so this probably indicates an unknown GNUism like `--help`.
1054        if len(self._matches.trailing) < 2:
1055            logger.debug(f"install called with no positionals (hint: unknown args: {self._unknown}")
1056            return []
1057
1058        # If we're installing multiple files to a destination directory,
1059        # then our outputs are every input, under the destination.
1060        # Otherwise, our output is a single file.
1061        maybe_dir = self._cwd / self._matches.trailing[-1]
1062        if maybe_dir.is_dir():
1063            inputs = [Path(input_) for input_ in self._matches.trailing[0:-1]]
1064            return [str(maybe_dir / input_.name) for input_ in inputs]
1065        else:
1066            return [self._matches.trailing[-1]]
1067
1068    def __repr__(self) -> str:
1069        return f"<INSTALL {self.wrapped_tool()}>"

logger = <Logger blight.tool (INFO)>

RESPONSE_FILE_RECURSION_LIMIT = 64

Response files can contain further @file arguments, because of course they can.

Neither clang nor GCC is explicit in their documentation about their recursion limits, if they have any. We choose an arbitrary limit here.

class LangMixin: View Source

class LangMixin:
    """
    A mixin for tools that have a "language" component, i.e.
    those that change their behavior based on the language that they're used with.
    """

    @property
    def lang(self: CanonicalizedArgsProtocol) -> Lang:
        """
        Determines the language from the rightmost `-x` flag, falling back to
        the frontend's default (C for `CC`, C++ for `CXX`).

        Returns:
            A `blight.enums.Lang` value representing the tool's language
        """
        logger.warning(
            "this API might not do what you expect; see: https://github.com/trailofbits/blight/issues/43493"
        )

        x_lang_map = {"c": Lang.C, "c-header": Lang.C, "c++": Lang.Cxx, "c++-header": Lang.Cxx}

        # First, check for `-x lang`. This overrides the language determined by
        # the frontend's binary name (e.g. `g++`).
        args = self.canonicalized_args
        x_flag_index = util.rindex_prefix(args, "-x")
        if x_flag_index is not None:
            x_flag = args[x_flag_index]
            if x_flag != "-x":
                # NOTE(ww): -xc and -xc++ both work, at least on GCC.
                x_lang = x_flag[2:]
            elif x_flag_index + 1 < len(args):
                x_lang = args[x_flag_index + 1]
            else:
                # A dangling `-x` has no operand; report an unknown language
                # instead of indexing past the end of the argument list.
                logger.debug("-x passed without a language operand")
                return Lang.Unknown
            return x_lang_map.get(x_lang, Lang.Unknown)

        # No `-x lang` means that we're operating in the frontend's default mode.
        # `isinstance` is used so that subclasses of the frontends share their default.
        if isinstance(self, CC):
            return Lang.C
        elif isinstance(self, CXX):
            return Lang.Cxx
        else:
            logger.debug(f"unknown default language mode for {self.__class__.__name__}")
            return Lang.Unknown

A mixin for tools that have a "language" component, i.e. those that change their behavior based on the language that they're used with.

lang: blight.enums.Lang View Source

284    @property
285    def lang(self: CanonicalizedArgsProtocol) -> Lang:
286        """
287        Returns:
288            A `blight.enums.Lang` value representing the tool's language
289        """
290        logger.warning(
291            "this API might not do what you expect; see: https://github.com/trailofbits/blight/issues/43493"
292        )
293
294        x_lang_map = {"c": Lang.C, "c-header": Lang.C, "c++": Lang.Cxx, "c++-header": Lang.Cxx}
295
296        # First, check for `-x lang`. This overrides the language determined by
297        # the frontend's binary name (e.g. `g++`).
298        x_flag_index = util.rindex_prefix(self.canonicalized_args, "-x")
299        if x_flag_index is not None:
300            if self.canonicalized_args[x_flag_index] == "-x":
301                # TODO(ww): Maybe bounds check.
302                x_lang = self.canonicalized_args[x_flag_index + 1]
303            else:
304                # NOTE(ww): -xc and -xc++ both work, at least on GCC.
305                x_lang = self.canonicalized_args[x_flag_index][2:]
306            return x_lang_map.get(x_lang, Lang.Unknown)
307
308        # No `-x lang` means that we're operating in the frontend's default mode.
309        if self.__class__ == CC:
310            return Lang.C
311        elif self.__class__ == CXX:
312            return Lang.Cxx
313        else:
314            logger.debug(f"unknown default language mode for {self.__class__.__name__}")
315            return Lang.Unknown

Returns: A blight.enums.Lang value representing the tool's language

class StdMixin(LangMixin): View Source

class StdMixin(LangMixin):
    """
    A mixin for tools that have a "standard" component, i.e.
    those that change their behavior based on a particular language standard.
    """

    @property
    def std(self: LangProtocol) -> Std:
        """
        Determines the language standard from `-ansi` or the rightmost
        `-std=` flag, falling back to a language-dependent default.

        Returns:
            A `blight.enums.Std` value representing the tool's standard
        """
        args = self.canonicalized_args

        # First, a special case: if -ansi is present, we're in
        # C89 mode for C code and C++03 mode for C++ code.
        if "-ansi" in args:
            # Read `lang` once: each access logs a warning and rescans the arguments.
            lang = self.lang
            if lang == Lang.C:
                return Std.C89
            elif lang == Lang.Cxx:
                return Std.Cxx03
            else:
                logger.debug(f"-ansi passed but unknown language: {lang}")
                return Std.Unknown

        # Experimentally, both GCC and clang respect the last -std=XXX flag passed.
        # See: https://stackoverflow.com/questions/40563269/passing-multiple-std-switches-to-g
        std_flag_index = util.rindex_prefix(args, "-std=")

        # No -std=XXX flags? The tool is operating in its default standard mode,
        # which is determined by its language.
        if std_flag_index is None:
            lang = self.lang
            if lang == Lang.C:
                return Std.GnuUnknown
            elif lang == Lang.Cxx:
                return Std.GnuxxUnknown
            else:
                logger.debug(f"no -std= flag and unknown language: {lang}")
                return Std.Unknown

        last_std_flag = args[std_flag_index]
        std_name = last_std_flag[len("-std=") :]

        # Each standard, paired with every `-std=` spelling that selects it.
        # NOTE(ww): Both gcc and clang treat C++98 mode as C++03 mode.
        # The `2x`/`2a`/`20` entries are the presumptive C20/C++20 spellings.
        known_stds = (
            (Std.C89, ("c89", "c90", "iso9899:1990")),
            (Std.C94, ("iso9899:199409",)),
            (Std.C99, ("c99", "c9x", "iso9899:1999", "iso9899:199x")),
            (Std.C11, ("c11", "c1x", "iso9899:2011")),
            (Std.C17, ("c17", "c18", "iso9899:2017", "iso9899:2018")),
            (Std.C2x, ("c2x",)),
            (Std.Gnu89, ("gnu89", "gnu90")),
            (Std.Gnu99, ("gnu99", "gnu9x")),
            (Std.Gnu11, ("gnu11", "gnu1x")),
            (Std.Gnu17, ("gnu17", "gnu18")),
            (Std.Gnu2x, ("gnu2x",)),
            (Std.Cxx03, ("c++98", "c++03")),
            (Std.Cxx11, ("c++11", "c++0x")),
            (Std.Cxx14, ("c++14", "c++1y")),
            (Std.Cxx17, ("c++17", "c++1z")),
            (Std.Cxx2a, ("c++2a", "c++20")),
            (Std.Gnuxx03, ("gnu++98", "gnu++03")),
            (Std.Gnuxx11, ("gnu++11", "gnu++0x")),
            (Std.Gnuxx14, ("gnu++14", "gnu++1y")),
            (Std.Gnuxx17, ("gnu++17", "gnu++1z")),
            (Std.Gnuxx2a, ("gnu++2a", "gnu++20")),
        )
        for known_std, spellings in known_stds:
            if std_name in spellings:
                return known_std

        # If we've made it here, then we've reached a -std=XXX flag that we
        # don't know yet. Make an effort to guess at it. The `gnu++` test must
        # precede the plain `gnu` test, since the latter is a prefix of the former.
        if std_name.startswith("c++"):
            logger.debug(f"partially unrecognized c++ std: {last_std_flag}")
            return Std.CxxUnknown
        elif std_name.startswith("gnu++"):
            logger.debug(f"partially unrecognized gnu++ std: {last_std_flag}")
            return Std.GnuxxUnknown
        elif std_name.startswith("gnu"):
            logger.debug(f"partially unrecognized gnu c std: {last_std_flag}")
            return Std.GnuUnknown
        elif std_name.startswith("c") or std_name.startswith("iso9899"):
            logger.debug(f"partially unrecognized c std: {last_std_flag}")
            return Std.CUnknown

        logger.debug(f"completely unrecognized -std= flag: {last_std_flag}")
        return Std.Unknown

A mixin for tools that have a "standard" component, i.e. those that change their behavior based on a particular language standard.

std: blight.enums.Std View Source

324    @property
325    def std(self: LangProtocol) -> Std:
326        """
327        Returns:
328            A `blight.enums.Std` value representing the tool's standard
329        """
330
331        # First, a special case: if -ansi is present, we're in
332        # C89 mode for C code and C++03 mode for C++ code.
333        if "-ansi" in self.canonicalized_args:
334            if self.lang == Lang.C:
335                return Std.C89
336            elif self.lang == Lang.Cxx:
337                return Std.Cxx03
338            else:
339                logger.debug(f"-ansi passed but unknown language: {self.lang}")
340                return Std.Unknown
341
342        # Experimentally, both GCC and clang respect the last -std=XXX flag passed.
343        # See: https://stackoverflow.com/questions/40563269/passing-multiple-std-switches-to-g
344        std_flag_index = util.rindex_prefix(self.canonicalized_args, "-std=")
345
346        # No -std=XXX flags? The tool is operating in its default standard mode,
347        # which is determined by its language.
348        if std_flag_index is None:
349            if self.lang == Lang.C:
350                return Std.GnuUnknown
351            elif self.lang == Lang.Cxx:
352                return Std.GnuxxUnknown
353            else:
354                logger.debug(f"no -std= flag and unknown language: {self.lang}")
355                return Std.Unknown
356
357        last_std_flag = self.canonicalized_args[std_flag_index]
358        std_flag_map = {
359            # C89 flags.
360            "-std=c89": Std.C89,
361            "-std=c90": Std.C89,
362            "-std=iso9899:1990": Std.C89,
363            # C94 flags.
364            "-std=iso9899:199409": Std.C94,
365            # C99 flags.
366            "-std=c99": Std.C99,
367            "-std=c9x": Std.C99,
368            "-std=iso9899:1999": Std.C99,
369            "-std=iso9899:199x": Std.C99,
370            # C11 flags.
371            "-std=c11": Std.C11,
372            "-std=c1x": Std.C11,
373            "-std=iso9899:2011": Std.C11,
374            # C17 flags.
375            "-std=c17": Std.C17,
376            "-std=c18": Std.C17,
377            "-std=iso9899:2017": Std.C17,
378            "-std=iso9899:2018": Std.C17,
379            # C20 (presumptive) flags.
380            "-std=c2x": Std.C2x,
381            # GNU89 flags.
382            "-std=gnu89": Std.Gnu89,
383            "-std=gnu90": Std.Gnu89,
384            # GNU99 flags.
385            "-std=gnu99": Std.Gnu99,
386            "-std=gnu9x": Std.Gnu99,
387            # GNU11 flags.
388            "-std=gnu11": Std.Gnu11,
389            "-std=gnu1x": Std.Gnu11,
390            # GNU17 flags.
391            "-std=gnu17": Std.Gnu17,
392            "-std=gnu18": Std.Gnu17,
393            # GNU20 (presumptive) flags.
394            "-std=gnu2x": Std.Gnu2x,
395            # C++03 flags.
396            # NOTE(ww): Both gcc and clang treat C++98 mode as C++03 mode.
397            "-std=c++98": Std.Cxx03,
398            "-std=c++03": Std.Cxx03,
399            # C++11 flags.
400            "-std=c++11": Std.Cxx11,
401            "-std=c++0x": Std.Cxx11,
402            # C++14 flags.
403            "-std=c++14": Std.Cxx14,
404            "-std=c++1y": Std.Cxx14,
405            # C++17 flags.
406            "-std=c++17": Std.Cxx17,
407            "-std=c++1z": Std.Cxx17,
408            # C++20 (presumptive) flags.
409            "-std=c++2a": Std.Cxx2a,
410            "-std=c++20": Std.Cxx2a,
411            # GNU++03 flags.
412            "-std=gnu++98": Std.Gnuxx03,
413            "-std=gnu++03": Std.Gnuxx03,
414            # GNU++11 flags.
415            "-std=gnu++11": Std.Gnuxx11,
416            "-std=gnu++0x": Std.Gnuxx11,
417            # GNU++14 flags.
418            "-std=gnu++14": Std.Gnuxx14,
419            "-std=gnu++1y": Std.Gnuxx14,
420            # GNU++17 flags.
421            "-std=gnu++17": Std.Gnuxx17,
422            "-std=gnu++1z": Std.Gnuxx17,
423            # GNU++20 (presumptive) flags.
424            "-std=gnu++2a": Std.Gnuxx2a,
425            "-std=gnu++20": Std.Gnuxx2a,
426        }
427
428        std = std_flag_map.get(last_std_flag)
429        if std is not None:
430            return std
431
432        # If we've made it here, then we've reached a -std=XXX flag that we
433        # don't know yet. Make an effort to guess at it.
434        std_name = last_std_flag.split("=")[1]
435        if std_name.startswith("c++"):
436            logger.debug(f"partially unrecognized c++ std: {last_std_flag}")
437            return Std.CxxUnknown
438        elif std_name.startswith("gnu++"):
439            logger.debug(f"partially unrecognized gnu++ std: {last_std_flag}")
440            return Std.GnuxxUnknown
441        elif std_name.startswith("gnu"):
442            logger.debug(f"partially unrecognized gnu c std: {last_std_flag}")
443            return Std.GnuUnknown
444        elif std_name.startswith("c") or std_name.startswith("iso9899"):
445            logger.debug(f"partially unrecognized c std: {last_std_flag}")
446            return Std.CUnknown
447
448        logger.debug(f"completely unrecognized -std= flag: {last_std_flag}")
449        return Std.Unknown

Returns: A blight.enums.Std value representing the tool's standard

Inherited Members

LangMixin: lang

class OptMixin: View Source

class OptMixin:
    """
    A mixin for tools that have an optimization level.
    """

    @property
    def opt(self: CanonicalizedArgsProtocol) -> OptLevel:
        """
        Determines the optimization level from the rightmost `-O` style flag.

        Returns:
            A `blight.enums.OptLevel` value representing the optimization level
        """

        known_levels = {
            "-O0": OptLevel.O0,
            "-O": OptLevel.O1,
            "-O1": OptLevel.O1,
            "-O2": OptLevel.O2,
            "-O3": OptLevel.O3,
            "-Ofast": OptLevel.OFast,
            "-Os": OptLevel.OSize,
            "-Oz": OptLevel.OSizeZ,
            "-Og": OptLevel.ODebug,
        }

        # Walk the arguments right-to-left, because the last optimization
        # flag is the one that takes effect.
        for candidate in self.canonicalized_args[::-1]:
            if candidate in known_levels:
                return known_levels[candidate]

            if candidate.startswith("-O"):
                # Special case: -O4 and above are currently equivalent to -O3 in
                # GCC and Clang. Identify these and map them to -O3.
                if re.fullmatch(r"^-O[1-9]\d*$", candidate):
                    return OptLevel.O3

                # Anything else shaped like -Osomething is a level we don't
                # know about, so report it as unknown.
                logger.debug(f"unknown optimization level: {candidate}")
                return OptLevel.Unknown

        # No explicit optimization flag at all. Both GCC and Clang use -O0
        # by default, so return that here.
        return OptLevel.O0

A mixin for tools that have an optimization level.

opt: blight.enums.OptLevel View Source

457    @property
458    def opt(self: CanonicalizedArgsProtocol) -> OptLevel:
459        """
460        Returns:
461            A `blight.enums.OptLevel` value representing the optimization level
462        """
463
464        opt_flag_map = {
465            "-O0": OptLevel.O0,
466            "-O": OptLevel.O1,
467            "-O1": OptLevel.O1,
468            "-O2": OptLevel.O2,
469            "-O3": OptLevel.O3,
470            "-Ofast": OptLevel.OFast,
471            "-Os": OptLevel.OSize,
472            "-Oz": OptLevel.OSizeZ,
473            "-Og": OptLevel.ODebug,
474        }
475
476        # The last optimization flag takes precedence, so iterate over the arguments
477        # in reverse order.
478        for arg in reversed(self.canonicalized_args):
479            opt = opt_flag_map.get(arg)
480            if opt is not None:
481                return opt
482
483            if not arg.startswith("-O"):
484                continue
485
486            # Special case: -O4 and above are currently equivalent to -O3 in
487            # GCC and Clang. Identify these and map them to -O3.
488            if re.fullmatch(r"^-O[1-9]\d*$", arg):
489                return OptLevel.O3
490
491            # Otherwise: We've found an argument that looks like -Osomething,
492            # but we don't know what it is. Treat it as an unknown.
493            logger.debug(f"unknown optimization level: {arg}")
494            return OptLevel.Unknown
495
496        # If we've made it here, then the arguments don't mention an explicit
497        # optimization level. Both GCC and Clang use -O0 by default, so return that here.
498        return OptLevel.O0

Returns: A blight.enums.OptLevel value representing the optimization level

class ResponseFileMixin: View Source

class ResponseFileMixin:
    """
    A mixin for tools that support the `@file` syntax for adding command-line arguments
    via an input file.

    These appear to originate from Windows and are called "response files" there, hence
    the name of this mixin.
    """

    def _expand_response_file(
        self, response_file: Path, working_dir: Path, level: int
    ) -> List[str]:
        """
        Reads `response_file` and returns its arguments, with any nested
        `@file` arguments expanded recursively.

        Args:
            response_file: The response file to read; resolved against
                `working_dir` when not absolute.
            working_dir: The directory that relative response files are
                resolved against.
            level: The current nesting depth, starting at 0.

        Returns:
            The expanded arguments, or an empty list if the recursion limit
            is reached or the file does not exist.
        """
        if level >= RESPONSE_FILE_RECURSION_LIMIT:
            logger.debug(f"recursion limit exceeded: {response_file} in {working_dir}")
            return []

        # Non-absolute response files are resolved relative to `working_dir`, which
        # begins at the CWD initially and changes to the parent directory of the
        # including file for nested response files.
        if not response_file.is_absolute():
            response_file = working_dir / response_file

        if not response_file.is_file():
            logger.debug(f"response file {response_file} does not exist")
            # TODO(ww): Instead of returning empty here, maybe return `@response_file`?
            return []

        args = shlex.split(response_file.read_text())
        nested_working_dir = response_file.parent.resolve()
        response_files = [(idx, arg) for (idx, arg) in enumerate(args) if arg.startswith("@")]

        # The indices above refer to the unexpanded list. Expand right-to-left so
        # that growing or shrinking the list at one position never shifts the
        # positions that are still waiting to be expanded.
        # NOTE(review): presumes `util.insert_items_at_idx` substitutes the
        # element at `idx` with the given items -- confirm against `blight.util`.
        for idx, nested_rf in reversed(response_files):
            args = util.insert_items_at_idx(
                args,
                idx,
                self._expand_response_file(Path(nested_rf[1:]), nested_working_dir, level + 1),
            )

        return args

    @property
    def canonicalized_args(self) -> List[str]:
        """
        Overrides the behavior of `Tool.canonicalized_args`, expanding any response file arguments
        in a depth-first manner.
        """

        # NOTE(ww): This method badly needs some typechecking TLC.
        # The `super()` call to `canonicalized_args` probably needs to be handled
        # with a `self: CanonicalizedArgsProtocol` hint, but that causes other problems
        # related to mypy's ability to see `_expand_response_file`.

        expanded_args = super().canonicalized_args  # type: ignore
        response_files = [
            (idx, arg) for (idx, arg) in enumerate(expanded_args) if arg.startswith("@")
        ]

        # Expand right-to-left: the indices were taken from the unexpanded list,
        # and working backwards keeps the not-yet-expanded ones valid.
        for idx, response_file in reversed(response_files):
            expanded_args = util.insert_items_at_idx(
                expanded_args,
                idx,
                self._expand_response_file(Path(response_file[1:]), self.cwd, 0),  # type: ignore
            )

        self._canonicalized_args = expanded_args
        return self._canonicalized_args  # type: ignore[no-any-return]

A mixin for tools that support the @file syntax for adding command-line arguments via an input file.

These appear to originate from Windows and are called "response files" there, hence the name of this mixin.

canonicalized_args: List[str] View Source

541    @property
542    def canonicalized_args(self) -> List[str]:
543        """
544        Overrides the behavior of `Tool.canonicalized_args`, expanding any response file arguments
545        in a depth-first manner.
546        """
547
548        # NOTE(ww): This method badly needs some typechecking TLC.
549        # The `super()` call to `canonicalized_args` probably needs to be handled
550        # with a `self: CanonicalizedArgsProtocol` hint, but that causes other problems
551        # related to mypy's ability to see `_expand_response_file`.
552
553        response_files = [
554            (idx, arg)
555            for (idx, arg) in enumerate(super().canonicalized_args)  # type: ignore
556            if arg.startswith("@")
557        ]
558        expanded_args = super().canonicalized_args  # type: ignore
559        for idx, response_file in response_files:
560            expanded_args = util.insert_items_at_idx(
561                expanded_args,
562                idx,
563                self._expand_response_file(Path(response_file[1:]), self.cwd, 0),  # type: ignore
564            )
565
566        self._canonicalized_args = expanded_args
567        return self._canonicalized_args  # type: ignore[no-any-return]

Overrides the behavior of Tool.canonicalized_args, expanding any response file arguments in a depth-first manner.

class DefinesMixin: View Source

class DefinesMixin:
    """
    A mixin for tools that support the `-Dname[=value]` and `-Uname` syntaxes for defining
    and undefining C preprocessor macros.
    """

    @property
    def indexed_undefines(self: IndexedUndefinesProtocol) -> Dict[str, int]:
        """
        Returns a dictionary of indices for undefined macros. This is used in
        `defines` to ensure that we don't incorrectly report a subsequently undefined
        macro as defined. Only the rightmost index of each undefined macro is saved.

        A trailing `-U` with no following macro name is ignored.

        Returns:
            A dict of `name: index` for each undefined macro.
        """
        # Read the arguments once; the loop below indexes into them as well.
        args = self.canonicalized_args

        indexed_undefines = {}
        for idx, arg in enumerate(args):
            if not arg.startswith("-U"):
                continue

            # Both `-Uname` and `-U name` work in GCC and Clang.
            if arg == "-U":
                if idx + 1 >= len(args):
                    # Dangling `-U` at the end of the arguments: no name to record.
                    continue
                undefine = args[idx + 1]
            else:
                undefine = arg[2:]

            # Later occurrences overwrite earlier ones, keeping the rightmost index.
            indexed_undefines[undefine] = idx

        return indexed_undefines

    @property
    def defines(self: IndexedUndefinesProtocol) -> List[Tuple[str, str]]:
        """
        The list of **effective** defines for this tool invocation. An "effective"
        define is one that is not canceled out by a subsequent undefine.

        A trailing `-D` with no following macro definition is ignored.

        Returns:
            A list of tuples of (name, value) for each effectively defined macro.
        """
        args = self.canonicalized_args

        # `indexed_undefines` is a property that rescans every argument on each
        # access, so evaluate it once rather than once per `-D`.
        undefines = self.indexed_undefines

        defines = []
        for idx, arg in enumerate(args):
            if not arg.startswith("-D"):
                continue

            # Both `-Dname[=value]` and `-D name[=value]` work in GCC and Clang.
            if arg == "-D":
                if idx + 1 >= len(args):
                    # Dangling `-D` at the end of the arguments: nothing is defined.
                    continue
                define = args[idx + 1]
            else:
                define = arg[2:]

            components = define.split("=", 1)
            name = components[0]

            # NOTE(ww): 1 is the default macro value.
            # It's actually an integer at the preprocessor level, but we model everything
            # as strings here to avoid complicating things.
            value = "1" if len(components) == 1 else components[1]

            # Is this macro subsequently undefined? If so, don't include it in
            # the defines list.
            if undefines.get(name, -1) > idx:
                continue

            defines.append((name, value))

        return defines

A mixin for tools that support the -Dname[=value] and -Uname syntaxes for defining and undefining C preprocessor macros.

indexed_undefines: Dict[str, int] View Source

576    @property
577    def indexed_undefines(self: IndexedUndefinesProtocol) -> Dict[str, int]:
578        """
579        Returns a dictionary of indices for undefined macros. This is used in
580        `defines` to ensure that we don't incorrectly report a subsequently undefined
581        macro as defined. Only the rightmost index of each undefined macro is saved.
582
583        Returns:
584            A dict of `name: index` for each undefined macro.
585        """
586        indexed_undefines = {}
587        for idx, arg in enumerate(self.canonicalized_args):
588            if not arg.startswith("-U"):
589                continue
590
591            # Both `-Uname` and `-U name` work in GCC and Clang.
592            undefine = self.canonicalized_args[idx + 1] if arg == "-U" else arg[2:]
593
594            indexed_undefines[undefine] = idx
595
596        return indexed_undefines

Returns a dictionary of indices for undefined macros. This is used in defines to ensure that we don't incorrectly report a subsequently undefined macro as defined. Only the rightmost index of each undefined macro is saved.

Returns: A dict of name: index for each undefined macro.

defines: List[Tuple[str, str]] View Source

598    @property
599    def defines(self: IndexedUndefinesProtocol) -> List[Tuple[str, str]]:
600        """
601        The list of **effective** defines for this tool invocation. An "effective"
602        define is one that is not canceled out by a subsequent undefine.
603
604        Returns:
605            A list of tuples of (name, value) for each effectively defined macro.
606        """
607        defines = []
608        for idx, arg in enumerate(self.canonicalized_args):
609            if not arg.startswith("-D"):
610                continue
611
612            # Both `-Dname[=value]` and `-D name[=value]` work in GCC and Clang.
613            define = self.canonicalized_args[idx + 1] if arg == "-D" else arg[2:]
614
615            components = define.split("=", 1)
616            name = components[0]
617
618            # NOTE(ww): 1 is the default macro value.
619            # It's actually an integer at the preprocessor level, but we model everything
620            # as strings here to avoid complicating things.
621            value = "1" if len(components) == 1 else components[1]
622
623            # Is this macro subsequently undefined? If so, don't include it in
624            # the defines list.
625            if self.indexed_undefines.get(name, -1) > idx:
626                continue
627
628            defines.append((name, value))
629
630        return defines

The list of effective defines for this tool invocation. An "effective" define is one that is not canceled out by a subsequent undefine.

Returns: A list of tuples of (name, value) for each effectively defined macro.

class CodeModelMixin: View Source

class CodeModelMixin:
    """
    A mixin for tools whose code model is declared with the `-mcmodel=MODEL` syntax.
    """

    @property
    def code_model(self: CanonicalizedArgsProtocol) -> CodeModel:
        """
        Returns:
            The `blight.enums.CodeModel` selected by the `-mcmodel=` argument that
            `util.ritem_prefix` finds in the canonicalized arguments; `CodeModel.Small`
            when there is none, and `CodeModel.Unknown` for an unrecognized model.
        """
        flag = util.ritem_prefix(self.canonicalized_args, "-mcmodel=")

        # NOTE(ww): Both Clang and GCC seem to default to the "small" code model
        # when none is specified, at least on x86-64. But this might not be consistent
        # across architectures, so maybe we should return `CodeModel.Unknown` here
        # instead.
        if flag is None:
            return CodeModel.Small

        # Keyed on the complete argument, prefix included.
        known_models = {
            "-mcmodel=small": CodeModel.Small,
            "-mcmodel=medlow": CodeModel.Small,
            "-mcmodel=medium": CodeModel.Medium,
            "-mcmodel=medany": CodeModel.Medium,
            "-mcmodel=large": CodeModel.Large,
            "-mcmodel=kernel": CodeModel.Kernel,
        }
        return known_models.get(flag, CodeModel.Unknown)

A mixin for tools that support the -mcmodel=MODEL syntax for declaring their code model.

code_model: blight.enums.CodeModel View Source

639    @property
640    def code_model(self: CanonicalizedArgsProtocol) -> CodeModel:
641        """
642        Returns:
643            A `blight.enums.CodeModel` value representing the tool's code model
644        """
645        code_model_map = {
646            "-mcmodel=small": CodeModel.Small,
647            "-mcmodel=medlow": CodeModel.Small,
648            "-mcmodel=medium": CodeModel.Medium,
649            "-mcmodel=medany": CodeModel.Medium,
650            "-mcmodel=large": CodeModel.Large,
651            "-mcmodel=kernel": CodeModel.Kernel,
652        }
653
654        # NOTE(ww): Both Clang and GCC seem to default to the "small" code model
655        # when none is specified, at least on x86-64. But this might not be consistent
656        # across architectures, so maybe we should return `CodeModel.Unknown` here
657        # instead.
658        code_model = util.ritem_prefix(self.canonicalized_args, "-mcmodel=")
659        if code_model is None:
660            return CodeModel.Small
661
662        return code_model_map.get(code_model, CodeModel.Unknown)

Returns: A blight.enums.CodeModel value representing the tool's code model

class LinkSearchMixin: View Source

class LinkSearchMixin:
    """
    A mixin for tools that accept `-Lpath` (library search path) and `-llib`
    (library) arguments.
    """

    @property
    def explicit_library_search_paths(self: CanonicalizedArgsProtocol) -> List[Path]:
        """
        The library search paths given explicitly on this invocation, via `-L` or
        `--library-path`. Semantically, these paths are (normally) given priority
        over all other search paths.

        NOTE: This is **not** the same as the complete list of library search paths,
        which is tool-specific and host-dependent.

        Returns:
            Each collected path, joined onto `self.cwd` and resolved, ordered by the
            first element of the collected entry.
        """

        collected = list(util.collect_option_values(self.canonicalized_args, "-L"))
        collected.extend(
            util.collect_option_values(
                self.canonicalized_args, "--library-path", style=util.OptionValueStyle.EqualOrSpace
            )
        )

        # Interleave the short and long forms by the first element of each entry.
        collected.sort(key=lambda entry: entry[0])

        return [(self.cwd / entry[1]).resolve() for entry in collected]

    @property
    def library_names(self: CanonicalizedArgsProtocol) -> List[str]:
        """
        The names (without suffixes) of the libraries given explicitly on this
        invocation, via `-l` or `--library`.

        NOTE: This list does not include any libraries that are
        listed as "inputs" to the tool rather than as linkage specifications.

        Returns:
            Each collected value prefixed with `lib`, ordered by the first element of
            the collected entry.
        """

        collected = list(util.collect_option_values(self.canonicalized_args, "-l"))
        collected.extend(
            util.collect_option_values(
                self.canonicalized_args, "--library", style=util.OptionValueStyle.EqualOrSpace
            )
        )

        # Interleave the short and long forms by the first element of each entry.
        collected.sort(key=lambda entry: entry[0])

        return [f"lib{entry[1]}" for entry in collected]

A mixin for tools that support the -Lpath and -llib syntaxes for specifying library paths and libraries, respectively.

explicit_library_search_paths: List[pathlib.Path] View Source

671    @property
672    def explicit_library_search_paths(self: CanonicalizedArgsProtocol) -> List[Path]:
673        """
674        Returns a list of library search paths that are explicitly specified in
675        the tool's invocation. Semantically, these paths are (normally) given
676        priority over all other search paths.
677
678        NOTE: This is **not** the same as the complete list of library search paths,
679        which is tool-specific and host-dependent.
680        """
681
682        shorts = util.collect_option_values(self.canonicalized_args, "-L")
683        longs = util.collect_option_values(
684            self.canonicalized_args, "--library-path", style=util.OptionValueStyle.EqualOrSpace
685        )
686
687        sorted_values = sorted(itertools.chain(shorts, longs), key=lambda v: v[0])
688
689        return [(self.cwd / value[1]).resolve() for value in sorted_values]

Returns a list of library search paths that are explicitly specified in the tool's invocation. Semantically, these paths are (normally) given priority over all other search paths.

NOTE: This is not the same as the complete list of library search paths, which is tool-specific and host-dependent.

library_names: List[str] View Source

691    @property
692    def library_names(self: CanonicalizedArgsProtocol) -> List[str]:
693        """
694        Returns a list of library names (without suffixes) for libraries that
695        are explicitly specified in the tool's invocation.
696
697        NOTE: This list does not include any libraries that are
698        listed as "inputs" to the tool rather than as linkage specifications.
699        """
700
701        shorts = util.collect_option_values(self.canonicalized_args, "-l")
702        longs = util.collect_option_values(
703            self.canonicalized_args, "--library", style=util.OptionValueStyle.EqualOrSpace
704        )
705
706        sorted_values = sorted(itertools.chain(shorts, longs), key=lambda v: v[0])
707
708        return [f"lib{value[1]}" for value in sorted_values]

Returns a list of library names (without suffixes) for libraries that are explicitly specified in the tool's invocation.

NOTE: This list does not include any libraries that are listed as "inputs" to the tool rather than as linkage specifications.

class CompilerTool(LinkSearchMixin, ResponseFileMixin, Tool, StdMixin, OptMixin, DefinesMixin, CodeModelMixin): View Source

class CompilerTool(
    LinkSearchMixin, ResponseFileMixin, Tool, StdMixin, OptMixin, DefinesMixin, CodeModelMixin
):
    """
    A generic (C or C++) compiler frontend.

    Like `Tool`, `CompilerTool` cannot be instantiated directly; use a subclass.
    """

    def __init__(self, args: List[str]) -> None:
        """
        Args:
            args: The invocation's arguments, forwarded to the parent initializer.

        Raises:
            NotImplementedError: If `CompilerTool` itself, rather than a subclass,
                is being instantiated.
        """
        if type(self) is CompilerTool:
            raise NotImplementedError(f"can't instantiate {self.__class__.__name__} directly")

        super().__init__(args)

        # #40 and #41: These should be handled in an overridden implementation
        # of `canonicalized_args`.
        injection_vars = COMPILER_FLAG_INJECTION_VARIABLES & self._env.keys()
        if not injection_vars:
            return

        logger.warning(f"not tracking compiler's own instrumentation: {injection_vars}")

    @property
    def family(self) -> CompilerFamily:
        """
        Fingerprints the wrapped compiler by running it with `-###` and inspecting
        its exit status and stderr.

        Returns:
            A `blight.enums.CompilerFamily` value representing the "family" of compilers
            that this tool belongs to.
        """

        # NOTE(ww): Both GCC and Clang support -### as an alias for -v, but
        # with additional guarantees around argument quoting. Do other families support it?

        probe = subprocess.run([self.wrapped_tool(), "-###"], capture_output=True)
        stderr = probe.stderr

        # A failing exit status most likely means a frontend that doesn't
        # understand `-###`...
        if probe.returncode != 0:
            logger.warning("compiler fingerprint failed: frontend didn't recognize -###?")
            # ...but even still, we can infer a bit from the error message.
            return CompilerFamily.Tcc if b"tcc: error" in stderr else CompilerFamily.Unknown

        # The interesting parts of `-###` are expected on stderr; silence there
        # again suggests that the frontend doesn't understand the flag.
        if not stderr:
            logger.warning("compiler fingerprint failed: frontend didn't produce output for -###?")
            return CompilerFamily.Unknown

        # Finally, we do some silly substring checks. Order matters:
        # "Apple clang version" also contains "clang version".
        # TODO(ww): Better heuristics here?
        fingerprints = (
            (b"Apple clang version", CompilerFamily.AppleLlvm),
            (b"clang version", CompilerFamily.MainlineLlvm),
            (b"gcc version", CompilerFamily.Gcc),
        )
        for needle, family in fingerprints:
            if needle in stderr:
                return family

        return CompilerFamily.Unknown

    @property
    def stage(self) -> CompilerStage:
        """
        Returns:
            A `blight.enums.CompilerStage` value representing the stage that this tool is on
        """

        # TODO(ww): Refactor this entire method. Both GCC and Clang can actually
        # run multiple stages per invocation, e.g. `-x c foo.c -x c++ bar.cpp`,
        # so we should model this as "stages" instead. This, in turn, will require
        # us to reevaluate our output guesswork below.

        args = self.canonicalized_args
        if not args:
            return CompilerStage.Unknown

        # Tried in this order; the first flag present decides the stage.
        flag_stages = (
            # NOTE(ww): See the TODO in CompilerStage.
            ("-v", CompilerStage.Unknown),
            ("-###", CompilerStage.Unknown),
            ("-E", CompilerStage.Preprocess),
            ("-fsyntax-only", CompilerStage.SyntaxOnly),
            ("-S", CompilerStage.Assemble),
            ("-c", CompilerStage.CompileObject),
        )

        # TODO(ww): Handle header precompilation here. GCC doesn't seem to
        # consider this a real "stage", but it's different enough from every
        # other stage to warrant special treatment.

        # No explicit stage flag? Both gcc and clang treat this as
        # "run all stages", so we do too.
        return next(
            (stage for flag, stage in flag_stages if flag in args),
            CompilerStage.AllStages,
        )

    @property
    def outputs(self) -> List[str]:
        """
        Specializes `Tool.outputs` for compiler tools: when the parent reports no
        outputs, default output names are inferred from the compiler's stage.
        """
        explicit = super().outputs
        if explicit != []:
            return explicit

        # Without an explicit `-o outfile`, the default output name(s)
        # depends on the compiler's stage.
        stage = self.stage

        if stage == CompilerStage.Preprocess:
            # NOTE(ww): The preprocessor stage emits to stdout, but returning "-" as
            # a sentinel for that is very meh. If only Python had Rust-style enums.
            return ["-"]

        # NOTE(ww): Outputs are created relative to the current working directory,
        # not relative to their input. We return them as relative paths to
        # indicate this (maybe we should just fully resolve them?)
        per_input_suffix = {
            CompilerStage.Assemble: ".s",
            CompilerStage.CompileObject: ".o",
        }
        if stage in per_input_suffix:
            suffix = per_input_suffix[stage]
            return [Path(input_).with_suffix(suffix).name for input_ in self.inputs]

        if stage == CompilerStage.AllStages:
            # NOTE(ww): This will be wrong when we're doing header precompilation;
            # see the TODO in `stage`.
            return ["a.out"]

        return []

    def asdict(self) -> Dict[str, Any]:
        """
        Returns:
            The parent's dictionary representation, extended with the names of this
            tool's `lang`, `std`, `stage` and `opt` values.
        """
        return dict(
            super().asdict(),
            lang=self.lang.name,
            std=self.std.name,
            stage=self.stage.name,
            opt=self.opt.name,
        )

Represents a generic (C or C++) compiler frontend.

Like Tool, CompilerTool cannot be instantiated directly.

CompilerTool(args: List[str]) View Source

723    def __init__(self, args: List[str]) -> None:
724        if self.__class__ == CompilerTool:
725            raise NotImplementedError(f"can't instantiate {self.__class__.__name__} directly")
726
727        super().__init__(args)
728
729        # #40 and #41: These should be handled in an overridden implementation
730        # of `canonicalized_args`.
731        injection_vars = COMPILER_FLAG_INJECTION_VARIABLES & self._env.keys()
732        if injection_vars:
733            logger.warning(f"not tracking compiler's own instrumentation: {injection_vars}")

family: blight.enums.CompilerFamily View Source

735    @property
736    def family(self) -> CompilerFamily:
737        """
738        Returns:
739            A `blight.enums.CompilerFamily` value representing the "family" of compilers
740            that this tool belongs to.
741        """
742
743        # NOTE(ww): Both GCC and Clang support -### as an alias for -v, but
744        # with additional guarantees around argument quoting. Do other families support it?
745
746        result = subprocess.run([self.wrapped_tool(), "-###"], capture_output=True)
747
748        # If the command exited with an error, we're likely dealing with a frontend
749        # that doesn't understand `-###`.
750        if result.returncode != 0:
751            logger.warning("compiler fingerprint failed: frontend didn't recognize -###?")
752            # ...but even still, we can infer a bit from the error message.
753            if b"tcc: error" in result.stderr:
754                return CompilerFamily.Tcc
755            else:
756                return CompilerFamily.Unknown
757
758        # We expect the relevant parts of `-###` on stderr. The lack of any output
759        # again suggests that the frontend doesn't understand the flag.
760        if not result.stderr:
761            logger.warning("compiler fingerprint failed: frontend didn't produce output for -###?")
762            return CompilerFamily.Unknown
763
764        # Finally, we do some silly substring checks.
765        # TODO(ww): Better heuristics here?
766        if b"Apple clang version" in result.stderr:
767            return CompilerFamily.AppleLlvm
768        elif b"clang version" in result.stderr:
769            return CompilerFamily.MainlineLlvm
770        elif b"gcc version" in result.stderr:
771            return CompilerFamily.Gcc
772        else:
773            return CompilerFamily.Unknown

Returns: A blight.enums.CompilerFamily value representing the "family" of compilers that this tool belongs to.

stage: blight.enums.CompilerStage View Source

775    @property
776    def stage(self) -> CompilerStage:
777        """
778        Returns:
779            A `blight.enums.CompilerStage` value representing the stage that this tool is on
780        """
781
782        # TODO(ww): Refactor this entire method. Both GCC and Clang can actually
783        # run multiple stages per invocation, e.g. `-x c foo.c -x c++ bar.cpp`,
784        # so we should model this as "stages" instead. This, in turn, will require
785        # us to reevaluate our output guesswork below.
786
787        if len(self.canonicalized_args) == 0:
788            return CompilerStage.Unknown
789
790        stage_flag_map = {
791            # NOTE(ww): See the TODO in CompilerStage.
792            "-v": CompilerStage.Unknown,
793            "-###": CompilerStage.Unknown,
794            "-E": CompilerStage.Preprocess,
795            "-fsyntax-only": CompilerStage.SyntaxOnly,
796            "-S": CompilerStage.Assemble,
797            "-c": CompilerStage.CompileObject,
798        }
799
800        for flag, stage in stage_flag_map.items():
801            if flag in self.canonicalized_args:
802                return stage
803
804        # TODO(ww): Handle header precompilation here. GCC doesn't seem to
805        # consider this a real "stage", but it's different enough from every
806        # other stage to warrant special treatment.
807
808        # No explicit stage flag? Both gcc and clang treat this as
809        # "run all stages", so we do too.
810        return CompilerStage.AllStages

Returns: A blight.enums.CompilerStage value representing the stage that this tool is on

outputs: List[str] View Source

812    @property
813    def outputs(self) -> List[str]:
814        """
815        Specializes `Tool.outputs` for compiler tools.
816        """
817        outputs = super().outputs
818        if outputs != []:
819            return outputs
820
821        # Without an explicit `-o outfile`, the default output name(s)
822        # depends on the compiler's stage.
823        if self.stage == CompilerStage.Preprocess:
824            # NOTE(ww): The preprocessor stage emits to stdout, but returning "-" as
825            # a sentinel for that is very meh. If only Python had Rust-style enums.
826            return ["-"]
827        elif self.stage == CompilerStage.Assemble:
828            # NOTE(ww): Outputs are created relative to the current working directory,
829            # not relative to their input. We return them as relative paths to
830            # indicate this (maybe we should just fully resolve them?)
831            return [Path(input_).with_suffix(".s").name for input_ in self.inputs]
832        elif self.stage == CompilerStage.CompileObject:
833            return [Path(input_).with_suffix(".o").name for input_ in self.inputs]
834        elif self.stage == CompilerStage.AllStages:
835            # NOTE(ww): This will be wrong when we're doing header precompilation;
836            # see the TODO in `stage`.
837            return ["a.out"]
838        else:
839            return []

Specializes Tool.outputs for compiler tools.

def asdict(self) -> Dict[str, Any]: View Source

841    def asdict(self) -> Dict[str, Any]:
842        return {
843            **super().asdict(),
844            "lang": self.lang.name,
845            "std": self.std.name,
846            "stage": self.stage.name,
847            "opt": self.opt.name,
848        }

Returns: A dictionary representation of this tool

Inherited Members

LinkSearchMixin: explicit_library_search_paths; library_names
ResponseFileMixin: canonicalized_args
Tool: build_tool; blight_tool; wrapped_tool; run; is_journaling; args; cwd; inputs
StdMixin: std
LangMixin: lang
OptMixin: opt
DefinesMixin: indexed_undefines; defines
CodeModelMixin: code_model

class CC(CompilerTool): View Source

class CC(CompilerTool):
    """
    The C compiler frontend, as a specialization of `CompilerTool`.
    """

    def __repr__(self) -> str:
        """Summarizes the wrapped tool, language, standard and stage."""
        tool = self.wrapped_tool()
        lang, std, stage = self.lang, self.std, self.stage
        return f"<CC {tool} {lang} {std} {stage}>"

A specialization of CompilerTool for the C compiler frontend.

Inherited Members

CompilerTool: CompilerTool; family; stage; outputs; asdict
LinkSearchMixin: explicit_library_search_paths; library_names
ResponseFileMixin: canonicalized_args
Tool: build_tool; blight_tool; wrapped_tool; run; is_journaling; args; cwd; inputs
StdMixin: std
LangMixin: lang
OptMixin: opt
DefinesMixin: indexed_undefines; defines
CodeModelMixin: code_model

class CXX(CompilerTool): View Source

class CXX(CompilerTool):
    """
    The C++ compiler frontend, as a specialization of `CompilerTool`.
    """

    def __repr__(self) -> str:
        """Summarizes the wrapped tool, language, standard and stage."""
        tool = self.wrapped_tool()
        lang, std, stage = self.lang, self.std, self.stage
        return f"<CXX {tool} {lang} {std} {stage}>"

A specialization of CompilerTool for the C++ compiler frontend.

Inherited Members

CompilerTool: CompilerTool; family; stage; outputs; asdict
LinkSearchMixin: explicit_library_search_paths; library_names
ResponseFileMixin: canonicalized_args
Tool: build_tool; blight_tool; wrapped_tool; run; is_journaling; args; cwd; inputs
StdMixin: std
LangMixin: lang
OptMixin: opt
DefinesMixin: indexed_undefines; defines
CodeModelMixin: code_model

class CPP(Tool, StdMixin, DefinesMixin): View Source

class CPP(Tool, StdMixin, DefinesMixin):
    """
    The C preprocessor tool.
    """

    def __repr__(self) -> str:
        """Summarizes the wrapped tool, language and standard."""
        tool = self.wrapped_tool()
        lang, std = self.lang, self.std
        return f"<CPP {tool} {lang} {std}>"

    def asdict(self) -> Dict[str, Any]:
        """
        Returns:
            The parent's dictionary representation, extended with the names of this
            tool's `lang` and `std` values.
        """
        return dict(super().asdict(), lang=self.lang.name, std=self.std.name)

Represents the C preprocessor tool.

def asdict(self) -> Dict[str, Any]: View Source

877    def asdict(self) -> Dict[str, Any]:
878        return {**super().asdict(), "lang": self.lang.name, "std": self.std.name}

Returns: A dictionary representation of this tool

Inherited Members

Tool: Tool; build_tool; blight_tool; wrapped_tool; run; is_journaling; args; canonicalized_args; cwd; inputs; outputs
StdMixin: std
LangMixin: lang
DefinesMixin: indexed_undefines; defines

class LD(LinkSearchMixin, ResponseFileMixin, Tool): View Source

class LD(LinkSearchMixin, ResponseFileMixin, Tool):
    """
    Represents the linker.
    """

    @property
    def outputs(self) -> List[str]:
        """
        Specializes `Tool.outputs` for the linker.

        Returns:
            The parent's outputs when it reports any. Otherwise the value of the
            rightmost `--output OUTFILE` / `--output=OUTFILE` argument, `["a.out"]`
            when neither form is present, or `[]` when the rightmost `--output`
            has no value following it.
        """

        outputs = super().outputs
        if outputs != []:
            return outputs

        # The GNU linker additionally supports --output=OUTFILE and
        # --output OUTFILE. Handle them here.
        #
        # We match the two spellings exactly instead of by the bare `--output`
        # prefix, so that other options sharing that prefix (e.g. `--output-def`)
        # aren't mistaken for the output flag.
        args = self.canonicalized_args
        for idx in range(len(args) - 1, -1, -1):
            arg = args[idx]

            # Split option form.
            if arg == "--output":
                if idx + 1 >= len(args):
                    # Dangling `--output` with nothing after it.
                    logger.debug("couldn't infer output for linker: --output has no value")
                    return []
                return [args[idx + 1]]

            # Assignment form. Split only once, so that an output path that itself
            # contains `=` is kept intact.
            if arg.startswith("--output="):
                return [arg.split("=", 1)[1]]

        return ["a.out"]

    def __repr__(self) -> str:
        return f"<LD {self.wrapped_tool()}>"

Represents the linker.

outputs: List[str] View Source

886    @property
887    def outputs(self) -> List[str]:
888        """
889        Specializes `Tool.outputs` for the linker.
890        """
891
892        outputs = super().outputs
893        if outputs != []:
894            return outputs
895
896        # The GNU linker additionally supports --output=OUTFILE and
897        # --output OUTFILE. Handle them here.
898        output_flag_index = util.rindex_prefix(self.canonicalized_args, "--output")
899        if output_flag_index is None:
900            return ["a.out"]
901
902        # Split option form.
903        if self.canonicalized_args[output_flag_index] == "--output":
904            return [self.canonicalized_args[output_flag_index + 1]]
905
906        # Assignment form.
907        return [self.canonicalized_args[output_flag_index].split("=")[1]]

Specializes Tool.outputs for the linker.

Inherited Members

Tool: Tool; build_tool; blight_tool; wrapped_tool; run; is_journaling; asdict; args; cwd; inputs
LinkSearchMixin: explicit_library_search_paths; library_names
ResponseFileMixin: canonicalized_args

class AS(ResponseFileMixin, Tool): View Source

class AS(ResponseFileMixin, Tool):
    """
    The assembler.
    """

    def __repr__(self) -> str:
        """Summarizes the wrapped tool."""
        tool = self.wrapped_tool()
        return f"<AS {tool}>"

Represents the assembler.

Inherited Members

Tool: Tool; build_tool; blight_tool; wrapped_tool; run; is_journaling; asdict; args; cwd; inputs; outputs
ResponseFileMixin: canonicalized_args

class AR(ResponseFileMixin, Tool): View Source

class AR(ResponseFileMixin, Tool):
    """
    Represents the archiver.
    """

    @property
    def outputs(self) -> List[str]:
        """
        Specializes `Tool.outputs` for the archiver.

        Returns:
            A single-element list holding the first non-flag argument that has an
            `.a` suffix, or an empty list when no argument looks like an archive.
        """

        # TODO(ww): This doesn't support `ar x`, which explodes the archive
        # (i.e., treats it as input) instead of treats it as output.
        # It would be pretty strange for a build system to do this, but it's
        # probably something we should detect at the very least.

        # TODO(ww): We also don't support `ar t`, which queries the given
        # archive to provide a table listing of its contents.

        # NOTE(ww): `ar`'s POSIX and GNU CLIs are annoyingly complicated.
        # We save ourselves some pain by scanning from left-to-right, looking
        # for the first argument that looks like an archive output
        # (since the archiver only ever produces one output at a time).
        for arg in self.canonicalized_args:
            if arg.startswith("-"):
                continue

            # `Path.suffixes` splits on every dot, so a name like `libfoo-1.2.a`
            # yields `[".2", ".a"]`. Look for `.a` anywhere in the list instead of
            # only in the first position, so that such names are recognized too.
            if ".a" in Path(arg).suffixes:
                return [arg]

        logger.debug("couldn't infer output for archiver")
        return []

    def __repr__(self) -> str:
        return f"<AR {self.wrapped_tool()}>"

Represents the archiver.

outputs: List[str] View Source

927    @property
928    def outputs(self) -> List[str]:
929        """
930        Specializes `Tool.outputs` for the archiver.
931        """
932
933        # TODO(ww): This doesn't support `ar x`, which explodes the archive
934        # (i.e., treats it as input) instead of treats it as output.
935        # It would be pretty strange for a build system to do this, but it's
936        # probably something we should detect at the very least.
937
938        # TODO(ww): We also don't support `ar t`, which queries the given
939        # archive to provide a table listing of its contents.
940
941        # NOTE(ww): `ar`'s POSIX and GNU CLIs are annoyingly complicated.
942        # We save ourselves some pain by scanning from left-to-right, looking
943        # for the first argument that looks like an archive output
944        # (since the archiver only ever produces one output at a time).
945        for arg in self.canonicalized_args:
946            if arg.startswith("-"):
947                continue
948
949            maybe_archive_suffixes = Path(arg).suffixes
950            if len(maybe_archive_suffixes) > 0 and maybe_archive_suffixes[0] == ".a":
951                return [arg]
952
953        logger.debug("couldn't infer output for archiver")
954        return []

Specializes Tool.outputs for the archiver.

Inherited Members

Tool: Tool; build_tool; blight_tool; wrapped_tool; run; is_journaling; asdict; args; cwd; inputs
ResponseFileMixin: canonicalized_args

class STRIP(ResponseFileMixin, Tool): View Source

class STRIP(ResponseFileMixin, Tool):
    """
    Represents the stripping tool.

    Nothing is specialized here apart from `__repr__`; all other behavior
    comes from `ResponseFileMixin` and `Tool`.
    """

    def __repr__(self) -> str:
        """
        Returns a short debugging representation of the strip wrapper,
        embedding whatever `wrapped_tool()` returns.
        """
        wrapped = self.wrapped_tool()
        return f"<STRIP {wrapped}>"

Represents the stripping tool.

Inherited Members

Tool: Tool; build_tool; blight_tool; wrapped_tool; run; is_journaling; asdict; args; cwd; inputs; outputs
ResponseFileMixin: canonicalized_args

class INSTALL(Tool): View Source

 969class INSTALL(Tool):
 970    """
 971    Represents the install tool.
 972    """
 973
 974    def _install_parser(self) -> util.ArgumentParser:
 975        parser = util.ArgumentParser(
 976            prog=self.build_tool().value, add_help=False, allow_abbrev=False
 977        )
 978
 979        def add_flag(short: str, dest: str, **kwargs: Any) -> None:
 980            parser.add_argument(short, action="store_true", dest=dest, **kwargs)
 981
 982        add_flag("-b", "overwrite")
 983        add_flag("-C", "copy_no_mtime")
 984        add_flag("-c", "copy", default=True)
 985        add_flag("-d", "directory_mode")
 986        add_flag("-M", "disable_mmap")
 987        add_flag("-p", "preserve_mtime")
 988        add_flag("-S", "safe_copy")
 989        add_flag("-s", "exec_strip")
 990        add_flag("-v", "verbose")
 991        parser.add_argument("-f", dest="flags")
 992        parser.add_argument("-g", dest="group")
 993        parser.add_argument("-m", dest="mode")
 994        parser.add_argument("-o", dest="owner")
 995        parser.add_argument("trailing", nargs="+", default=[])
 996
 997        return parser
 998
 999    def __init__(self, args: List[str]) -> None:
1000        super().__init__(args)
1001        self._parser = self._install_parser()
1002
1003        try:
1004            (self._matches, self._unknown) = self._parser.parse_known_args(args)
1005        except ValueError as e:
1006            logger.error(f"argparse error: {e}")
1007            self._matches = self._parser.default_namespace()
1008            self._unknown = args
1009
1010    @property
1011    def directory_mode(self) -> bool:
1012        """
1013        Returns whether this `install` invocation is in "directory mode," i.e.
1014        is creating directories instead of installing files.
1015        """
1016        return self._matches.directory_mode  # type: ignore[no-any-return]
1017
1018    @property
1019    def inputs(self) -> List[str]:
1020        """
1021        Specializes `Tool.inputs` for the install tool.
1022        """
1023
1024        # Directory mode: all positionals are new directories, i.e. outputs.
1025        if self.directory_mode:
1026            return []
1027
1028        # `install` requires at least two positionals outside of directory mode,
1029        # so this probably indicates an unknown GNUism like `--help`.
1030        if len(self._matches.trailing) < 2:
1031            logger.debug(f"install called with no positionals (hint: unknown args: {self._unknown}")
1032            return []
1033
1034        # Otherwise, we're either installing one file to another or we're
1035        # installing multiple files to a directory. Test the last positional
1036        # to determine which mode we're in.
1037        maybe_dir = self._cwd / self._matches.trailing[-1]
1038        if maybe_dir.is_dir():
1039            return self._matches.trailing[0:-1]  # type: ignore[no-any-return]
1040        else:
1041            return [self._matches.trailing[0]]
1042
1043    @property
1044    def outputs(self) -> List[str]:
1045        """
1046        Specializes `Tool.outputs` for the install tool.
1047        """
1048
1049        # Directory mode: treat created directories as outputs.
1050        if self.directory_mode:
1051            return self._matches.trailing  # type: ignore[no-any-return]
1052
1053        # `install` requires at least two positionals outside of directory mode,
1054        # so this probably indicates an unknown GNUism like `--help`.
1055        if len(self._matches.trailing) < 2:
1056            logger.debug(f"install called with no positionals (hint: unknown args: {self._unknown}")
1057            return []
1058
1059        # If we're installing multiple files to a destination directory,
1060        # then our outputs are every input, under the destination.
1061        # Otherwise, our output is a single file.
1062        maybe_dir = self._cwd / self._matches.trailing[-1]
1063        if maybe_dir.is_dir():
1064            inputs = [Path(input_) for input_ in self._matches.trailing[0:-1]]
1065            return [str(maybe_dir / input_.name) for input_ in inputs]
1066        else:
1067            return [self._matches.trailing[-1]]
1068
1069    def __repr__(self) -> str:
1070        return f"<INSTALL {self.wrapped_tool()}>"

Represents the install tool.

INSTALL(args: List[str]) View Source

 999    def __init__(self, args: List[str]) -> None:
1000        super().__init__(args)
1001        self._parser = self._install_parser()
1002
1003        try:
1004            (self._matches, self._unknown) = self._parser.parse_known_args(args)
1005        except ValueError as e:
1006            logger.error(f"argparse error: {e}")
1007            self._matches = self._parser.default_namespace()
1008            self._unknown = args

directory_mode: bool View Source

1010    @property
1011    def directory_mode(self) -> bool:
1012        """
1013        Returns whether this `install` invocation is in "directory mode," i.e.
1014        is creating directories instead of installing files.
1015        """
1016        return self._matches.directory_mode  # type: ignore[no-any-return]

Returns whether this install invocation is in "directory mode," i.e. is creating directories instead of installing files.

inputs: List[str] View Source

1018    @property
1019    def inputs(self) -> List[str]:
1020        """
1021        Specializes `Tool.inputs` for the install tool.
1022        """
1023
1024        # Directory mode: all positionals are new directories, i.e. outputs.
1025        if self.directory_mode:
1026            return []
1027
1028        # `install` requires at least two positionals outside of directory mode,
1029        # so this probably indicates an unknown GNUism like `--help`.
1030        if len(self._matches.trailing) < 2:
1031            logger.debug(f"install called with no positionals (hint: unknown args: {self._unknown}")
1032            return []
1033
1034        # Otherwise, we're either installing one file to another or we're
1035        # installing multiple files to a directory. Test the last positional
1036        # to determine which mode we're in.
1037        maybe_dir = self._cwd / self._matches.trailing[-1]
1038        if maybe_dir.is_dir():
1039            return self._matches.trailing[0:-1]  # type: ignore[no-any-return]
1040        else:
1041            return [self._matches.trailing[0]]

Specializes Tool.inputs for the install tool.

outputs: List[str] View Source

1043    @property
1044    def outputs(self) -> List[str]:
1045        """
1046        Specializes `Tool.outputs` for the install tool.
1047        """
1048
1049        # Directory mode: treat created directories as outputs.
1050        if self.directory_mode:
1051            return self._matches.trailing  # type: ignore[no-any-return]
1052
1053        # `install` requires at least two positionals outside of directory mode,
1054        # so this probably indicates an unknown GNUism like `--help`.
1055        if len(self._matches.trailing) < 2:
1056            logger.debug(f"install called with no positionals (hint: unknown args: {self._unknown}")
1057            return []
1058
1059        # If we're installing multiple files to a destination directory,
1060        # then our outputs are every input, under the destination.
1061        # Otherwise, our output is a single file.
1062        maybe_dir = self._cwd / self._matches.trailing[-1]
1063        if maybe_dir.is_dir():
1064            inputs = [Path(input_) for input_ in self._matches.trailing[0:-1]]
1065            return [str(maybe_dir / input_.name) for input_ in inputs]
1066        else:
1067            return [self._matches.trailing[-1]]

Specializes Tool.outputs for the install tool.

Inherited Members

Tool: build_tool; blight_tool; wrapped_tool; run; is_journaling; asdict; args; canonicalized_args; cwd