diff --git a/clang/include/clang/Basic/BuiltinsLoongArchLASX.def b/clang/include/clang/Basic/BuiltinsLoongArchLASX.def index 4cf51cc000f6..f644b820a618 100644 --- a/clang/include/clang/Basic/BuiltinsLoongArchLASX.def +++ b/clang/include/clang/Basic/BuiltinsLoongArchLASX.def @@ -1,988 +1,988 @@ //=BuiltinsLoongArchLASX.def - LoongArch Builtin function database -- C++ -*-=// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file defines the LoongArch-specific LASX builtin function database. // Users of this file must define the BUILTIN macro to make use of this // information. // //===----------------------------------------------------------------------===// -TARGET_BUILTIN(__builtin_lasx_xvadd_b, "V32cV32cV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvadd_b, "V32ScV32ScV32Sc", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvadd_h, "V16sV16sV16s", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvadd_w, "V8iV8iV8i", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvadd_d, "V4LLiV4LLiV4LLi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvadd_q, "V4LLiV4LLiV4LLi", "nc", "lasx") -TARGET_BUILTIN(__builtin_lasx_xvsub_b, "V32cV32cV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsub_b, "V32ScV32ScV32Sc", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvsub_h, "V16sV16sV16s", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvsub_w, "V8iV8iV8i", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvsub_d, "V4LLiV4LLiV4LLi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvsub_q, "V4LLiV4LLiV4LLi", "nc", "lasx") -TARGET_BUILTIN(__builtin_lasx_xvaddi_bu, "V32cV32cIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvaddi_bu, "V32ScV32ScIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvaddi_hu, "V16sV16sIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvaddi_wu, "V8iV8iIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvaddi_du, "V4LLiV4LLiIUi", "nc", "lasx") -TARGET_BUILTIN(__builtin_lasx_xvsubi_bu, "V32cV32cIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsubi_bu, "V32ScV32ScIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvsubi_hu, "V16sV16sIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvsubi_wu, "V8iV8iIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvsubi_du, "V4LLiV4LLiIUi", "nc", "lasx") -TARGET_BUILTIN(__builtin_lasx_xvneg_b, "V32cV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvneg_b, "V32ScV32Sc", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvneg_h, "V16sV16s", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvneg_w, "V8iV8i", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvneg_d, "V4LLiV4LLi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvsadd_b, "V32ScV32ScV32Sc", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvsadd_h, "V16SsV16SsV16Ss", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvsadd_w, "V8SiV8SiV8Si", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvsadd_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvsadd_bu, "V32UcV32UcV32Uc", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvsadd_hu, "V16UsV16UsV16Us", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvsadd_wu, "V8UiV8UiV8Ui", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvsadd_du, "V4ULLiV4ULLiV4ULLi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvssub_b, "V32ScV32ScV32Sc", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvssub_h, "V16SsV16SsV16Ss", "nc", "lasx") 
TARGET_BUILTIN(__builtin_lasx_xvssub_w, "V8SiV8SiV8Si", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvssub_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvssub_bu, "V32UcV32UcV32Uc", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvssub_hu, "V16UsV16UsV16Us", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvssub_wu, "V8UiV8UiV8Ui", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvssub_du, "V4ULLiV4ULLiV4ULLi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvhaddw_h_b, "V16SsV32ScV32Sc", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvhaddw_w_h, "V8SiV16SsV16Ss", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvhaddw_d_w, "V4SLLiV8SiV8Si", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvhaddw_q_d, "V4LLiV4LLiV4LLi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvhaddw_hu_bu, "V16UsV32UcV32Uc", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvhaddw_wu_hu, "V8UiV16UsV16Us", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvhaddw_du_wu, "V4ULLiV8UiV8Ui", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvhaddw_qu_du, "V4ULLiV4ULLiV4ULLi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvhsubw_h_b, "V16SsV32ScV32Sc", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvhsubw_w_h, "V8SiV16SsV16Ss", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvhsubw_d_w, "V4SLLiV8SiV8Si", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvhsubw_q_d, "V4LLiV4LLiV4LLi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvhsubw_hu_bu, "V16UsV32UcV32Uc", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvhsubw_wu_hu, "V8UiV16UsV16Us", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvhsubw_du_wu, "V4ULLiV8UiV8Ui", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvhsubw_qu_du, "V4ULLiV4ULLiV4ULLi", "nc", "lasx") -TARGET_BUILTIN(__builtin_lasx_xvaddwev_h_b, "V16sV32cV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvaddwev_h_b, "V16sV32ScV32Sc", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvaddwev_w_h, "V8SiV16sV16s", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvaddwev_d_w, "V4LLiV8SiV8Si", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvaddwev_q_d, "V4LLiV4LLiV4LLi", "nc", "lasx") -TARGET_BUILTIN(__builtin_lasx_xvaddwod_h_b, "V16sV32cV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvaddwod_h_b, "V16sV32ScV32Sc", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvaddwod_w_h, "V8SiV16sV16s", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvaddwod_d_w, "V4LLiV8SiV8Si", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvaddwod_q_d, "V4LLiV4LLiV4LLi", "nc", "lasx") -TARGET_BUILTIN(__builtin_lasx_xvsubwev_h_b, "V16sV32cV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsubwev_h_b, "V16sV32ScV32Sc", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvsubwev_w_h, "V8SiV16sV16s", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvsubwev_d_w, "V4LLiV8SiV8Si", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvsubwev_q_d, "V4LLiV4LLiV4LLi", "nc", "lasx") -TARGET_BUILTIN(__builtin_lasx_xvsubwod_h_b, "V16sV32cV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsubwod_h_b, "V16sV32ScV32Sc", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvsubwod_w_h, "V8SiV16sV16s", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvsubwod_d_w, "V4LLiV8SiV8Si", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvsubwod_q_d, "V4LLiV4LLiV4LLi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvaddwev_h_bu, "V16sV32UcV32Uc", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvaddwev_w_hu, "V8SiV16UsV16Us", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvaddwev_d_wu, "V4LLiV8UiV8Ui", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvaddwev_q_du, "V4LLiV4ULLiV4ULLi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvaddwod_h_bu, 
"V16sV32UcV32Uc", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvaddwod_w_hu, "V8SiV16UsV16Us", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvaddwod_d_wu, "V4LLiV8UiV8Ui", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvaddwod_q_du, "V4LLiV4ULLiV4ULLi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvsubwev_h_bu, "V16sV32UcV32Uc", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvsubwev_w_hu, "V8SiV16UsV16Us", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvsubwev_d_wu, "V4LLiV8UiV8Ui", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvsubwev_q_du, "V4LLiV4ULLiV4ULLi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvsubwod_h_bu, "V16sV32UcV32Uc", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvsubwod_w_hu, "V8SiV16UsV16Us", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvsubwod_d_wu, "V4LLiV8UiV8Ui", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvsubwod_q_du, "V4LLiV4ULLiV4ULLi", "nc", "lasx") -TARGET_BUILTIN(__builtin_lasx_xvaddwev_h_bu_b, "V16sV32UcV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvaddwev_h_bu_b, "V16sV32UcV32Sc", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvaddwev_w_hu_h, "V8SiV16UsV16s", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvaddwev_d_wu_w, "V4LLiV8UiV8Si", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvaddwev_q_du_d, "V4LLiV4ULLiV4LLi", "nc", "lasx") -TARGET_BUILTIN(__builtin_lasx_xvaddwod_h_bu_b, "V16sV32UcV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvaddwod_h_bu_b, "V16sV32UcV32Sc", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvaddwod_w_hu_h, "V8SiV16UsV16s", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvaddwod_d_wu_w, "V4LLiV8UiV8Si", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvaddwod_q_du_d, "V4LLiV4ULLiV4LLi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvavg_b, "V32ScV32ScV32Sc", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvavg_h, "V16SsV16SsV16Ss", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvavg_w, "V8SiV8SiV8Si", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvavg_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvavg_bu, "V32UcV32UcV32Uc", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvavg_hu, "V16UsV16UsV16Us", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvavg_wu, "V8UiV8UiV8Ui", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvavg_du, "V4ULLiV4ULLiV4ULLi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvavgr_b, "V32ScV32ScV32Sc", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvavgr_h, "V16SsV16SsV16Ss", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvavgr_w, "V8SiV8SiV8Si", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvavgr_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvavgr_bu, "V32UcV32UcV32Uc", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvavgr_hu, "V16UsV16UsV16Us", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvavgr_wu, "V8UiV8UiV8Ui", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvavgr_du, "V4ULLiV4ULLiV4ULLi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvabsd_b, "V32ScV32ScV32Sc", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvabsd_h, "V16SsV16SsV16Ss", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvabsd_w, "V8SiV8SiV8Si", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvabsd_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvabsd_bu, "V32UcV32UcV32Uc", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvabsd_hu, "V16UsV16UsV16Us", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvabsd_wu, "V8UiV8UiV8Ui", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvabsd_du, "V4ULLiV4ULLiV4ULLi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvadda_b, "V32ScV32ScV32Sc", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvadda_h, 
"V16SsV16SsV16Ss", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvadda_w, "V8SiV8SiV8Si", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvadda_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvmax_b, "V32ScV32ScV32Sc", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvmax_h, "V16SsV16SsV16Ss", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvmax_w, "V8SiV8SiV8Si", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvmax_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvmaxi_b, "V32ScV32ScIi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvmaxi_h, "V16SsV16SsIi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvmaxi_w, "V8SiV8SiIi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvmaxi_d, "V4SLLiV4SLLiIi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvmax_bu, "V32UcV32UcV32Uc", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvmax_hu, "V16UsV16UsV16Us", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvmax_wu, "V8UiV8UiV8Ui", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvmax_du, "V4ULLiV4ULLiV4ULLi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvmaxi_bu, "V32UcV32UcIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvmaxi_hu, "V16UsV16UsIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvmaxi_wu, "V8UiV8UiIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvmaxi_du, "V4ULLiV4ULLiIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvmin_b, "V32ScV32ScV32Sc", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvmin_h, "V16SsV16SsV16Ss", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvmin_w, "V8SiV8SiV8Si", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvmin_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvmini_b, "V32ScV32ScIi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvmini_h, "V16SsV16SsIi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvmini_w, "V8SiV8SiIi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvmini_d, "V4SLLiV4SLLiIi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvmin_bu, "V32UcV32UcV32Uc", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvmin_hu, "V16UsV16UsV16Us", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvmin_wu, "V8UiV8UiV8Ui", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvmin_du, "V4ULLiV4ULLiV4ULLi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvmini_bu, "V32UcV32UcIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvmini_hu, "V16UsV16UsIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvmini_wu, "V8UiV8UiIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvmini_du, "V4ULLiV4ULLiIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvmul_b, "V32ScV32ScV32Sc", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvmul_h, "V16SsV16SsV16Ss", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvmul_w, "V8SiV8SiV8Si", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvmul_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx") -TARGET_BUILTIN(__builtin_lasx_xvmuh_b, "V32cV32cV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmuh_b, "V32ScV32ScV32Sc", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvmuh_h, "V16sV16sV16s", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvmuh_w, "V8iV8iV8i", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvmuh_d, "V4LLiV4LLiV4LLi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvmuh_bu, "V32UcV32UcV32Uc", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvmuh_hu, "V16UsV16UsV16Us", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvmuh_wu, "V8UiV8UiV8Ui", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvmuh_du, "V4ULLiV4ULLiV4ULLi", "nc", "lasx") -TARGET_BUILTIN(__builtin_lasx_xvmulwev_h_b, "V16sV32cV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmulwev_h_b, "V16sV32ScV32Sc", "nc", 
"lasx") TARGET_BUILTIN(__builtin_lasx_xvmulwev_w_h, "V8SiV16sV16s", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvmulwev_d_w, "V4LLiV8SiV8Si", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvmulwev_q_d, "V4LLiV4LLiV4LLi", "nc", "lasx") -TARGET_BUILTIN(__builtin_lasx_xvmulwod_h_b, "V16sV32cV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmulwod_h_b, "V16sV32ScV32Sc", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvmulwod_w_h, "V8SiV16sV16s", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvmulwod_d_w, "V4LLiV8SiV8Si", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvmulwod_q_d, "V4LLiV4LLiV4LLi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvmulwev_h_bu, "V16sV32UcV32Uc", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvmulwev_w_hu, "V8SiV16UsV16Us", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvmulwev_d_wu, "V4LLiV8UiV8Ui", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvmulwev_q_du, "V4LLiV4ULLiV4ULLi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvmulwod_h_bu, "V16sV32UcV32Uc", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvmulwod_w_hu, "V8SiV16UsV16Us", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvmulwod_d_wu, "V4LLiV8UiV8Ui", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvmulwod_q_du, "V4LLiV4ULLiV4ULLi", "nc", "lasx") -TARGET_BUILTIN(__builtin_lasx_xvmulwev_h_bu_b, "V16sV32UcV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmulwev_h_bu_b, "V16sV32UcV32Sc", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvmulwev_w_hu_h, "V8SiV16UsV16s", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvmulwev_d_wu_w, "V4LLiV8UiV8Si", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvmulwev_q_du_d, "V4LLiV4ULLiV4LLi", "nc", "lasx") -TARGET_BUILTIN(__builtin_lasx_xvmulwod_h_bu_b, "V16sV32UcV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmulwod_h_bu_b, "V16sV32UcV32Sc", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvmulwod_w_hu_h, "V8SiV16UsV16s", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvmulwod_d_wu_w, "V4LLiV8UiV8Si", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvmulwod_q_du_d, "V4LLiV4ULLiV4LLi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvmadd_b, "V32ScV32ScV32ScV32Sc", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvmadd_h, "V16SsV16SsV16SsV16Ss", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvmadd_w, "V8SiV8SiV8SiV8Si", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvmadd_d, "V4SLLiV4SLLiV4SLLiV4SLLi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvmsub_b, "V32ScV32ScV32ScV32Sc", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvmsub_h, "V16SsV16SsV16SsV16Ss", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvmsub_w, "V8SiV8SiV8SiV8Si", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvmsub_d, "V4SLLiV4SLLiV4SLLiV4SLLi", "nc", "lasx") -TARGET_BUILTIN(__builtin_lasx_xvmaddwev_h_b, "V16sV16sV32cV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmaddwev_h_b, "V16sV16sV32ScV32Sc", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvmaddwev_w_h, "V8SiV8SiV16sV16s", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvmaddwev_d_w, "V4LLiV4LLiV8SiV8Si", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvmaddwev_q_d, "V4LLiV4LLiV4LLiV4LLi", "nc", "lasx") -TARGET_BUILTIN(__builtin_lasx_xvmaddwod_h_b, "V16sV16sV32cV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmaddwod_h_b, "V16sV16sV32ScV32Sc", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvmaddwod_w_h, "V8SiV8SiV16sV16s", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvmaddwod_d_w, "V4LLiV4LLiV8SiV8Si", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvmaddwod_q_d, "V4LLiV4LLiV4LLiV4LLi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvmaddwev_h_bu, "V16UsV16UsV32UcV32Uc", "nc", "lasx") 
TARGET_BUILTIN(__builtin_lasx_xvmaddwev_w_hu, "V8UiV8UiV16UsV16Us", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvmaddwev_d_wu, "V4ULLiV4ULLiV8UiV8Ui", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvmaddwev_q_du, "V4ULLiV4ULLiV4ULLiV4ULLi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvmaddwod_h_bu, "V16UsV16UsV32UcV32Uc", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvmaddwod_w_hu, "V8UiV8UiV16UsV16Us", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvmaddwod_d_wu, "V4ULLiV4ULLiV8UiV8Ui", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvmaddwod_q_du, "V4ULLiV4ULLiV4ULLiV4ULLi", "nc", "lasx") -TARGET_BUILTIN(__builtin_lasx_xvmaddwev_h_bu_b, "V16sV16sV32UcV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmaddwev_h_bu_b, "V16sV16sV32UcV32Sc", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvmaddwev_w_hu_h, "V8SiV8SiV16UsV16s", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvmaddwev_d_wu_w, "V4LLiV4LLiV8UiV8Si", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvmaddwev_q_du_d, "V4LLiV4LLiV4ULLiV4LLi", "nc", "lasx") -TARGET_BUILTIN(__builtin_lasx_xvmaddwod_h_bu_b, "V16sV16sV32UcV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmaddwod_h_bu_b, "V16sV16sV32UcV32Sc", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvmaddwod_w_hu_h, "V8SiV8SiV16UsV16s", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvmaddwod_d_wu_w, "V4LLiV4LLiV8UiV8Si", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvmaddwod_q_du_d, "V4LLiV4LLiV4ULLiV4LLi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvdiv_b, "V32ScV32ScV32Sc", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvdiv_h, "V16SsV16SsV16Ss", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvdiv_w, "V8SiV8SiV8Si", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvdiv_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvdiv_bu, "V32UcV32UcV32Uc", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvdiv_hu, "V16UsV16UsV16Us", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvdiv_wu, "V8UiV8UiV8Ui", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvdiv_du, "V4ULLiV4ULLiV4ULLi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvmod_b, "V32ScV32ScV32Sc", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvmod_h, "V16SsV16SsV16Ss", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvmod_w, "V8SiV8SiV8Si", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvmod_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvmod_bu, "V32UcV32UcV32Uc", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvmod_hu, "V16UsV16UsV16Us", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvmod_wu, "V8UiV8UiV8Ui", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvmod_du, "V4ULLiV4ULLiV4ULLi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvsat_b, "V32ScV32ScIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvsat_h, "V16SsV16SsIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvsat_w, "V8SiV8SiIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvsat_d, "V4SLLiV4SLLiIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvsat_bu, "V32UcV32UcIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvsat_hu, "V16UsV16UsIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvsat_wu, "V8UiV8UiIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvsat_du, "V4ULLiV4ULLiIUi", "nc", "lasx") -TARGET_BUILTIN(__builtin_lasx_xvexth_h_b, "V16sV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvexth_h_b, "V16sV32Sc", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvexth_w_h, "V8SiV16s", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvexth_d_w, "V4LLiV8Si", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvexth_q_d, "V4LLiV4LLi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvexth_hu_bu, 
"V16UsV32Uc", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvexth_wu_hu, "V8UiV16Us", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvexth_du_wu, "V4ULLiV8Ui", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvexth_qu_du, "V4ULLiV4ULLi", "nc", "lasx") -TARGET_BUILTIN(__builtin_lasx_vext2xv_h_b, "V16sV32c", "nc", "lasx") -TARGET_BUILTIN(__builtin_lasx_vext2xv_w_b, "V8SiV32c", "nc", "lasx") -TARGET_BUILTIN(__builtin_lasx_vext2xv_d_b, "V4LLiV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_vext2xv_h_b, "V16sV32Sc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_vext2xv_w_b, "V8SiV32Sc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_vext2xv_d_b, "V4LLiV32Sc", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_vext2xv_w_h, "V8SiV16s", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_vext2xv_d_h, "V4LLiV16s", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_vext2xv_d_w, "V4LLiV8Si", "nc", "lasx") -TARGET_BUILTIN(__builtin_lasx_vext2xv_hu_bu, "V16sV32c", "nc", "lasx") -TARGET_BUILTIN(__builtin_lasx_vext2xv_wu_bu, "V8SiV32c", "nc", "lasx") -TARGET_BUILTIN(__builtin_lasx_vext2xv_du_bu, "V4LLiV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_vext2xv_hu_bu, "V16sV32Sc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_vext2xv_wu_bu, "V8SiV32Sc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_vext2xv_du_bu, "V4LLiV32Sc", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_vext2xv_wu_hu, "V8SiV16s", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_vext2xv_du_hu, "V4LLiV16s", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_vext2xv_du_wu, "V4LLiV8Si", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvsigncov_b, "V32ScV32ScV32Sc", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvsigncov_h, "V16SsV16SsV16Ss", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvsigncov_w, "V8SiV8SiV8Si", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvsigncov_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx") -TARGET_BUILTIN(__builtin_lasx_xvmskltz_b, "V32cV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmskltz_b, "V32ScV32Sc", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvmskltz_h, "V16sV16s", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvmskltz_w, "V8iV8i", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvmskltz_d, "V4LLiV4LLi", "nc", "lasx") -TARGET_BUILTIN(__builtin_lasx_xvmskgez_b, "V32cV32c", "nc", "lasx") -TARGET_BUILTIN(__builtin_lasx_xvmsknz_b, "V16sV16s", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmskgez_b, "V32ScV32Sc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmsknz_b, "V32ScV32Sc", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvldi, "V4LLiIi", "nc", "lasx") -TARGET_BUILTIN(__builtin_lasx_xvrepli_b, "V32cIi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvrepli_b, "V32ScIi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvrepli_h, "V16sIi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvrepli_w, "V8iIi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvrepli_d, "V4LLiIi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvand_v, "V32UcV32UcV32Uc", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvor_v, "V32UcV32UcV32Uc", "nc", "lasx") -TARGET_BUILTIN(__builtin_lasx_xvxor_v, "V32cV32cV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvxor_v, "V32UcV32UcV32Uc", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvnor_v, "V32UcV32UcV32Uc", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvandn_v, "V32UcV32UcV32Uc", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvorn_v, "V32ScV32ScV32Sc", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvandi_b, "V32UcV32UcIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvori_b, "V32UcV32UcIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvxori_b, "V32UcV32UcIUi", "nc", 
"lasx") TARGET_BUILTIN(__builtin_lasx_xvnori_b, "V32UcV32UcIUi", "nc", "lasx") -TARGET_BUILTIN(__builtin_lasx_xvsll_b, "V32cV32cV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsll_b, "V32ScV32ScV32Sc", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvsll_h, "V16sV16sV16s", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvsll_w, "V8iV8iV8i", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvsll_d, "V4LLiV4LLiV4LLi", "nc", "lasx") -TARGET_BUILTIN(__builtin_lasx_xvslli_b, "V32cV32cIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvslli_b, "V32ScV32ScIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvslli_h, "V16sV16sIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvslli_w, "V8iV8iIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvslli_d, "V4LLiV4LLiIUi", "nc", "lasx") -TARGET_BUILTIN(__builtin_lasx_xvsrl_b, "V32cV32cV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsrl_b, "V32ScV32ScV32Sc", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvsrl_h, "V16sV16sV16s", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvsrl_w, "V8iV8iV8i", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvsrl_d, "V4LLiV4LLiV4LLi", "nc", "lasx") -TARGET_BUILTIN(__builtin_lasx_xvsrli_b, "V32cV32cIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsrli_b, "V32ScV32ScIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvsrli_h, "V16sV16sIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvsrli_w, "V8iV8iIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvsrli_d, "V4LLiV4LLiIUi", "nc", "lasx") -TARGET_BUILTIN(__builtin_lasx_xvsra_b, "V32cV32cV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsra_b, "V32ScV32ScV32Sc", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvsra_h, "V16sV16sV16s", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvsra_w, "V8iV8iV8i", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvsra_d, "V4LLiV4LLiV4LLi", "nc", "lasx") -TARGET_BUILTIN(__builtin_lasx_xvsrai_b, "V32cV32cIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsrai_b, "V32ScV32ScIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvsrai_h, "V16sV16sIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvsrai_w, "V8iV8iIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvsrai_d, "V4LLiV4LLiIUi", "nc", "lasx") -TARGET_BUILTIN(__builtin_lasx_xvrotr_b, "V32cV32cV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvrotr_b, "V32ScV32ScV32Sc", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvrotr_h, "V16sV16sV16s", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvrotr_w, "V8iV8iV8i", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvrotr_d, "V4LLiV4LLiV4LLi", "nc", "lasx") -TARGET_BUILTIN(__builtin_lasx_xvrotri_b, "V32cV32cIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvrotri_b, "V32ScV32ScIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvrotri_h, "V16sV16sIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvrotri_w, "V8iV8iIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvrotri_d, "V4LLiV4LLiIUi", "nc", "lasx") -TARGET_BUILTIN(__builtin_lasx_xvsllwil_h_b, "V16sV32cIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsllwil_h_b, "V16sV32ScIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvsllwil_w_h, "V8SiV16sIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvsllwil_d_w, "V4LLiV8SiIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvextl_q_d, "V4LLiV4LLi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvsllwil_hu_bu, "V16UsV32UcIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvsllwil_wu_hu, "V8UiV16UsIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvsllwil_du_wu, "V4ULLiV8UiIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvextl_qu_du, "V4LLiV4ULLi", "nc", "lasx") 
-TARGET_BUILTIN(__builtin_lasx_xvsrlr_b, "V32cV32cV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsrlr_b, "V32ScV32ScV32Sc", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvsrlr_h, "V16sV16sV16s", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvsrlr_w, "V8iV8iV8i", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvsrlr_d, "V4LLiV4LLiV4LLi", "nc", "lasx") -TARGET_BUILTIN(__builtin_lasx_xvsrlri_b, "V32cV32cIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsrlri_b, "V32ScV32ScIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvsrlri_h, "V16sV16sIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvsrlri_w, "V8iV8iIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvsrlri_d, "V4LLiV4LLiIUi", "nc", "lasx") -TARGET_BUILTIN(__builtin_lasx_xvsrar_b, "V32cV32cV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsrar_b, "V32ScV32ScV32Sc", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvsrar_h, "V16sV16sV16s", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvsrar_w, "V8iV8iV8i", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvsrar_d, "V4LLiV4LLiV4LLi", "nc", "lasx") -TARGET_BUILTIN(__builtin_lasx_xvsrari_b, "V32cV32cIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsrari_b, "V32ScV32ScIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvsrari_h, "V16sV16sIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvsrari_w, "V8iV8iIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvsrari_d, "V4LLiV4LLiIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvsrln_b_h, "V32ScV16sV16s", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvsrln_h_w, "V16sV8SiV8Si", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvsrln_w_d, "V8SiV4LLiV4LLi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvsran_b_h, "V32ScV16sV16s", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvsran_h_w, "V16sV8SiV8Si", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvsran_w_d, "V8SiV4LLiV4LLi", "nc", "lasx") -TARGET_BUILTIN(__builtin_lasx_xvsrlni_b_h, "V32cV32cV32cIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsrlni_b_h, "V32ScV32ScV32ScIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvsrlni_h_w, "V16sV16sV16sIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvsrlni_w_d, "V8iV8iV8iIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvsrlni_d_q, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") -TARGET_BUILTIN(__builtin_lasx_xvsrani_b_h, "V32cV32cV32cIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsrani_b_h, "V32ScV32ScV32ScIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvsrani_h_w, "V16sV16sV16sIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvsrani_w_d, "V8iV8iV8iIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvsrani_d_q, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvsrlrn_b_h, "V32ScV16sV16s", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvsrlrn_h_w, "V16sV8SiV8Si", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvsrlrn_w_d, "V8SiV4LLiV4LLi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvsrarn_b_h, "V32ScV16sV16s", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvsrarn_h_w, "V16sV8SiV8Si", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvsrarn_w_d, "V8SiV4LLiV4LLi", "nc", "lasx") -TARGET_BUILTIN(__builtin_lasx_xvsrlrni_b_h, "V32cV32cV32cIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsrlrni_b_h, "V32ScV32ScV32ScIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvsrlrni_h_w, "V16sV16sV16sIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvsrlrni_w_d, "V8iV8iV8iIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvsrlrni_d_q, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") -TARGET_BUILTIN(__builtin_lasx_xvsrarni_b_h, "V32cV32cV32cIUi", "nc", "lasx") 
+TARGET_BUILTIN(__builtin_lasx_xvsrarni_b_h, "V32ScV32ScV32ScIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvsrarni_h_w, "V16sV16sV16sIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvsrarni_w_d, "V8iV8iV8iIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvsrarni_d_q, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvssrln_b_h, "V32ScV16sV16s", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvssrln_h_w, "V16sV8SiV8Si", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvssrln_w_d, "V8SiV4LLiV4LLi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvssran_b_h, "V32ScV16sV16s", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvssran_h_w, "V16sV8SiV8Si", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvssran_w_d, "V8SiV4LLiV4LLi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvssrln_bu_h, "V32UcV16UsV16Us", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvssrln_hu_w, "V16UsV8UiV8Ui", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvssrln_wu_d, "V8UiV4ULLiV4ULLi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvssran_bu_h, "V32UcV16UsV16Us", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvssran_hu_w, "V16UsV8UiV8Ui", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvssran_wu_d, "V8UiV4ULLiV4ULLi", "nc", "lasx") -TARGET_BUILTIN(__builtin_lasx_xvssrlni_b_h, "V32cV32cV32cIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvssrlni_b_h, "V32ScV32ScV32ScIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvssrlni_h_w, "V16sV16sV16sIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvssrlni_w_d, "V8iV8iV8iIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvssrlni_d_q, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") -TARGET_BUILTIN(__builtin_lasx_xvssrani_b_h, "V32cV32cV32cIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvssrani_b_h, "V32ScV32ScV32ScIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvssrani_h_w, "V16sV16sV16sIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvssrani_w_d, "V8iV8iV8iIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvssrani_d_q, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") -TARGET_BUILTIN(__builtin_lasx_xvssrlrni_bu_h, "V32cV32cV32cIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvssrlrni_bu_h, "V32ScV32ScV32ScIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvssrlrni_hu_w, "V16sV16sV16sIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvssrlrni_wu_d, "V8iV8iV8iIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvssrlrni_du_q, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") -TARGET_BUILTIN(__builtin_lasx_xvssrani_bu_h, "V32cV32cV32cIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvssrani_bu_h, "V32ScV32ScV32ScIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvssrani_hu_w, "V16sV16sV16sIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvssrani_wu_d, "V8iV8iV8iIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvssrani_du_q, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvssrlrn_b_h, "V32ScV16sV16s", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvssrlrn_h_w, "V16sV8SiV8Si", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvssrlrn_w_d, "V8SiV4LLiV4LLi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvssrarn_b_h, "V32ScV16sV16s", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvssrarn_h_w, "V16sV8SiV8Si", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvssrarn_w_d, "V8SiV4LLiV4LLi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvssrlrn_bu_h, "V32UcV16UsV16Us", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvssrlrn_hu_w, "V16UsV8UiV8Ui", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvssrlrn_wu_d, "V8UiV4ULLiV4ULLi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvssrarn_bu_h, "V32UcV16UsV16Us", "nc", "lasx") 
TARGET_BUILTIN(__builtin_lasx_xvssrarn_hu_w, "V16UsV8UiV8Ui", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvssrarn_wu_d, "V8UiV4ULLiV4ULLi", "nc", "lasx") -TARGET_BUILTIN(__builtin_lasx_xvssrlrni_b_h, "V32cV32cV32cIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvssrlrni_b_h, "V32ScV32ScV32ScIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvssrlrni_h_w, "V16sV16sV16sIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvssrlrni_w_d, "V8iV8iV8iIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvssrlrni_d_q, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") -TARGET_BUILTIN(__builtin_lasx_xvssrarni_b_h, "V32cV32cV32cIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvssrarni_b_h, "V32ScV32ScV32ScIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvssrarni_h_w, "V16sV16sV16sIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvssrarni_w_d, "V8iV8iV8iIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvssrarni_d_q, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") -TARGET_BUILTIN(__builtin_lasx_xvssrlni_bu_h, "V32cV32cV32cIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvssrlni_bu_h, "V32ScV32ScV32ScIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvssrlni_hu_w, "V16sV16sV16sIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvssrlni_wu_d, "V8iV8iV8iIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvssrlni_du_q, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") -TARGET_BUILTIN(__builtin_lasx_xvssrarni_bu_h, "V32cV32cV32cIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvssrarni_bu_h, "V32ScV32ScV32ScIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvssrarni_hu_w, "V16sV16sV16sIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvssrarni_wu_d, "V8iV8iV8iIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvssrarni_du_q, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvclo_b, "V32ScV32Sc", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvclo_h, "V16SsV16Ss", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvclo_w, "V8SiV8Si", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvclo_d, "V4SLLiV4SLLi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvclz_b, "V32ScV32Sc", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvclz_h, "V16SsV16Ss", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvclz_w, "V8SiV8Si", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvclz_d, "V4SLLiV4SLLi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvpcnt_b, "V32ScV32Sc", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvpcnt_h, "V16SsV16Ss", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvpcnt_w, "V8SiV8Si", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvpcnt_d, "V4SLLiV4SLLi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvbitclr_b, "V32UcV32UcV32Uc", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvbitclr_h, "V16UsV16UsV16Us", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvbitclr_w, "V8UiV8UiV8Ui", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvbitclr_d, "V4ULLiV4ULLiV4ULLi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvbitclri_b, "V32UcV32UcIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvbitclri_h, "V16UsV16UsIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvbitclri_w, "V8UiV8UiIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvbitclri_d, "V4ULLiV4ULLiIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvbitset_b, "V32UcV32UcV32Uc", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvbitset_h, "V16UsV16UsV16Us", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvbitset_w, "V8UiV8UiV8Ui", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvbitset_d, "V4ULLiV4ULLiV4ULLi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvbitseti_b, "V32UcV32UcIUi", "nc", "lasx") 
TARGET_BUILTIN(__builtin_lasx_xvbitseti_h, "V16UsV16UsIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvbitseti_w, "V8UiV8UiIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvbitseti_d, "V4ULLiV4ULLiIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvbitrev_b, "V32UcV32UcV32Uc", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvbitrev_h, "V16UsV16UsV16Us", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvbitrev_w, "V8UiV8UiV8Ui", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvbitrev_d, "V4ULLiV4ULLiV4ULLi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvbitrevi_b, "V32UcV32UcIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvbitrevi_h, "V16UsV16UsIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvbitrevi_w, "V8UiV8UiIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvbitrevi_d, "V4ULLiV4ULLiIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvfrstp_b, "V32ScV32ScV32ScV32Sc", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvfrstp_h, "V16SsV16SsV16SsV16Ss", "nc", "lasx") -TARGET_BUILTIN(__builtin_lasx_xvfrstpi_b, "V32cV32cV32cIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfrstpi_b, "V32ScV32ScV32ScIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvfrstpi_h, "V16sV16sV16sIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvfadd_s, "V8fV8fV8f", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvfadd_d, "V4dV4dV4d", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvfsub_s, "V8fV8fV8f", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvfsub_d, "V4dV4dV4d", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvfmul_s, "V8fV8fV8f", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvfmul_d, "V4dV4dV4d", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvfdiv_s, "V8fV8fV8f", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvfdiv_d, "V4dV4dV4d", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvfmadd_s, "V8fV8fV8fV8f", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvfmadd_d, "V4dV4dV4dV4d", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvfmsub_s, "V8fV8fV8fV8f", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvfmsub_d, "V4dV4dV4dV4d", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvfnmadd_s, "V8fV8fV8fV8f", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvfnmadd_d, "V4dV4dV4dV4d", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvfnmsub_s, "V8fV8fV8fV8f", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvfnmsub_d, "V4dV4dV4dV4d", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvfmax_s, "V8fV8fV8f", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvfmax_d, "V4dV4dV4d", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvfmin_s, "V8fV8fV8f", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvfmin_d, "V4dV4dV4d", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvfmaxa_s, "V8fV8fV8f", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvfmaxa_d, "V4dV4dV4d", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvfmina_s, "V8fV8fV8f", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvfmina_d, "V4dV4dV4d", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvflogb_s, "V8fV8f", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvflogb_d, "V4dV4d", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvfclass_s, "V8iV8f", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvfclass_d, "V4LLiV4d", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvfsqrt_s, "V8fV8f", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvfsqrt_d, "V4dV4d", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvfrecip_s, "V8fV8f", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvfrecip_d, "V4dV4d", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvfrecipe_s, "V8fV8f", "nc", "lasx,frecipe") TARGET_BUILTIN(__builtin_lasx_xvfrecipe_d, "V4dV4d", "nc", "lasx,frecipe") 
TARGET_BUILTIN(__builtin_lasx_xvfrsqrt_s, "V8fV8f", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvfrsqrt_d, "V4dV4d", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvfrsqrte_s, "V8fV8f", "nc", "lasx,frecipe") TARGET_BUILTIN(__builtin_lasx_xvfrsqrte_d, "V4dV4d", "nc", "lasx,frecipe") TARGET_BUILTIN(__builtin_lasx_xvfcvtl_s_h, "V8fV16s", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvfcvth_s_h, "V8fV16s", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvfcvtl_d_s, "V4dV8f", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvfcvth_d_s, "V4dV8f", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvfcvt_h_s, "V16sV8fV8f", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvfcvt_s_d, "V8fV4dV4d", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvfrintrne_s, "V8SiV8f", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvfrintrne_d, "V4LLiV4d", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvfrintrz_s, "V8SiV8f", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvfrintrz_d, "V4LLiV4d", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvfrintrp_s, "V8SiV8f", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvfrintrp_d, "V4LLiV4d", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvfrintrm_s, "V8SiV8f", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvfrintrm_d, "V4LLiV4d", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvfrint_s, "V8fV8f", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvfrint_d, "V4dV4d", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvftintrne_w_s, "V8SiV8f", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvftintrne_l_d, "V4LLiV4d", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvftintrz_w_s, "V8SiV8f", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvftintrz_l_d, "V4LLiV4d", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvftintrp_w_s, "V8SiV8f", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvftintrp_l_d, "V4LLiV4d", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvftintrm_w_s, "V8SiV8f", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvftintrm_l_d, "V4LLiV4d", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvftint_w_s, "V8SiV8f", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvftint_l_d, "V4SLLiV4d", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvftintrz_wu_s, "V8UiV8f", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvftintrz_lu_d, "V4ULLiV4d", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvftint_wu_s, "V8UiV8f", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvftint_lu_d, "V4ULLiV4d", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvftintrne_w_d, "V8SiV4dV4d", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvftintrz_w_d, "V8SiV4dV4d", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvftintrp_w_d, "V8SiV4dV4d", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvftintrm_w_d, "V8SiV4dV4d", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvftint_w_d, "V8SiV4dV4d", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvftintrnel_l_s, "V4LLiV8f", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvftintrneh_l_s, "V4LLiV8f", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvftintrzl_l_s, "V4LLiV8f", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvftintrzh_l_s, "V4LLiV8f", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvftintrpl_l_s, "V4LLiV8f", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvftintrph_l_s, "V4LLiV8f", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvftintrml_l_s, "V4LLiV8f", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvftintrmh_l_s, "V4LLiV8f", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvftintl_l_s, "V4LLiV8f", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvftinth_l_s, "V4LLiV8f", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvffint_s_w, "V8fV8Si", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvffint_d_l, 
"V4dV4SLLi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvffint_s_wu, "V8fV8Ui", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvffint_d_lu, "V4dV4ULLi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvffintl_d_w, "V4dV8Si", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvffinth_d_w, "V4dV8Si", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvffint_s_l, "V8fV4LLiV4LLi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvseq_b, "V32ScV32ScV32Sc", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvseq_h, "V16SsV16SsV16Ss", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvseq_w, "V8SiV8SiV8Si", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvseq_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvseqi_b, "V32ScV32ScISi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvseqi_h, "V16SsV16SsISi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvseqi_w, "V8SiV8SiISi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvseqi_d, "V4SLLiV4SLLiISi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvsle_b, "V32ScV32ScV32Sc", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvsle_h, "V16SsV16SsV16Ss", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvsle_w, "V8SiV8SiV8Si", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvsle_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvslei_b, "V32ScV32ScISi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvslei_h, "V16SsV16SsISi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvslei_w, "V8SiV8SiISi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvslei_d, "V4SLLiV4SLLiISi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvsle_bu, "V32ScV32UcV32Uc", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvsle_hu, "V16SsV16UsV16Us", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvsle_wu, "V8SiV8UiV8Ui", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvsle_du, "V4SLLiV4ULLiV4ULLi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvslei_bu, "V32ScV32UcIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvslei_hu, "V16SsV16UsIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvslei_wu, "V8SiV8UiIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvslei_du, "V4SLLiV4ULLiIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvslt_b, "V32ScV32ScV32Sc", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvslt_h, "V16SsV16SsV16Ss", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvslt_w, "V8SiV8SiV8Si", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvslt_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvslti_b, "V32ScV32ScISi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvslti_h, "V16SsV16SsISi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvslti_w, "V8SiV8SiISi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvslti_d, "V4SLLiV4SLLiISi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvslt_bu, "V32ScV32UcV32Uc", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvslt_hu, "V16SsV16UsV16Us", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvslt_wu, "V8SiV8UiV8Ui", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvslt_du, "V4SLLiV4ULLiV4ULLi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvslti_bu, "V32ScV32UcIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvslti_hu, "V16SsV16UsIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvslti_wu, "V8SiV8UiIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvslti_du, "V4SLLiV4ULLiIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvfcmp_caf_s, "V8SiV8fV8f", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvfcmp_caf_d, "V4SLLiV4dV4d", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvfcmp_cun_s, "V8SiV8fV8f", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvfcmp_cun_d, "V4SLLiV4dV4d", "nc", "lasx") 
TARGET_BUILTIN(__builtin_lasx_xvfcmp_ceq_s, "V8SiV8fV8f", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvfcmp_ceq_d, "V4SLLiV4dV4d", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvfcmp_cueq_s, "V8SiV8fV8f", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvfcmp_cueq_d, "V4SLLiV4dV4d", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvfcmp_clt_s, "V8SiV8fV8f", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvfcmp_clt_d, "V4SLLiV4dV4d", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvfcmp_cult_s, "V8SiV8fV8f", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvfcmp_cult_d, "V4SLLiV4dV4d", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvfcmp_cle_s, "V8SiV8fV8f", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvfcmp_cle_d, "V4SLLiV4dV4d", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvfcmp_cule_s, "V8SiV8fV8f", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvfcmp_cule_d, "V4SLLiV4dV4d", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvfcmp_cne_s, "V8SiV8fV8f", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvfcmp_cne_d, "V4SLLiV4dV4d", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvfcmp_cor_s, "V8SiV8fV8f", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvfcmp_cor_d, "V4SLLiV4dV4d", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvfcmp_cune_s, "V8SiV8fV8f", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvfcmp_cune_d, "V4SLLiV4dV4d", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvfcmp_saf_s, "V8SiV8fV8f", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvfcmp_saf_d, "V4SLLiV4dV4d", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvfcmp_sun_s, "V8SiV8fV8f", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvfcmp_sun_d, "V4SLLiV4dV4d", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvfcmp_seq_s, "V8SiV8fV8f", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvfcmp_seq_d, "V4SLLiV4dV4d", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvfcmp_sueq_s, "V8SiV8fV8f", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvfcmp_sueq_d, "V4SLLiV4dV4d", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvfcmp_slt_s, "V8SiV8fV8f", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvfcmp_slt_d, "V4SLLiV4dV4d", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvfcmp_sult_s, "V8SiV8fV8f", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvfcmp_sult_d, "V4SLLiV4dV4d", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvfcmp_sle_s, "V8SiV8fV8f", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvfcmp_sle_d, "V4SLLiV4dV4d", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvfcmp_sule_s, "V8SiV8fV8f", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvfcmp_sule_d, "V4SLLiV4dV4d", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvfcmp_sne_s, "V8SiV8fV8f", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvfcmp_sne_d, "V4SLLiV4dV4d", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvfcmp_sor_s, "V8SiV8fV8f", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvfcmp_sor_d, "V4SLLiV4dV4d", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvfcmp_sune_s, "V8SiV8fV8f", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvfcmp_sune_d, "V4SLLiV4dV4d", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvbitsel_v, "V32UcV32UcV32UcV32Uc", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvbitseli_b, "V32UcV32UcV32UcIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvreplgr2vr_b, "V32Sci", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvreplgr2vr_h, "V16Ssi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvreplgr2vr_w, "V8Sii", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvreplgr2vr_d, "V4SLLiLLi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvinsgr2vr_w, "V8SiV8SiiIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvinsgr2vr_d, "V4SLLiV4SLLiLLiIUi", "nc", "lasx") 
TARGET_BUILTIN(__builtin_lasx_xvpickve2gr_w, "iV8SiIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvpickve2gr_d, "LLiV4SLLiIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvpickve2gr_wu, "iV8UiIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvpickve2gr_du, "LLiV4ULLiIUi", "nc", "lasx") -TARGET_BUILTIN(__builtin_lasx_xvreplve_b, "V32cV32cUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvreplve_b, "V32ScV32ScUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvreplve_h, "V16sV16sUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvreplve_w, "V8iV8iUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvreplve_d, "V4LLiV4LLiUi", "nc", "lasx") -TARGET_BUILTIN(__builtin_lasx_xvrepl128vei_b, "V32cV32cIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvrepl128vei_b, "V32ScV32ScIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvrepl128vei_h, "V16sV16sIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvrepl128vei_w, "V8iV8iIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvrepl128vei_d, "V4LLiV4LLiIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvreplve0_b, "V32ScV32Sc", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvreplve0_h, "V16SsV16Ss", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvreplve0_w, "V8SiV8Si", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvreplve0_d, "V4SLLiV4SLLi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvreplve0_q, "V32ScV32Sc", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvinsve0_w, "V8iV8iV8iIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvinsve0_d, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvpickve_w, "V8iV8iIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvpickve_d, "V4LLiV4LLiIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvpickve_w_f, "V8fV8fIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvpickve_d_f, "V4dV4dIUi", "nc", "lasx") -TARGET_BUILTIN(__builtin_lasx_xvbsll_v, "V32cV32cIUi", "nc", "lasx") -TARGET_BUILTIN(__builtin_lasx_xvbsrl_v, "V32cV32cIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvbsll_v, "V32ScV32ScIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvbsrl_v, "V32ScV32ScIUi", "nc", "lasx") -TARGET_BUILTIN(__builtin_lasx_xvpackev_b, "V32cV32cV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvpackev_b, "V32ScV32ScV32Sc", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvpackev_h, "V16sV16sV16s", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvpackev_w, "V8iV8iV8i", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvpackev_d, "V4LLiV4LLiV4LLi", "nc", "lasx") -TARGET_BUILTIN(__builtin_lasx_xvpackod_b, "V32cV32cV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvpackod_b, "V32ScV32ScV32Sc", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvpackod_h, "V16sV16sV16s", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvpackod_w, "V8iV8iV8i", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvpackod_d, "V4LLiV4LLiV4LLi", "nc", "lasx") -TARGET_BUILTIN(__builtin_lasx_xvpickev_b, "V32cV32cV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvpickev_b, "V32ScV32ScV32Sc", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvpickev_h, "V16sV16sV16s", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvpickev_w, "V8iV8iV8i", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvpickev_d, "V4LLiV4LLiV4LLi", "nc", "lasx") -TARGET_BUILTIN(__builtin_lasx_xvpickod_b, "V32cV32cV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvpickod_b, "V32ScV32ScV32Sc", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvpickod_h, "V16sV16sV16s", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvpickod_w, "V8iV8iV8i", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvpickod_d, "V4LLiV4LLiV4LLi", "nc", 
"lasx") -TARGET_BUILTIN(__builtin_lasx_xvilvl_b, "V32cV32cV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvilvl_b, "V32ScV32ScV32Sc", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvilvl_h, "V16sV16sV16s", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvilvl_w, "V8iV8iV8i", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvilvl_d, "V4LLiV4LLiV4LLi", "nc", "lasx") -TARGET_BUILTIN(__builtin_lasx_xvilvh_b, "V32cV32cV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvilvh_b, "V32ScV32ScV32Sc", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvilvh_h, "V16sV16sV16s", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvilvh_w, "V8iV8iV8i", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvilvh_d, "V4LLiV4LLiV4LLi", "nc", "lasx") -TARGET_BUILTIN(__builtin_lasx_xvshuf_b, "V32UcV32UcV32UcV32Uc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvshuf_b, "V32ScV32ScV32ScV32Sc", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvshuf_h, "V16sV16sV16sV16s", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvshuf_w, "V8iV8iV8iV8i", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvshuf_d, "V4LLiV4LLiV4LLiV4LLi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvperm_w, "V8iV8iV8i", "nc", "lasx") -TARGET_BUILTIN(__builtin_lasx_xvshuf4i_b, "V32cV32cIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvshuf4i_b, "V32ScV32ScIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvshuf4i_h, "V16sV16sIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvshuf4i_w, "V8iV8iIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvshuf4i_d, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvpermi_w, "V8iV8iV8iIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvpermi_d, "V4LLiV4LLiIUi", "nc", "lasx") -TARGET_BUILTIN(__builtin_lasx_xvpermi_q, "V32cV32cV32cIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvpermi_q, "V32ScV32ScV32ScIUi", "nc", "lasx") -TARGET_BUILTIN(__builtin_lasx_xvextrins_b, "V32cV32cV32cIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvextrins_b, "V32ScV32ScV32ScIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvextrins_h, "V16sV16sV16sIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvextrins_w, "V8iV8iV8iIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvextrins_d, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvld, "V32ScvC*Ii", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvst, "vV32Scv*Ii", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvldx, "V32ScvC*LLi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvstx, "vV32Scv*LLi", "nc", "lasx") -TARGET_BUILTIN(__builtin_lasx_xvldrepl_b, "V32cvC*Ii", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvldrepl_b, "V32ScvC*Ii", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvldrepl_h, "V16svC*Ii", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvldrepl_w, "V8ivC*Ii", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvldrepl_d, "V4LLivC*Ii", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvstelm_b, "vV32Scv*IiUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvstelm_h, "vV16Ssv*IiUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvstelm_w, "vV8Siv*IiUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvstelm_d, "vV4SLLiv*IiUi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xbz_v, "iV32Uc", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xbnz_v, "iV32Uc", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xbz_b, "iV32Uc", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xbz_h, "iV16Us", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xbz_w, "iV8Ui", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xbz_d, "iV4ULLi", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xbnz_b, "iV32Uc", "nc", "lasx") 
TARGET_BUILTIN(__builtin_lasx_xbnz_h, "iV16Us", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xbnz_w, "iV8Ui", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xbnz_d, "iV4ULLi", "nc", "lasx") diff --git a/clang/include/clang/Basic/BuiltinsLoongArchLSX.def b/clang/include/clang/Basic/BuiltinsLoongArchLSX.def index c90f4dc5458f..b3056971986d 100644 --- a/clang/include/clang/Basic/BuiltinsLoongArchLSX.def +++ b/clang/include/clang/Basic/BuiltinsLoongArchLSX.def @@ -1,959 +1,959 @@ //=============------------- BuiltinsLoongArchLSX.def --------------- C++ -*-=// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file defines the LoongArch-specific LSX builtin function database. // Users of this file must define the BUILTIN macro to make use of this // information. // //===----------------------------------------------------------------------===// -TARGET_BUILTIN(__builtin_lsx_vadd_b, "V16cV16cV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vadd_b, "V16ScV16ScV16Sc", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vadd_h, "V8sV8sV8s", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vadd_w, "V4iV4iV4i", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vadd_d, "V2LLiV2LLiV2LLi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vadd_q, "V2LLiV2LLiV2LLi", "nc", "lsx") -TARGET_BUILTIN(__builtin_lsx_vsub_b, "V16cV16cV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsub_b, "V16ScV16ScV16Sc", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vsub_h, "V8sV8sV8s", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vsub_w, "V4iV4iV4i", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vsub_d, "V2LLiV2LLiV2LLi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vsub_q, "V2LLiV2LLiV2LLi", "nc", "lsx") -TARGET_BUILTIN(__builtin_lsx_vaddi_bu, "V16cV16cIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vaddi_bu, "V16ScV16ScIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vaddi_hu, "V8sV8sIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vaddi_wu, "V4iV4iIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vaddi_du, "V2LLiV2LLiIUi", "nc", "lsx") -TARGET_BUILTIN(__builtin_lsx_vsubi_bu, "V16cV16cIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsubi_bu, "V16ScV16ScIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vsubi_hu, "V8sV8sIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vsubi_wu, "V4iV4iIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vsubi_du, "V2LLiV2LLiIUi", "nc", "lsx") -TARGET_BUILTIN(__builtin_lsx_vneg_b, "V16cV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vneg_b, "V16ScV16Sc", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vneg_h, "V8sV8s", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vneg_w, "V4iV4i", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vneg_d, "V2LLiV2LLi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vsadd_b, "V16ScV16ScV16Sc", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vsadd_h, "V8SsV8SsV8Ss", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vsadd_w, "V4SiV4SiV4Si", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vsadd_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vsadd_bu, "V16UcV16UcV16Uc", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vsadd_hu, "V8UsV8UsV8Us", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vsadd_wu, "V4UiV4UiV4Ui", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vsadd_du, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vssub_b, "V16ScV16ScV16Sc", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vssub_h, 
"V8SsV8SsV8Ss", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vssub_w, "V4SiV4SiV4Si", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vssub_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vssub_bu, "V16UcV16UcV16Uc", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vssub_hu, "V8UsV8UsV8Us", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vssub_wu, "V4UiV4UiV4Ui", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vssub_du, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vhaddw_h_b, "V8SsV16ScV16Sc", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vhaddw_w_h, "V4SiV8SsV8Ss", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vhaddw_d_w, "V2SLLiV4SiV4Si", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vhaddw_q_d, "V2LLiV2LLiV2LLi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vhaddw_hu_bu, "V8UsV16UcV16Uc", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vhaddw_wu_hu, "V4UiV8UsV8Us", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vhaddw_du_wu, "V2ULLiV4UiV4Ui", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vhaddw_qu_du, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vhsubw_h_b, "V8SsV16ScV16Sc", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vhsubw_w_h, "V4SiV8SsV8Ss", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vhsubw_d_w, "V2SLLiV4SiV4Si", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vhsubw_q_d, "V2LLiV2LLiV2LLi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vhsubw_hu_bu, "V8UsV16UcV16Uc", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vhsubw_wu_hu, "V4UiV8UsV8Us", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vhsubw_du_wu, "V2ULLiV4UiV4Ui", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vhsubw_qu_du, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") -TARGET_BUILTIN(__builtin_lsx_vaddwev_h_b, "V8sV16cV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vaddwev_h_b, "V8sV16ScV16Sc", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vaddwev_w_h, "V4SiV8sV8s", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vaddwev_d_w, "V2LLiV4SiV4Si", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vaddwev_q_d, "V2LLiV2LLiV2LLi", "nc", "lsx") -TARGET_BUILTIN(__builtin_lsx_vaddwod_h_b, "V8sV16cV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vaddwod_h_b, "V8sV16ScV16Sc", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vaddwod_w_h, "V4SiV8sV8s", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vaddwod_d_w, "V2LLiV4SiV4Si", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vaddwod_q_d, "V2LLiV2LLiV2LLi", "nc", "lsx") -TARGET_BUILTIN(__builtin_lsx_vsubwev_h_b, "V8sV16cV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsubwev_h_b, "V8sV16ScV16Sc", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vsubwev_w_h, "V4SiV8sV8s", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vsubwev_d_w, "V2LLiV4SiV4Si", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vsubwev_q_d, "V2LLiV2LLiV2LLi", "nc", "lsx") -TARGET_BUILTIN(__builtin_lsx_vsubwod_h_b, "V8sV16cV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsubwod_h_b, "V8sV16ScV16Sc", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vsubwod_w_h, "V4SiV8sV8s", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vsubwod_d_w, "V2LLiV4SiV4Si", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vsubwod_q_d, "V2LLiV2LLiV2LLi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vaddwev_h_bu, "V8sV16UcV16Uc", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vaddwev_w_hu, "V4SiV8UsV8Us", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vaddwev_d_wu, "V2LLiV4UiV4Ui", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vaddwev_q_du, "V2LLiV2ULLiV2ULLi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vaddwod_h_bu, "V8sV16UcV16Uc", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vaddwod_w_hu, "V4SiV8UsV8Us", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vaddwod_d_wu, 
"V2LLiV4UiV4Ui", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vaddwod_q_du, "V2LLiV2ULLiV2ULLi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vsubwev_h_bu, "V8sV16UcV16Uc", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vsubwev_w_hu, "V4SiV8UsV8Us", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vsubwev_d_wu, "V2LLiV4UiV4Ui", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vsubwev_q_du, "V2LLiV2ULLiV2ULLi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vsubwod_h_bu, "V8sV16UcV16Uc", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vsubwod_w_hu, "V4SiV8UsV8Us", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vsubwod_d_wu, "V2LLiV4UiV4Ui", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vsubwod_q_du, "V2LLiV2ULLiV2ULLi", "nc", "lsx") -TARGET_BUILTIN(__builtin_lsx_vaddwev_h_bu_b, "V8sV16UcV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vaddwev_h_bu_b, "V8sV16UcV16Sc", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vaddwev_w_hu_h, "V4SiV8UsV8s", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vaddwev_d_wu_w, "V2LLiV4UiV4Si", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vaddwev_q_du_d, "V2LLiV2ULLiV2LLi", "nc", "lsx") -TARGET_BUILTIN(__builtin_lsx_vaddwod_h_bu_b, "V8sV16UcV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vaddwod_h_bu_b, "V8sV16UcV16Sc", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vaddwod_w_hu_h, "V4SiV8UsV8s", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vaddwod_d_wu_w, "V2LLiV4UiV4Si", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vaddwod_q_du_d, "V2LLiV2ULLiV2LLi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vavg_b, "V16ScV16ScV16Sc", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vavg_h, "V8SsV8SsV8Ss", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vavg_w, "V4SiV4SiV4Si", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vavg_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vavg_bu, "V16UcV16UcV16Uc", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vavg_hu, "V8UsV8UsV8Us", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vavg_wu, "V4UiV4UiV4Ui", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vavg_du, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vavgr_b, "V16ScV16ScV16Sc", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vavgr_h, "V8SsV8SsV8Ss", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vavgr_w, "V4SiV4SiV4Si", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vavgr_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vavgr_bu, "V16UcV16UcV16Uc", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vavgr_hu, "V8UsV8UsV8Us", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vavgr_wu, "V4UiV4UiV4Ui", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vavgr_du, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vabsd_b, "V16ScV16ScV16Sc", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vabsd_h, "V8SsV8SsV8Ss", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vabsd_w, "V4SiV4SiV4Si", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vabsd_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vabsd_bu, "V16UcV16UcV16Uc", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vabsd_hu, "V8UsV8UsV8Us", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vabsd_wu, "V4UiV4UiV4Ui", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vabsd_du, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vadda_b, "V16ScV16ScV16Sc", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vadda_h, "V8SsV8SsV8Ss", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vadda_w, "V4SiV4SiV4Si", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vadda_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vmax_b, "V16ScV16ScV16Sc", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vmax_h, "V8SsV8SsV8Ss", "nc", "lsx") 
TARGET_BUILTIN(__builtin_lsx_vmax_w, "V4SiV4SiV4Si", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vmax_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vmaxi_b, "V16ScV16ScIi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vmaxi_h, "V8SsV8SsIi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vmaxi_w, "V4SiV4SiIi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vmaxi_d, "V2SLLiV2SLLiIi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vmax_bu, "V16UcV16UcV16Uc", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vmax_hu, "V8UsV8UsV8Us", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vmax_wu, "V4UiV4UiV4Ui", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vmax_du, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vmaxi_bu, "V16UcV16UcIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vmaxi_hu, "V8UsV8UsIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vmaxi_wu, "V4UiV4UiIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vmaxi_du, "V2ULLiV2ULLiIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vmin_b, "V16ScV16ScV16Sc", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vmin_h, "V8SsV8SsV8Ss", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vmin_w, "V4SiV4SiV4Si", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vmin_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vmini_b, "V16ScV16ScIi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vmini_h, "V8SsV8SsIi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vmini_w, "V4SiV4SiIi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vmini_d, "V2SLLiV2SLLiIi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vmin_bu, "V16UcV16UcV16Uc", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vmin_hu, "V8UsV8UsV8Us", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vmin_wu, "V4UiV4UiV4Ui", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vmin_du, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vmini_bu, "V16UcV16UcIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vmini_hu, "V8UsV8UsIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vmini_wu, "V4UiV4UiIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vmini_du, "V2ULLiV2ULLiIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vmul_b, "V16ScV16ScV16Sc", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vmul_h, "V8SsV8SsV8Ss", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vmul_w, "V4SiV4SiV4Si", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vmul_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") -TARGET_BUILTIN(__builtin_lsx_vmuh_b, "V16cV16cV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmuh_b, "V16ScV16ScV16Sc", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vmuh_h, "V8sV8sV8s", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vmuh_w, "V4iV4iV4i", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vmuh_d, "V2LLiV2LLiV2LLi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vmuh_bu, "V16UcV16UcV16Uc", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vmuh_hu, "V8UsV8UsV8Us", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vmuh_wu, "V4UiV4UiV4Ui", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vmuh_du, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") -TARGET_BUILTIN(__builtin_lsx_vmulwev_h_b, "V8sV16cV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmulwev_h_b, "V8sV16ScV16Sc", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vmulwev_w_h, "V4SiV8sV8s", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vmulwev_d_w, "V2LLiV4SiV4Si", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vmulwev_q_d, "V2LLiV2LLiV2LLi", "nc", "lsx") -TARGET_BUILTIN(__builtin_lsx_vmulwod_h_b, "V8sV16cV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmulwod_h_b, "V8sV16ScV16Sc", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vmulwod_w_h, "V4SiV8sV8s", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vmulwod_d_w, 
"V2LLiV4SiV4Si", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vmulwod_q_d, "V2LLiV2LLiV2LLi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vmulwev_h_bu, "V8sV16UcV16Uc", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vmulwev_w_hu, "V4SiV8UsV8Us", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vmulwev_d_wu, "V2LLiV4UiV4Ui", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vmulwev_q_du, "V2LLiV2ULLiV2ULLi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vmulwod_h_bu, "V8sV16UcV16Uc", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vmulwod_w_hu, "V4SiV8UsV8Us", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vmulwod_d_wu, "V2LLiV4UiV4Ui", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vmulwod_q_du, "V2LLiV2ULLiV2ULLi", "nc", "lsx") -TARGET_BUILTIN(__builtin_lsx_vmulwev_h_bu_b, "V8sV16UcV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmulwev_h_bu_b, "V8sV16UcV16Sc", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vmulwev_w_hu_h, "V4SiV8UsV8s", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vmulwev_d_wu_w, "V2LLiV4UiV4Si", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vmulwev_q_du_d, "V2LLiV2ULLiV2LLi", "nc", "lsx") -TARGET_BUILTIN(__builtin_lsx_vmulwod_h_bu_b, "V8sV16UcV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmulwod_h_bu_b, "V8sV16UcV16Sc", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vmulwod_w_hu_h, "V4SiV8UsV8s", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vmulwod_d_wu_w, "V2LLiV4UiV4Si", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vmulwod_q_du_d, "V2LLiV2ULLiV2LLi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vmadd_b, "V16ScV16ScV16ScV16Sc", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vmadd_h, "V8SsV8SsV8SsV8Ss", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vmadd_w, "V4SiV4SiV4SiV4Si", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vmadd_d, "V2SLLiV2SLLiV2SLLiV2SLLi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vmsub_b, "V16ScV16ScV16ScV16Sc", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vmsub_h, "V8SsV8SsV8SsV8Ss", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vmsub_w, "V4SiV4SiV4SiV4Si", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vmsub_d, "V2SLLiV2SLLiV2SLLiV2SLLi", "nc", "lsx") -TARGET_BUILTIN(__builtin_lsx_vmaddwev_h_b, "V8sV8sV16cV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmaddwev_h_b, "V8sV8sV16ScV16Sc", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vmaddwev_w_h, "V4SiV4SiV8sV8s", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vmaddwev_d_w, "V2LLiV2LLiV4SiV4Si", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vmaddwev_q_d, "V2LLiV2LLiV2LLiV2LLi", "nc", "lsx") -TARGET_BUILTIN(__builtin_lsx_vmaddwod_h_b, "V8sV8sV16cV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmaddwod_h_b, "V8sV8sV16ScV16Sc", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vmaddwod_w_h, "V4SiV4SiV8sV8s", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vmaddwod_d_w, "V2LLiV2LLiV4SiV4Si", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vmaddwod_q_d, "V2LLiV2LLiV2LLiV2LLi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vmaddwev_h_bu, "V8UsV8UsV16UcV16Uc", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vmaddwev_w_hu, "V4UiV4UiV8UsV8Us", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vmaddwev_d_wu, "V2ULLiV2ULLiV4UiV4Ui", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vmaddwev_q_du, "V2ULLiV2ULLiV2ULLiV2ULLi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vmaddwod_h_bu, "V8UsV8UsV16UcV16Uc", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vmaddwod_w_hu, "V4UiV4UiV8UsV8Us", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vmaddwod_d_wu, "V2ULLiV2ULLiV4UiV4Ui", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vmaddwod_q_du, "V2ULLiV2ULLiV2ULLiV2ULLi", "nc", "lsx") -TARGET_BUILTIN(__builtin_lsx_vmaddwev_h_bu_b, "V8sV8sV16UcV16c", "nc", "lsx") 
+TARGET_BUILTIN(__builtin_lsx_vmaddwev_h_bu_b, "V8sV8sV16UcV16Sc", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vmaddwev_w_hu_h, "V4SiV4SiV8UsV8s", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vmaddwev_d_wu_w, "V2LLiV2LLiV4UiV4Si", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vmaddwev_q_du_d, "V2LLiV2LLiV2ULLiV2LLi", "nc", "lsx") -TARGET_BUILTIN(__builtin_lsx_vmaddwod_h_bu_b, "V8sV8sV16UcV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmaddwod_h_bu_b, "V8sV8sV16UcV16Sc", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vmaddwod_w_hu_h, "V4SiV4SiV8UsV8s", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vmaddwod_d_wu_w, "V2LLiV2LLiV4UiV4Si", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vmaddwod_q_du_d, "V2LLiV2LLiV2ULLiV2LLi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vdiv_b, "V16ScV16ScV16Sc", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vdiv_h, "V8SsV8SsV8Ss", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vdiv_w, "V4SiV4SiV4Si", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vdiv_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vdiv_bu, "V16UcV16UcV16Uc", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vdiv_hu, "V8UsV8UsV8Us", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vdiv_wu, "V4UiV4UiV4Ui", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vdiv_du, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vmod_b, "V16ScV16ScV16Sc", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vmod_h, "V8SsV8SsV8Ss", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vmod_w, "V4SiV4SiV4Si", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vmod_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vmod_bu, "V16UcV16UcV16Uc", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vmod_hu, "V8UsV8UsV8Us", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vmod_wu, "V4UiV4UiV4Ui", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vmod_du, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vsat_b, "V16ScV16ScIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vsat_h, "V8SsV8SsIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vsat_w, "V4SiV4SiIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vsat_d, "V2SLLiV2SLLiIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vsat_bu, "V16UcV16UcIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vsat_hu, "V8UsV8UsIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vsat_wu, "V4UiV4UiIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vsat_du, "V2ULLiV2ULLiIUi", "nc", "lsx") -TARGET_BUILTIN(__builtin_lsx_vexth_h_b, "V8sV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vexth_h_b, "V8sV16Sc", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vexth_w_h, "V4SiV8s", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vexth_d_w, "V2LLiV4Si", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vexth_q_d, "V2LLiV2LLi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vexth_hu_bu, "V8UsV16Uc", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vexth_wu_hu, "V4UiV8Us", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vexth_du_wu, "V2ULLiV4Ui", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vexth_qu_du, "V2ULLiV2ULLi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vsigncov_b, "V16ScV16ScV16Sc", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vsigncov_h, "V8SsV8SsV8Ss", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vsigncov_w, "V4SiV4SiV4Si", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vsigncov_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") -TARGET_BUILTIN(__builtin_lsx_vmskltz_b, "V16cV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmskltz_b, "V16ScV16Sc", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vmskltz_h, "V8sV8s", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vmskltz_w, "V4iV4i", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vmskltz_d, 
"V2LLiV2LLi", "nc", "lsx") -TARGET_BUILTIN(__builtin_lsx_vmskgez_b, "V16cV16c", "nc", "lsx") -TARGET_BUILTIN(__builtin_lsx_vmsknz_b, "V8sV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmskgez_b, "V16ScV16Sc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmsknz_b, "V16ScV16Sc", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vldi, "V2LLiIi", "nc", "lsx") -TARGET_BUILTIN(__builtin_lsx_vrepli_b, "V16cIi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vrepli_b, "V16ScIi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vrepli_h, "V8sIi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vrepli_w, "V4iIi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vrepli_d, "V2LLiIi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vand_v, "V16UcV16UcV16Uc", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vor_v, "V16UcV16UcV16Uc", "nc", "lsx") -TARGET_BUILTIN(__builtin_lsx_vxor_v, "V16cV16cV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vxor_v, "V16UcV16UcV16Uc", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vnor_v, "V16UcV16UcV16Uc", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vandn_v, "V16UcV16UcV16Uc", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vorn_v, "V16ScV16ScV16Sc", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vandi_b, "V16UcV16UcIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vori_b, "V16UcV16UcIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vxori_b, "V16UcV16UcIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vnori_b, "V16UcV16UcIUi", "nc", "lsx") -TARGET_BUILTIN(__builtin_lsx_vsll_b, "V16cV16cV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsll_b, "V16ScV16ScV16Sc", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vsll_h, "V8sV8sV8s", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vsll_w, "V4iV4iV4i", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vsll_d, "V2LLiV2LLiV2LLi", "nc", "lsx") -TARGET_BUILTIN(__builtin_lsx_vslli_b, "V16cV16cIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vslli_b, "V16ScV16ScIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vslli_h, "V8sV8sIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vslli_w, "V4iV4iIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vslli_d, "V2LLiV2LLiIUi", "nc", "lsx") -TARGET_BUILTIN(__builtin_lsx_vsrl_b, "V16cV16cV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrl_b, "V16ScV16ScV16Sc", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vsrl_h, "V8sV8sV8s", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vsrl_w, "V4iV4iV4i", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vsrl_d, "V2LLiV2LLiV2LLi", "nc", "lsx") -TARGET_BUILTIN(__builtin_lsx_vsrli_b, "V16cV16cIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrli_b, "V16ScV16ScIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vsrli_h, "V8sV8sIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vsrli_w, "V4iV4iIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vsrli_d, "V2LLiV2LLiIUi", "nc", "lsx") -TARGET_BUILTIN(__builtin_lsx_vsra_b, "V16cV16cV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsra_b, "V16ScV16ScV16Sc", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vsra_h, "V8sV8sV8s", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vsra_w, "V4iV4iV4i", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vsra_d, "V2LLiV2LLiV2LLi", "nc", "lsx") -TARGET_BUILTIN(__builtin_lsx_vsrai_b, "V16cV16cIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrai_b, "V16ScV16ScIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vsrai_h, "V8sV8sIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vsrai_w, "V4iV4iIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vsrai_d, "V2LLiV2LLiIUi", "nc", "lsx") -TARGET_BUILTIN(__builtin_lsx_vrotr_b, "V16cV16cV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vrotr_b, "V16ScV16ScV16Sc", "nc", "lsx") 
TARGET_BUILTIN(__builtin_lsx_vrotr_h, "V8sV8sV8s", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vrotr_w, "V4iV4iV4i", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vrotr_d, "V2LLiV2LLiV2LLi", "nc", "lsx") -TARGET_BUILTIN(__builtin_lsx_vrotri_b, "V16cV16cIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vrotri_b, "V16ScV16ScIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vrotri_h, "V8sV8sIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vrotri_w, "V4iV4iIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vrotri_d, "V2LLiV2LLiIUi", "nc", "lsx") -TARGET_BUILTIN(__builtin_lsx_vsllwil_h_b, "V8sV16cIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsllwil_h_b, "V8sV16ScIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vsllwil_w_h, "V4SiV8sIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vsllwil_d_w, "V2LLiV4SiIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vextl_q_d, "V2LLiV2LLi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vsllwil_hu_bu, "V8UsV16UcIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vsllwil_wu_hu, "V4UiV8UsIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vsllwil_du_wu, "V2ULLiV4UiIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vextl_qu_du, "V2LLiV2ULLi", "nc", "lsx") -TARGET_BUILTIN(__builtin_lsx_vsrlr_b, "V16cV16cV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrlr_b, "V16ScV16ScV16Sc", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vsrlr_h, "V8sV8sV8s", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vsrlr_w, "V4iV4iV4i", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vsrlr_d, "V2LLiV2LLiV2LLi", "nc", "lsx") -TARGET_BUILTIN(__builtin_lsx_vsrlri_b, "V16cV16cIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrlri_b, "V16ScV16ScIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vsrlri_h, "V8sV8sIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vsrlri_w, "V4iV4iIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vsrlri_d, "V2LLiV2LLiIUi", "nc", "lsx") -TARGET_BUILTIN(__builtin_lsx_vsrar_b, "V16cV16cV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrar_b, "V16ScV16ScV16Sc", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vsrar_h, "V8sV8sV8s", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vsrar_w, "V4iV4iV4i", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vsrar_d, "V2LLiV2LLiV2LLi", "nc", "lsx") -TARGET_BUILTIN(__builtin_lsx_vsrari_b, "V16cV16cIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrari_b, "V16ScV16ScIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vsrari_h, "V8sV8sIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vsrari_w, "V4iV4iIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vsrari_d, "V2LLiV2LLiIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vsrln_b_h, "V16ScV8sV8s", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vsrln_h_w, "V8sV4SiV4Si", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vsrln_w_d, "V4SiV2LLiV2LLi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vsran_b_h, "V16ScV8sV8s", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vsran_h_w, "V8sV4SiV4Si", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vsran_w_d, "V4SiV2LLiV2LLi", "nc", "lsx") -TARGET_BUILTIN(__builtin_lsx_vsrlni_b_h, "V16cV16cV16cIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrlni_b_h, "V16ScV16ScV16ScIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vsrlni_h_w, "V8sV8sV8sIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vsrlni_w_d, "V4iV4iV4iIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vsrlni_d_q, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") -TARGET_BUILTIN(__builtin_lsx_vsrani_b_h, "V16cV16cV16cIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrani_b_h, "V16ScV16ScV16ScIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vsrani_h_w, "V8sV8sV8sIUi", "nc", "lsx") 
TARGET_BUILTIN(__builtin_lsx_vsrani_w_d, "V4iV4iV4iIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vsrani_d_q, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vsrlrn_b_h, "V16ScV8sV8s", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vsrlrn_h_w, "V8sV4SiV4Si", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vsrlrn_w_d, "V4SiV2LLiV2LLi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vsrarn_b_h, "V16ScV8sV8s", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vsrarn_h_w, "V8sV4SiV4Si", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vsrarn_w_d, "V4SiV2LLiV2LLi", "nc", "lsx") -TARGET_BUILTIN(__builtin_lsx_vsrlrni_b_h, "V16cV16cV16cIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrlrni_b_h, "V16ScV16ScV16ScIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vsrlrni_h_w, "V8sV8sV8sIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vsrlrni_w_d, "V4iV4iV4iIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vsrlrni_d_q, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") -TARGET_BUILTIN(__builtin_lsx_vsrarni_b_h, "V16cV16cV16cIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrarni_b_h, "V16ScV16ScV16ScIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vsrarni_h_w, "V8sV8sV8sIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vsrarni_w_d, "V4iV4iV4iIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vsrarni_d_q, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vssrln_b_h, "V16ScV8sV8s", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vssrln_h_w, "V8sV4SiV4Si", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vssrln_w_d, "V4SiV2LLiV2LLi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vssran_b_h, "V16ScV8sV8s", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vssran_h_w, "V8sV4SiV4Si", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vssran_w_d, "V4SiV2LLiV2LLi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vssrln_bu_h, "V16UcV8UsV8Us", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vssrln_hu_w, "V8UsV4UiV4Ui", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vssrln_wu_d, "V4UiV2ULLiV2ULLi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vssran_bu_h, "V16UcV8UsV8Us", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vssran_hu_w, "V8UsV4UiV4Ui", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vssran_wu_d, "V4UiV2ULLiV2ULLi", "nc", "lsx") -TARGET_BUILTIN(__builtin_lsx_vssrlni_b_h, "V16cV16cV16cIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssrlni_b_h, "V16ScV16ScV16ScIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vssrlni_h_w, "V8sV8sV8sIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vssrlni_w_d, "V4iV4iV4iIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vssrlni_d_q, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") -TARGET_BUILTIN(__builtin_lsx_vssrani_b_h, "V16cV16cV16cIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssrani_b_h, "V16ScV16ScV16ScIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vssrani_h_w, "V8sV8sV8sIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vssrani_w_d, "V4iV4iV4iIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vssrani_d_q, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") -TARGET_BUILTIN(__builtin_lsx_vssrlrni_bu_h, "V16cV16cV16cIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssrlrni_bu_h, "V16ScV16ScV16ScIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vssrlrni_hu_w, "V8sV8sV8sIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vssrlrni_wu_d, "V4iV4iV4iIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vssrlrni_du_q, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") -TARGET_BUILTIN(__builtin_lsx_vssrani_bu_h, "V16cV16cV16cIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssrani_bu_h, "V16ScV16ScV16ScIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vssrani_hu_w, "V8sV8sV8sIUi", "nc", "lsx") 
TARGET_BUILTIN(__builtin_lsx_vssrani_wu_d, "V4iV4iV4iIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vssrani_du_q, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vssrlrn_b_h, "V16ScV8sV8s", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vssrlrn_h_w, "V8sV4SiV4Si", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vssrlrn_w_d, "V4SiV2LLiV2LLi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vssrarn_b_h, "V16ScV8sV8s", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vssrarn_h_w, "V8sV4SiV4Si", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vssrarn_w_d, "V4SiV2LLiV2LLi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vssrlrn_bu_h, "V16UcV8UsV8Us", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vssrlrn_hu_w, "V8UsV4UiV4Ui", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vssrlrn_wu_d, "V4UiV2ULLiV2ULLi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vssrarn_bu_h, "V16UcV8UsV8Us", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vssrarn_hu_w, "V8UsV4UiV4Ui", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vssrarn_wu_d, "V4UiV2ULLiV2ULLi", "nc", "lsx") -TARGET_BUILTIN(__builtin_lsx_vssrlrni_b_h, "V16cV16cV16cIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssrlrni_b_h, "V16ScV16ScV16ScIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vssrlrni_h_w, "V8sV8sV8sIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vssrlrni_w_d, "V4iV4iV4iIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vssrlrni_d_q, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") -TARGET_BUILTIN(__builtin_lsx_vssrarni_b_h, "V16cV16cV16cIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssrarni_b_h, "V16ScV16ScV16ScIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vssrarni_h_w, "V8sV8sV8sIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vssrarni_w_d, "V4iV4iV4iIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vssrarni_d_q, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") -TARGET_BUILTIN(__builtin_lsx_vssrlni_bu_h, "V16cV16cV16cIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssrlni_bu_h, "V16ScV16ScV16ScIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vssrlni_hu_w, "V8sV8sV8sIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vssrlni_wu_d, "V4iV4iV4iIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vssrlni_du_q, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") -TARGET_BUILTIN(__builtin_lsx_vssrarni_bu_h, "V16cV16cV16cIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssrarni_bu_h, "V16ScV16ScV16ScIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vssrarni_hu_w, "V8sV8sV8sIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vssrarni_wu_d, "V4iV4iV4iIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vssrarni_du_q, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vclo_b, "V16ScV16Sc", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vclo_h, "V8SsV8Ss", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vclo_w, "V4SiV4Si", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vclo_d, "V2SLLiV2SLLi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vclz_b, "V16ScV16Sc", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vclz_h, "V8SsV8Ss", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vclz_w, "V4SiV4Si", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vclz_d, "V2SLLiV2SLLi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vpcnt_b, "V16ScV16Sc", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vpcnt_h, "V8SsV8Ss", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vpcnt_w, "V4SiV4Si", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vpcnt_d, "V2SLLiV2SLLi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vbitclr_b, "V16UcV16UcV16Uc", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vbitclr_h, "V8UsV8UsV8Us", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vbitclr_w, "V4UiV4UiV4Ui", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vbitclr_d, 
"V2ULLiV2ULLiV2ULLi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vbitclri_b, "V16UcV16UcIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vbitclri_h, "V8UsV8UsIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vbitclri_w, "V4UiV4UiIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vbitclri_d, "V2ULLiV2ULLiIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vbitset_b, "V16UcV16UcV16Uc", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vbitset_h, "V8UsV8UsV8Us", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vbitset_w, "V4UiV4UiV4Ui", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vbitset_d, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vbitseti_b, "V16UcV16UcIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vbitseti_h, "V8UsV8UsIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vbitseti_w, "V4UiV4UiIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vbitseti_d, "V2ULLiV2ULLiIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vbitrev_b, "V16UcV16UcV16Uc", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vbitrev_h, "V8UsV8UsV8Us", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vbitrev_w, "V4UiV4UiV4Ui", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vbitrev_d, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vbitrevi_b, "V16UcV16UcIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vbitrevi_h, "V8UsV8UsIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vbitrevi_w, "V4UiV4UiIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vbitrevi_d, "V2ULLiV2ULLiIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vfrstp_b, "V16ScV16ScV16ScV16Sc", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vfrstp_h, "V8SsV8SsV8SsV8Ss", "nc", "lsx") -TARGET_BUILTIN(__builtin_lsx_vfrstpi_b, "V16cV16cV16cIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfrstpi_b, "V16ScV16ScV16ScIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vfrstpi_h, "V8sV8sV8sIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vfadd_s, "V4fV4fV4f", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vfadd_d, "V2dV2dV2d", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vfsub_s, "V4fV4fV4f", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vfsub_d, "V2dV2dV2d", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vfmul_s, "V4fV4fV4f", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vfmul_d, "V2dV2dV2d", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vfdiv_s, "V4fV4fV4f", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vfdiv_d, "V2dV2dV2d", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vfmadd_s, "V4fV4fV4fV4f", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vfmadd_d, "V2dV2dV2dV2d", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vfmsub_s, "V4fV4fV4fV4f", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vfmsub_d, "V2dV2dV2dV2d", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vfnmadd_s, "V4fV4fV4fV4f", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vfnmadd_d, "V2dV2dV2dV2d", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vfnmsub_s, "V4fV4fV4fV4f", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vfnmsub_d, "V2dV2dV2dV2d", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vfmax_s, "V4fV4fV4f", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vfmax_d, "V2dV2dV2d", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vfmin_s, "V4fV4fV4f", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vfmin_d, "V2dV2dV2d", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vfmaxa_s, "V4fV4fV4f", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vfmaxa_d, "V2dV2dV2d", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vfmina_s, "V4fV4fV4f", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vfmina_d, "V2dV2dV2d", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vflogb_s, "V4fV4f", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vflogb_d, "V2dV2d", "nc", "lsx") 
TARGET_BUILTIN(__builtin_lsx_vfclass_s, "V4iV4f", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vfclass_d, "V2LLiV2d", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vfsqrt_s, "V4fV4f", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vfsqrt_d, "V2dV2d", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vfrecip_s, "V4fV4f", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vfrecip_d, "V2dV2d", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vfrecipe_s, "V4fV4f", "nc", "lsx,frecipe") TARGET_BUILTIN(__builtin_lsx_vfrecipe_d, "V2dV2d", "nc", "lsx,frecipe") TARGET_BUILTIN(__builtin_lsx_vfrsqrt_s, "V4fV4f", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vfrsqrt_d, "V2dV2d", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vfrsqrte_s, "V4fV4f", "nc", "lsx,frecipe") TARGET_BUILTIN(__builtin_lsx_vfrsqrte_d, "V2dV2d", "nc", "lsx,frecipe") TARGET_BUILTIN(__builtin_lsx_vfcvtl_s_h, "V4fV8s", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vfcvtl_d_s, "V2dV4f", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vfcvth_s_h, "V4fV8s", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vfcvth_d_s, "V2dV4f", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vfcvt_h_s, "V8sV4fV4f", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vfcvt_s_d, "V4fV2dV2d", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vfrintrne_s, "V4SiV4f", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vfrintrne_d, "V2LLiV2d", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vfrintrz_s, "V4SiV4f", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vfrintrz_d, "V2LLiV2d", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vfrintrp_s, "V4SiV4f", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vfrintrp_d, "V2LLiV2d", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vfrintrm_s, "V4SiV4f", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vfrintrm_d, "V2LLiV2d", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vfrint_s, "V4fV4f", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vfrint_d, "V2dV2d", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vftintrne_w_s, "V4SiV4f", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vftintrne_l_d, "V2LLiV2d", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vftintrz_w_s, "V4SiV4f", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vftintrz_l_d, "V2LLiV2d", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vftintrp_w_s, "V4SiV4f", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vftintrp_l_d, "V2LLiV2d", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vftintrm_w_s, "V4SiV4f", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vftintrm_l_d, "V2LLiV2d", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vftint_w_s, "V4SiV4f", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vftint_l_d, "V2SLLiV2d", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vftintrz_wu_s, "V4UiV4f", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vftintrz_lu_d, "V2ULLiV2d", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vftint_wu_s, "V4UiV4f", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vftint_lu_d, "V2ULLiV2d", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vftintrne_w_d, "V4SiV2dV2d", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vftintrz_w_d, "V4SiV2dV2d", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vftintrp_w_d, "V4SiV2dV2d", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vftintrm_w_d, "V4SiV2dV2d", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vftint_w_d, "V4SiV2dV2d", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vftintrnel_l_s, "V2LLiV4f", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vftintrneh_l_s, "V2LLiV4f", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vftintrzl_l_s, "V2LLiV4f", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vftintrzh_l_s, "V2LLiV4f", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vftintrpl_l_s, "V2LLiV4f", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vftintrph_l_s, "V2LLiV4f", "nc", "lsx") 
TARGET_BUILTIN(__builtin_lsx_vftintrml_l_s, "V2LLiV4f", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vftintrmh_l_s, "V2LLiV4f", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vftintl_l_s, "V2LLiV4f", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vftinth_l_s, "V2LLiV4f", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vffint_s_w, "V4fV4Si", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vffint_d_l, "V2dV2SLLi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vffint_s_wu, "V4fV4Ui", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vffint_d_lu, "V2dV2ULLi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vffintl_d_w, "V2dV4Si", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vffinth_d_w, "V2dV4Si", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vffint_s_l, "V4fV2LLiV2LLi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vseq_b, "V16ScV16ScV16Sc", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vseq_h, "V8SsV8SsV8Ss", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vseq_w, "V4SiV4SiV4Si", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vseq_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vseqi_b, "V16ScV16ScISi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vseqi_h, "V8SsV8SsISi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vseqi_w, "V4SiV4SiISi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vseqi_d, "V2SLLiV2SLLiISi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vsle_b, "V16ScV16ScV16Sc", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vsle_h, "V8SsV8SsV8Ss", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vsle_w, "V4SiV4SiV4Si", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vsle_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vslei_b, "V16ScV16ScISi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vslei_h, "V8SsV8SsISi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vslei_w, "V4SiV4SiISi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vslei_d, "V2SLLiV2SLLiISi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vsle_bu, "V16ScV16UcV16Uc", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vsle_hu, "V8SsV8UsV8Us", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vsle_wu, "V4SiV4UiV4Ui", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vsle_du, "V2SLLiV2ULLiV2ULLi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vslei_bu, "V16ScV16UcIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vslei_hu, "V8SsV8UsIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vslei_wu, "V4SiV4UiIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vslei_du, "V2SLLiV2ULLiIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vslt_b, "V16ScV16ScV16Sc", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vslt_h, "V8SsV8SsV8Ss", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vslt_w, "V4SiV4SiV4Si", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vslt_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vslti_b, "V16ScV16ScISi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vslti_h, "V8SsV8SsISi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vslti_w, "V4SiV4SiISi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vslti_d, "V2SLLiV2SLLiISi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vslt_bu, "V16ScV16UcV16Uc", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vslt_hu, "V8SsV8UsV8Us", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vslt_wu, "V4SiV4UiV4Ui", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vslt_du, "V2SLLiV2ULLiV2ULLi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vslti_bu, "V16ScV16UcIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vslti_hu, "V8SsV8UsIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vslti_wu, "V4SiV4UiIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vslti_du, "V2SLLiV2ULLiIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vfcmp_caf_s, "V4SiV4fV4f", "nc", "lsx") 
TARGET_BUILTIN(__builtin_lsx_vfcmp_caf_d, "V2SLLiV2dV2d", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vfcmp_cun_s, "V4SiV4fV4f", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vfcmp_cun_d, "V2SLLiV2dV2d", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vfcmp_ceq_s, "V4SiV4fV4f", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vfcmp_ceq_d, "V2SLLiV2dV2d", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vfcmp_cueq_s, "V4SiV4fV4f", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vfcmp_cueq_d, "V2SLLiV2dV2d", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vfcmp_clt_s, "V4SiV4fV4f", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vfcmp_clt_d, "V2SLLiV2dV2d", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vfcmp_cult_s, "V4SiV4fV4f", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vfcmp_cult_d, "V2SLLiV2dV2d", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vfcmp_cle_s, "V4SiV4fV4f", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vfcmp_cle_d, "V2SLLiV2dV2d", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vfcmp_cule_s, "V4SiV4fV4f", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vfcmp_cule_d, "V2SLLiV2dV2d", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vfcmp_cne_s, "V4SiV4fV4f", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vfcmp_cne_d, "V2SLLiV2dV2d", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vfcmp_cor_s, "V4SiV4fV4f", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vfcmp_cor_d, "V2SLLiV2dV2d", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vfcmp_cune_s, "V4SiV4fV4f", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vfcmp_cune_d, "V2SLLiV2dV2d", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vfcmp_saf_s, "V4SiV4fV4f", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vfcmp_saf_d, "V2SLLiV2dV2d", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vfcmp_sun_s, "V4SiV4fV4f", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vfcmp_sun_d, "V2SLLiV2dV2d", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vfcmp_seq_s, "V4SiV4fV4f", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vfcmp_seq_d, "V2SLLiV2dV2d", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vfcmp_sueq_s, "V4SiV4fV4f", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vfcmp_sueq_d, "V2SLLiV2dV2d", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vfcmp_slt_s, "V4SiV4fV4f", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vfcmp_slt_d, "V2SLLiV2dV2d", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vfcmp_sult_s, "V4SiV4fV4f", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vfcmp_sult_d, "V2SLLiV2dV2d", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vfcmp_sle_s, "V4SiV4fV4f", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vfcmp_sle_d, "V2SLLiV2dV2d", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vfcmp_sule_s, "V4SiV4fV4f", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vfcmp_sule_d, "V2SLLiV2dV2d", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vfcmp_sne_s, "V4SiV4fV4f", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vfcmp_sne_d, "V2SLLiV2dV2d", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vfcmp_sor_s, "V4SiV4fV4f", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vfcmp_sor_d, "V2SLLiV2dV2d", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vfcmp_sune_s, "V4SiV4fV4f", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vfcmp_sune_d, "V2SLLiV2dV2d", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vbitsel_v, "V16UcV16UcV16UcV16Uc", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vbitseli_b, "V16UcV16UcV16UcIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vreplgr2vr_b, "V16Sci", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vreplgr2vr_h, "V8Ssi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vreplgr2vr_w, "V4Sii", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vreplgr2vr_d, "V2SLLiLLi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vinsgr2vr_b, "V16ScV16SciIUi", "nc", "lsx") 
TARGET_BUILTIN(__builtin_lsx_vinsgr2vr_h, "V8SsV8SsiIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vinsgr2vr_w, "V4SiV4SiiIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vinsgr2vr_d, "V2SLLiV2SLLiLLiIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vpickve2gr_b, "iV16ScIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vpickve2gr_h, "iV8SsIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vpickve2gr_w, "iV4SiIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vpickve2gr_d, "LLiV2SLLiIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vpickve2gr_bu, "iV16UcIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vpickve2gr_hu, "iV8UsIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vpickve2gr_wu, "iV4UiIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vpickve2gr_du, "LLiV2ULLiIUi", "nc", "lsx") -TARGET_BUILTIN(__builtin_lsx_vreplve_b, "V16cV16cUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vreplve_b, "V16ScV16ScUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vreplve_h, "V8sV8sUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vreplve_w, "V4iV4iUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vreplve_d, "V2LLiV2LLiUi", "nc", "lsx") -TARGET_BUILTIN(__builtin_lsx_vreplvei_b, "V16cV16cIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vreplvei_b, "V16ScV16ScIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vreplvei_h, "V8sV8sIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vreplvei_w, "V4iV4iIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vreplvei_d, "V2LLiV2LLiIUi", "nc", "lsx") -TARGET_BUILTIN(__builtin_lsx_vbsll_v, "V16cV16cIUi", "nc", "lsx") -TARGET_BUILTIN(__builtin_lsx_vbsrl_v, "V16cV16cIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vbsll_v, "V16ScV16ScIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vbsrl_v, "V16ScV16ScIUi", "nc", "lsx") -TARGET_BUILTIN(__builtin_lsx_vpackev_b, "V16cV16cV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vpackev_b, "V16ScV16ScV16Sc", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vpackev_h, "V8sV8sV8s", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vpackev_w, "V4iV4iV4i", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vpackev_d, "V2LLiV2LLiV2LLi", "nc", "lsx") -TARGET_BUILTIN(__builtin_lsx_vpackod_b, "V16cV16cV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vpackod_b, "V16ScV16ScV16Sc", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vpackod_h, "V8sV8sV8s", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vpackod_w, "V4iV4iV4i", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vpackod_d, "V2LLiV2LLiV2LLi", "nc", "lsx") -TARGET_BUILTIN(__builtin_lsx_vpickev_b, "V16cV16cV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vpickev_b, "V16ScV16ScV16Sc", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vpickev_h, "V8sV8sV8s", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vpickev_w, "V4iV4iV4i", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vpickev_d, "V2LLiV2LLiV2LLi", "nc", "lsx") -TARGET_BUILTIN(__builtin_lsx_vpickod_b, "V16cV16cV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vpickod_b, "V16ScV16ScV16Sc", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vpickod_h, "V8sV8sV8s", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vpickod_w, "V4iV4iV4i", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vpickod_d, "V2LLiV2LLiV2LLi", "nc", "lsx") -TARGET_BUILTIN(__builtin_lsx_vilvl_b, "V16cV16cV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vilvl_b, "V16ScV16ScV16Sc", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vilvl_h, "V8sV8sV8s", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vilvl_w, "V4iV4iV4i", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vilvl_d, "V2LLiV2LLiV2LLi", "nc", "lsx") -TARGET_BUILTIN(__builtin_lsx_vilvh_b, "V16cV16cV16c", "nc", "lsx") 
+TARGET_BUILTIN(__builtin_lsx_vilvh_b, "V16ScV16ScV16Sc", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vilvh_h, "V8sV8sV8s", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vilvh_w, "V4iV4iV4i", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vilvh_d, "V2LLiV2LLiV2LLi", "nc", "lsx") -TARGET_BUILTIN(__builtin_lsx_vshuf_b, "V16UcV16UcV16UcV16Uc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vshuf_b, "V16ScV16ScV16ScV16Sc", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vshuf_h, "V8sV8sV8sV8s", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vshuf_w, "V4iV4iV4iV4i", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vshuf_d, "V2LLiV2LLiV2LLiV2LLi", "nc", "lsx") -TARGET_BUILTIN(__builtin_lsx_vshuf4i_b, "V16cV16cIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vshuf4i_b, "V16ScV16ScIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vshuf4i_h, "V8sV8sIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vshuf4i_w, "V4iV4iIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vshuf4i_d, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vpermi_w, "V4iV4iV4iIUi", "nc", "lsx") -TARGET_BUILTIN(__builtin_lsx_vextrins_b, "V16cV16cV16cIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vextrins_b, "V16ScV16ScV16ScIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vextrins_h, "V8sV8sV8sIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vextrins_w, "V4iV4iV4iIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vextrins_d, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vld, "V16ScvC*Ii", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vst, "vV16Scv*Ii", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vldx, "V16ScvC*LLi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vstx, "vV16Scv*LLi", "nc", "lsx") -TARGET_BUILTIN(__builtin_lsx_vldrepl_b, "V16cvC*Ii", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vldrepl_b, "V16ScvC*Ii", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vldrepl_h, "V8svC*Ii", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vldrepl_w, "V4ivC*Ii", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vldrepl_d, "V2LLivC*Ii", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vstelm_b, "vV16Scv*IiUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vstelm_h, "vV8Ssv*IiUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vstelm_w, "vV4Siv*IiUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vstelm_d, "vV2SLLiv*IiUi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_bz_v, "iV16Uc", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_bnz_v, "iV16Uc", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_bz_b, "iV16Uc", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_bz_h, "iV8Us", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_bz_w, "iV4Ui", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_bz_d, "iV2ULLi", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_bnz_b, "iV16Uc", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_bnz_h, "iV8Us", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_bnz_w, "iV4Ui", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_bnz_d, "iV2ULLi", "nc", "lsx") diff --git a/clang/lib/AST/Decl.cpp b/clang/lib/AST/Decl.cpp index 490c4a2fc525..bc7cce0bcd7f 100644 --- a/clang/lib/AST/Decl.cpp +++ b/clang/lib/AST/Decl.cpp @@ -1,5761 +1,5765 @@ //===- Decl.cpp - Declaration AST Node Implementation ---------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file implements the Decl subclasses. 
// //===----------------------------------------------------------------------===// #include "clang/AST/Decl.h" #include "Linkage.h" #include "clang/AST/ASTContext.h" #include "clang/AST/ASTDiagnostic.h" #include "clang/AST/ASTLambda.h" #include "clang/AST/ASTMutationListener.h" #include "clang/AST/Attr.h" #include "clang/AST/CanonicalType.h" #include "clang/AST/DeclBase.h" #include "clang/AST/DeclCXX.h" #include "clang/AST/DeclObjC.h" #include "clang/AST/DeclOpenMP.h" #include "clang/AST/DeclTemplate.h" #include "clang/AST/DeclarationName.h" #include "clang/AST/Expr.h" #include "clang/AST/ExprCXX.h" #include "clang/AST/ExternalASTSource.h" #include "clang/AST/ODRHash.h" #include "clang/AST/PrettyDeclStackTrace.h" #include "clang/AST/PrettyPrinter.h" #include "clang/AST/Randstruct.h" #include "clang/AST/RecordLayout.h" #include "clang/AST/Redeclarable.h" #include "clang/AST/Stmt.h" #include "clang/AST/TemplateBase.h" #include "clang/AST/Type.h" #include "clang/AST/TypeLoc.h" #include "clang/Basic/Builtins.h" #include "clang/Basic/IdentifierTable.h" #include "clang/Basic/LLVM.h" #include "clang/Basic/LangOptions.h" #include "clang/Basic/Linkage.h" #include "clang/Basic/Module.h" #include "clang/Basic/NoSanitizeList.h" #include "clang/Basic/PartialDiagnostic.h" #include "clang/Basic/Sanitizers.h" #include "clang/Basic/SourceLocation.h" #include "clang/Basic/SourceManager.h" #include "clang/Basic/Specifiers.h" #include "clang/Basic/TargetCXXABI.h" #include "clang/Basic/TargetInfo.h" #include "clang/Basic/Visibility.h" #include "llvm/ADT/APSInt.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/TargetParser/Triple.h" #include #include #include #include #include #include #include #include #include using namespace clang; Decl *clang::getPrimaryMergedDecl(Decl *D) { return D->getASTContext().getPrimaryMergedDecl(D); } void PrettyDeclStackTraceEntry::print(raw_ostream &OS) const { SourceLocation Loc = this->Loc; if (!Loc.isValid() && TheDecl) Loc = TheDecl->getLocation(); if (Loc.isValid()) { Loc.print(OS, Context.getSourceManager()); OS << ": "; } OS << Message; if (auto *ND = dyn_cast_if_present(TheDecl)) { OS << " '"; ND->getNameForDiagnostic(OS, Context.getPrintingPolicy(), true); OS << "'"; } OS << '\n'; } // Defined here so that it can be inlined into its direct callers. bool Decl::isOutOfLine() const { return !getLexicalDeclContext()->Equals(getDeclContext()); } TranslationUnitDecl::TranslationUnitDecl(ASTContext &ctx) : Decl(TranslationUnit, nullptr, SourceLocation()), DeclContext(TranslationUnit), redeclarable_base(ctx), Ctx(ctx) {} //===----------------------------------------------------------------------===// // NamedDecl Implementation //===----------------------------------------------------------------------===// // Visibility rules aren't rigorously externally specified, but here // are the basic principles behind what we implement: // // 1. An explicit visibility attribute is generally a direct expression // of the user's intent and should be honored. Only the innermost // visibility attribute applies. If no visibility attribute applies, // global visibility settings are considered. // // 2. 
There is one caveat to the above: on or in a template pattern, // an explicit visibility attribute is just a default rule, and // visibility can be decreased by the visibility of template // arguments. But this, too, has an exception: an attribute on an // explicit specialization or instantiation causes all the visibility // restrictions of the template arguments to be ignored. // // 3. A variable that does not otherwise have explicit visibility can // be restricted by the visibility of its type. // // 4. A visibility restriction is explicit if it comes from an // attribute (or something like it), not a global visibility setting. // When emitting a reference to an external symbol, visibility // restrictions are ignored unless they are explicit. // // 5. When computing the visibility of a non-type, including a // non-type member of a class, only non-type visibility restrictions // are considered: the 'visibility' attribute, global value-visibility // settings, and a few special cases like __private_extern. // // 6. When computing the visibility of a type, including a type member // of a class, only type visibility restrictions are considered: // the 'type_visibility' attribute and global type-visibility settings. // However, a 'visibility' attribute counts as a 'type_visibility' // attribute on any declaration that only has the former. // // The visibility of a "secondary" entity, like a template argument, // is computed using the kind of that entity, not the kind of the // primary entity for which we are computing visibility. For example, // the visibility of a specialization of either of these templates: // template bool has_match(list, X); // template class matcher; // is restricted according to the type visibility of the argument 'T', // the type visibility of 'bool(&)(T,X)', and the value visibility of // the argument function 'compare'. That 'has_match' is a value // and 'matcher' is a type only matters when looking for attributes // and settings from the immediate context. /// Does this computation kind permit us to consider additional /// visibility settings from attributes and the like? static bool hasExplicitVisibilityAlready(LVComputationKind computation) { return computation.IgnoreExplicitVisibility; } /// Given an LVComputationKind, return one of the same type/value sort /// that records that it already has explicit visibility. static LVComputationKind withExplicitVisibilityAlready(LVComputationKind Kind) { Kind.IgnoreExplicitVisibility = true; return Kind; } static std::optional getExplicitVisibility(const NamedDecl *D, LVComputationKind kind) { assert(!kind.IgnoreExplicitVisibility && "asking for explicit visibility when we shouldn't be"); return D->getExplicitVisibility(kind.getExplicitVisibilityKind()); } /// Is the given declaration a "type" or a "value" for the purposes of /// visibility computation? static bool usesTypeVisibility(const NamedDecl *D) { return isa(D) || isa(D) || isa(D); } /// Does the given declaration have member specialization information, /// and if so, is it an explicit specialization? template static std::enable_if_t, bool> isExplicitMemberSpecialization(const T *D) { if (const MemberSpecializationInfo *member = D->getMemberSpecializationInfo()) { return member->isExplicitSpecialization(); } return false; } /// For templates, this question is easier: a member template can't be /// explicitly instantiated, so there's a single bit indicating whether /// or not this is an explicit member specialization. 
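// Editorial illustration of rule 2 above (a sketch, assuming default
// -fvisibility settings; the names are invented for the example): the
// attribute on a template pattern is only a default, so a hidden template
// argument still restricts the visibility of the specialization.
struct __attribute__((visibility("hidden"))) HiddenArg {};
template <class T> struct __attribute__((visibility("default"))) Widget { T t; };
Widget<HiddenArg> W;  // Widget<HiddenArg> ends up with hidden visibility.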
static bool isExplicitMemberSpecialization(const RedeclarableTemplateDecl *D) { return D->isMemberSpecialization(); } /// Given a visibility attribute, return the explicit visibility /// associated with it. template static Visibility getVisibilityFromAttr(const T *attr) { switch (attr->getVisibility()) { case T::Default: return DefaultVisibility; case T::Hidden: return HiddenVisibility; case T::Protected: return ProtectedVisibility; } llvm_unreachable("bad visibility kind"); } /// Return the explicit visibility of the given declaration. static std::optional getVisibilityOf(const NamedDecl *D, NamedDecl::ExplicitVisibilityKind kind) { // If we're ultimately computing the visibility of a type, look for // a 'type_visibility' attribute before looking for 'visibility'. if (kind == NamedDecl::VisibilityForType) { if (const auto *A = D->getAttr()) { return getVisibilityFromAttr(A); } } // If this declaration has an explicit visibility attribute, use it. if (const auto *A = D->getAttr()) { return getVisibilityFromAttr(A); } return std::nullopt; } LinkageInfo LinkageComputer::getLVForType(const Type &T, LVComputationKind computation) { if (computation.IgnoreAllVisibility) return LinkageInfo(T.getLinkage(), DefaultVisibility, true); return getTypeLinkageAndVisibility(&T); } /// Get the most restrictive linkage for the types in the given /// template parameter list. For visibility purposes, template /// parameters are part of the signature of a template. LinkageInfo LinkageComputer::getLVForTemplateParameterList( const TemplateParameterList *Params, LVComputationKind computation) { LinkageInfo LV; for (const NamedDecl *P : *Params) { // Template type parameters are the most common and never // contribute to visibility, pack or not. if (isa(P)) continue; // Non-type template parameters can be restricted by the value type, e.g. // template class A { ... }; // We have to be careful here, though, because we can be dealing with // dependent types. if (const auto *NTTP = dyn_cast(P)) { // Handle the non-pack case first. if (!NTTP->isExpandedParameterPack()) { if (!NTTP->getType()->isDependentType()) { LV.merge(getLVForType(*NTTP->getType(), computation)); } continue; } // Look at all the types in an expanded pack. for (unsigned i = 0, n = NTTP->getNumExpansionTypes(); i != n; ++i) { QualType type = NTTP->getExpansionType(i); if (!type->isDependentType()) LV.merge(getTypeLinkageAndVisibility(type)); } continue; } // Template template parameters can be restricted by their // template parameters, recursively. const auto *TTP = cast(P); // Handle the non-pack case first. if (!TTP->isExpandedParameterPack()) { LV.merge(getLVForTemplateParameterList(TTP->getTemplateParameters(), computation)); continue; } // Look at all expansions in an expanded pack. for (unsigned i = 0, n = TTP->getNumExpansionTemplateParameters(); i != n; ++i) { LV.merge(getLVForTemplateParameterList( TTP->getExpansionTemplateParameters(i), computation)); } } return LV; } static const Decl *getOutermostFuncOrBlockContext(const Decl *D) { const Decl *Ret = nullptr; const DeclContext *DC = D->getDeclContext(); while (DC->getDeclKind() != Decl::TranslationUnit) { if (isa(DC) || isa(DC)) Ret = cast(DC); DC = DC->getParent(); } return Ret; } /// Get the most restrictive linkage for the types and /// declarations in the given template argument list. /// /// Note that we don't take an LVComputationKind because we always /// want to honor the visibility of template arguments in the same way. 
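// Editorial illustration for getVisibilityOf above (a sketch; the name is
// invented): when visibility is computed *for a type*, a type_visibility
// attribute is consulted before a plain visibility attribute.
struct __attribute__((visibility("default"), type_visibility("hidden"))) Node {};
// Node's type visibility (e.g. RTTI artifacts) is hidden, while its value
// visibility (e.g. member function symbols) follows the plain attribute.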
LinkageInfo LinkageComputer::getLVForTemplateArgumentList(ArrayRef Args, LVComputationKind computation) { LinkageInfo LV; for (const TemplateArgument &Arg : Args) { switch (Arg.getKind()) { case TemplateArgument::Null: case TemplateArgument::Integral: case TemplateArgument::Expression: continue; case TemplateArgument::Type: LV.merge(getLVForType(*Arg.getAsType(), computation)); continue; case TemplateArgument::Declaration: { const NamedDecl *ND = Arg.getAsDecl(); assert(!usesTypeVisibility(ND)); LV.merge(getLVForDecl(ND, computation)); continue; } case TemplateArgument::NullPtr: LV.merge(getTypeLinkageAndVisibility(Arg.getNullPtrType())); continue; case TemplateArgument::StructuralValue: LV.merge(getLVForValue(Arg.getAsStructuralValue(), computation)); continue; case TemplateArgument::Template: case TemplateArgument::TemplateExpansion: if (TemplateDecl *Template = Arg.getAsTemplateOrTemplatePattern().getAsTemplateDecl()) LV.merge(getLVForDecl(Template, computation)); continue; case TemplateArgument::Pack: LV.merge(getLVForTemplateArgumentList(Arg.getPackAsArray(), computation)); continue; } llvm_unreachable("bad template argument kind"); } return LV; } LinkageInfo LinkageComputer::getLVForTemplateArgumentList(const TemplateArgumentList &TArgs, LVComputationKind computation) { return getLVForTemplateArgumentList(TArgs.asArray(), computation); } static bool shouldConsiderTemplateVisibility(const FunctionDecl *fn, const FunctionTemplateSpecializationInfo *specInfo) { // Include visibility from the template parameters and arguments // only if this is not an explicit instantiation or specialization // with direct explicit visibility. (Implicit instantiations won't // have a direct attribute.) if (!specInfo->isExplicitInstantiationOrSpecialization()) return true; return !fn->hasAttr(); } /// Merge in template-related linkage and visibility for the given /// function template specialization. /// /// We don't need a computation kind here because we can assume /// LVForValue. /// /// \param[out] LV the computation to use for the parent void LinkageComputer::mergeTemplateLV( LinkageInfo &LV, const FunctionDecl *fn, const FunctionTemplateSpecializationInfo *specInfo, LVComputationKind computation) { bool considerVisibility = shouldConsiderTemplateVisibility(fn, specInfo); FunctionTemplateDecl *temp = specInfo->getTemplate(); // Merge information from the template declaration. LinkageInfo tempLV = getLVForDecl(temp, computation); // The linkage of the specialization should be consistent with the // template declaration. LV.setLinkage(tempLV.getLinkage()); // Merge information from the template parameters. LinkageInfo paramsLV = getLVForTemplateParameterList(temp->getTemplateParameters(), computation); LV.mergeMaybeWithVisibility(paramsLV, considerVisibility); // Merge information from the template arguments. const TemplateArgumentList &templateArgs = *specInfo->TemplateArguments; LinkageInfo argsLV = getLVForTemplateArgumentList(templateArgs, computation); LV.mergeMaybeWithVisibility(argsLV, considerVisibility); } /// Does the given declaration have a direct visibility attribute /// that would match the given rules? static bool hasDirectVisibilityAttribute(const NamedDecl *D, LVComputationKind computation) { if (computation.IgnoreAllVisibility) return false; return (computation.isTypeVisibility() && D->hasAttr()) || D->hasAttr(); } /// Should we consider visibility associated with the template /// arguments and parameters of the given class template specialization? 
static bool shouldConsiderTemplateVisibility( const ClassTemplateSpecializationDecl *spec, LVComputationKind computation) { // Include visibility from the template parameters and arguments // only if this is not an explicit instantiation or specialization // with direct explicit visibility (and note that implicit // instantiations won't have a direct attribute). // // Furthermore, we want to ignore template parameters and arguments // for an explicit specialization when computing the visibility of a // member thereof with explicit visibility. // // This is a bit complex; let's unpack it. // // An explicit class specialization is an independent, top-level // declaration. As such, if it or any of its members has an // explicit visibility attribute, that must directly express the // user's intent, and we should honor it. The same logic applies to // an explicit instantiation of a member of such a thing. // Fast path: if this is not an explicit instantiation or // specialization, we always want to consider template-related // visibility restrictions. if (!spec->isExplicitInstantiationOrSpecialization()) return true; // This is the 'member thereof' check. if (spec->isExplicitSpecialization() && hasExplicitVisibilityAlready(computation)) return false; return !hasDirectVisibilityAttribute(spec, computation); } /// Merge in template-related linkage and visibility for the given /// class template specialization. void LinkageComputer::mergeTemplateLV( LinkageInfo &LV, const ClassTemplateSpecializationDecl *spec, LVComputationKind computation) { bool considerVisibility = shouldConsiderTemplateVisibility(spec, computation); // Merge information from the template parameters, but ignore // visibility if we're only considering template arguments. ClassTemplateDecl *temp = spec->getSpecializedTemplate(); // Merge information from the template declaration. LinkageInfo tempLV = getLVForDecl(temp, computation); // The linkage of the specialization should be consistent with the // template declaration. LV.setLinkage(tempLV.getLinkage()); LinkageInfo paramsLV = getLVForTemplateParameterList(temp->getTemplateParameters(), computation); LV.mergeMaybeWithVisibility(paramsLV, considerVisibility && !hasExplicitVisibilityAlready(computation)); // Merge information from the template arguments. We ignore // template-argument visibility if we've got an explicit // instantiation with a visibility attribute. const TemplateArgumentList &templateArgs = spec->getTemplateArgs(); LinkageInfo argsLV = getLVForTemplateArgumentList(templateArgs, computation); if (considerVisibility) LV.mergeVisibility(argsLV); LV.mergeExternalVisibility(argsLV); } /// Should we consider visibility associated with the template /// arguments and parameters of the given variable template /// specialization? As usual, follow class template specialization /// logic up to initialization. static bool shouldConsiderTemplateVisibility( const VarTemplateSpecializationDecl *spec, LVComputationKind computation) { // Include visibility from the template parameters and arguments // only if this is not an explicit instantiation or specialization // with direct explicit visibility (and note that implicit // instantiations won't have a direct attribute). if (!spec->isExplicitInstantiationOrSpecialization()) return true; // An explicit variable specialization is an independent, top-level // declaration. As such, if it has an explicit visibility attribute, // that must directly express the user's intent, and we should honor // it. 
if (spec->isExplicitSpecialization() && hasExplicitVisibilityAlready(computation)) return false; return !hasDirectVisibilityAttribute(spec, computation); } /// Merge in template-related linkage and visibility for the given /// variable template specialization. As usual, follow class template /// specialization logic up to initialization. void LinkageComputer::mergeTemplateLV(LinkageInfo &LV, const VarTemplateSpecializationDecl *spec, LVComputationKind computation) { bool considerVisibility = shouldConsiderTemplateVisibility(spec, computation); // Merge information from the template parameters, but ignore // visibility if we're only considering template arguments. VarTemplateDecl *temp = spec->getSpecializedTemplate(); LinkageInfo tempLV = getLVForTemplateParameterList(temp->getTemplateParameters(), computation); LV.mergeMaybeWithVisibility(tempLV, considerVisibility && !hasExplicitVisibilityAlready(computation)); // Merge information from the template arguments. We ignore // template-argument visibility if we've got an explicit // instantiation with a visibility attribute. const TemplateArgumentList &templateArgs = spec->getTemplateArgs(); LinkageInfo argsLV = getLVForTemplateArgumentList(templateArgs, computation); if (considerVisibility) LV.mergeVisibility(argsLV); LV.mergeExternalVisibility(argsLV); } static bool useInlineVisibilityHidden(const NamedDecl *D) { // FIXME: we should warn if -fvisibility-inlines-hidden is used with c. const LangOptions &Opts = D->getASTContext().getLangOpts(); if (!Opts.CPlusPlus || !Opts.InlineVisibilityHidden) return false; const auto *FD = dyn_cast(D); if (!FD) return false; TemplateSpecializationKind TSK = TSK_Undeclared; if (FunctionTemplateSpecializationInfo *spec = FD->getTemplateSpecializationInfo()) { TSK = spec->getTemplateSpecializationKind(); } else if (MemberSpecializationInfo *MSI = FD->getMemberSpecializationInfo()) { TSK = MSI->getTemplateSpecializationKind(); } const FunctionDecl *Def = nullptr; // InlineVisibilityHidden only applies to definitions, and // isInlined() only gives meaningful answers on definitions // anyway. 
return TSK != TSK_ExplicitInstantiationDeclaration && TSK != TSK_ExplicitInstantiationDefinition && FD->hasBody(Def) && Def->isInlined() && !Def->hasAttr(); } template static bool isFirstInExternCContext(T *D) { const T *First = D->getFirstDecl(); return First->isInExternCContext(); } static bool isSingleLineLanguageLinkage(const Decl &D) { if (const auto *SD = dyn_cast(D.getDeclContext())) if (!SD->hasBraces()) return true; return false; } static bool isDeclaredInModuleInterfaceOrPartition(const NamedDecl *D) { if (auto *M = D->getOwningModule()) return M->isInterfaceOrPartition(); return false; } static LinkageInfo getExternalLinkageFor(const NamedDecl *D) { return LinkageInfo::external(); } static StorageClass getStorageClass(const Decl *D) { if (auto *TD = dyn_cast(D)) D = TD->getTemplatedDecl(); if (D) { if (auto *VD = dyn_cast(D)) return VD->getStorageClass(); if (auto *FD = dyn_cast(D)) return FD->getStorageClass(); } return SC_None; } LinkageInfo LinkageComputer::getLVForNamespaceScopeDecl(const NamedDecl *D, LVComputationKind computation, bool IgnoreVarTypeLinkage) { assert(D->getDeclContext()->getRedeclContext()->isFileContext() && "Not a name having namespace scope"); ASTContext &Context = D->getASTContext(); const auto *Var = dyn_cast(D); // C++ [basic.link]p3: // A name having namespace scope (3.3.6) has internal linkage if it // is the name of if ((getStorageClass(D->getCanonicalDecl()) == SC_Static) || (Context.getLangOpts().C23 && Var && Var->isConstexpr())) { // - a variable, variable template, function, or function template // that is explicitly declared static; or // (This bullet corresponds to C99 6.2.2p3.) // C23 6.2.2p3 // If the declaration of a file scope identifier for // an object contains any of the storage-class specifiers static or // constexpr then the identifier has internal linkage. return LinkageInfo::internal(); } if (Var) { // - a non-template variable of non-volatile const-qualified type, unless // - it is explicitly declared extern, or // - it is declared in the purview of a module interface unit // (outside the private-module-fragment, if any) or module partition, or // - it is inline, or // - it was previously declared and the prior declaration did not have // internal linkage // (There is no equivalent in C99.) if (Context.getLangOpts().CPlusPlus && Var->getType().isConstQualified() && !Var->getType().isVolatileQualified() && !Var->isInline() && !isDeclaredInModuleInterfaceOrPartition(Var) && !isa(Var) && !Var->getDescribedVarTemplate()) { const VarDecl *PrevVar = Var->getPreviousDecl(); if (PrevVar) return getLVForDecl(PrevVar, computation); if (Var->getStorageClass() != SC_Extern && Var->getStorageClass() != SC_PrivateExtern && !isSingleLineLanguageLinkage(*Var)) return LinkageInfo::internal(); } for (const VarDecl *PrevVar = Var->getPreviousDecl(); PrevVar; PrevVar = PrevVar->getPreviousDecl()) { if (PrevVar->getStorageClass() == SC_PrivateExtern && Var->getStorageClass() == SC_None) return getDeclLinkageAndVisibility(PrevVar); // Explicitly declared static. if (PrevVar->getStorageClass() == SC_Static) return LinkageInfo::internal(); } } else if (const auto *IFD = dyn_cast(D)) { // - a data member of an anonymous union. 
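// Editorial illustration of the [basic.link]p3 cases handled above (a sketch;
// C++ at namespace scope):
static int a = 0;        // internal linkage: explicitly static
const int b = 1;         // internal linkage in C++: const, non-volatile,
                         // not extern, not inline, not previously declared
extern const int c = 2;  // external linkage: explicitly extern
inline const int d = 3;  // external linkage: inline variable (C++17)
namespace { int e; }     // internal linkage: unnamed namespace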
const VarDecl *VD = IFD->getVarDecl(); assert(VD && "Expected a VarDecl in this IndirectFieldDecl!"); return getLVForNamespaceScopeDecl(VD, computation, IgnoreVarTypeLinkage); } assert(!isa(D) && "Didn't expect a FieldDecl!"); // FIXME: This gives internal linkage to names that should have no linkage // (those not covered by [basic.link]p6). if (D->isInAnonymousNamespace()) { const auto *Var = dyn_cast(D); const auto *Func = dyn_cast(D); // FIXME: The check for extern "C" here is not justified by the standard // wording, but we retain it from the pre-DR1113 model to avoid breaking // code. // // C++11 [basic.link]p4: // An unnamed namespace or a namespace declared directly or indirectly // within an unnamed namespace has internal linkage. if ((!Var || !isFirstInExternCContext(Var)) && (!Func || !isFirstInExternCContext(Func))) return LinkageInfo::internal(); } // Set up the defaults. // C99 6.2.2p5: // If the declaration of an identifier for an object has file // scope and no storage-class specifier, its linkage is // external. LinkageInfo LV = getExternalLinkageFor(D); if (!hasExplicitVisibilityAlready(computation)) { if (std::optional Vis = getExplicitVisibility(D, computation)) { LV.mergeVisibility(*Vis, true); } else { // If we're declared in a namespace with a visibility attribute, // use that namespace's visibility, and it still counts as explicit. for (const DeclContext *DC = D->getDeclContext(); !isa(DC); DC = DC->getParent()) { const auto *ND = dyn_cast(DC); if (!ND) continue; if (std::optional Vis = getExplicitVisibility(ND, computation)) { LV.mergeVisibility(*Vis, true); break; } } } // Add in global settings if the above didn't give us direct visibility. if (!LV.isVisibilityExplicit()) { // Use global type/value visibility as appropriate. Visibility globalVisibility = computation.isValueVisibility() ? Context.getLangOpts().getValueVisibilityMode() : Context.getLangOpts().getTypeVisibilityMode(); LV.mergeVisibility(globalVisibility, /*explicit*/ false); // If we're paying attention to global visibility, apply // -finline-visibility-hidden if this is an inline method. if (useInlineVisibilityHidden(D)) LV.mergeVisibility(HiddenVisibility, /*visibilityExplicit=*/false); } } // C++ [basic.link]p4: // A name having namespace scope that has not been given internal linkage // above and that is the name of // [...bullets...] // has its linkage determined as follows: // - if the enclosing namespace has internal linkage, the name has // internal linkage; [handled above] // - otherwise, if the declaration of the name is attached to a named // module and is not exported, the name has module linkage; // - otherwise, the name has external linkage. // LV is currently set up to handle the last two bullets. // // The bullets are: // - a variable; or if (const auto *Var = dyn_cast(D)) { // GCC applies the following optimization to variables and static // data members, but not to functions: // // Modify the variable's LV by the LV of its type unless this is // C or extern "C". This follows from [basic.link]p9: // A type without linkage shall not be used as the type of a // variable or function with external linkage unless // - the entity has C language linkage, or // - the entity is declared within an unnamed namespace, or // - the entity is not used or is defined in the same // translation unit. // and [basic.link]p10: // ...the types specified by all declarations referring to a // given variable or function shall be identical... // C does not have an equivalent rule. 
// // Ignore this if we've got an explicit attribute; the user // probably knows what they're doing. // // Note that we don't want to make the variable non-external // because of this, but unique-external linkage suits us. if (Context.getLangOpts().CPlusPlus && !isFirstInExternCContext(Var) && !IgnoreVarTypeLinkage) { LinkageInfo TypeLV = getLVForType(*Var->getType(), computation); if (!isExternallyVisible(TypeLV.getLinkage())) return LinkageInfo::uniqueExternal(); if (!LV.isVisibilityExplicit()) LV.mergeVisibility(TypeLV); } if (Var->getStorageClass() == SC_PrivateExtern) LV.mergeVisibility(HiddenVisibility, true); // Note that Sema::MergeVarDecl already takes care of implementing // C99 6.2.2p4 and propagating the visibility attribute, so we don't have // to do it here. // As per function and class template specializations (below), // consider LV for the template and template arguments. We're at file // scope, so we do not need to worry about nested specializations. if (const auto *spec = dyn_cast(Var)) { mergeTemplateLV(LV, spec, computation); } // - a function; or } else if (const auto *Function = dyn_cast(D)) { // In theory, we can modify the function's LV by the LV of its // type unless it has C linkage (see comment above about variables // for justification). In practice, GCC doesn't do this, so it's // just too painful to make work. if (Function->getStorageClass() == SC_PrivateExtern) LV.mergeVisibility(HiddenVisibility, true); // OpenMP target declare device functions are not callable from the host so // they should not be exported from the device image. This applies to all // functions as the host-callable kernel functions are emitted at codegen. if (Context.getLangOpts().OpenMP && Context.getLangOpts().OpenMPIsTargetDevice && ((Context.getTargetInfo().getTriple().isAMDGPU() || Context.getTargetInfo().getTriple().isNVPTX()) || OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(Function))) LV.mergeVisibility(HiddenVisibility, /*newExplicit=*/false); // Note that Sema::MergeCompatibleFunctionDecls already takes care of // merging storage classes and visibility attributes, so we don't have to // look at previous decls in here. // In C++, then if the type of the function uses a type with // unique-external linkage, it's not legally usable from outside // this translation unit. However, we should use the C linkage // rules instead for extern "C" declarations. if (Context.getLangOpts().CPlusPlus && !isFirstInExternCContext(Function)) { // Only look at the type-as-written. Otherwise, deducing the return type // of a function could change its linkage. QualType TypeAsWritten = Function->getType(); if (TypeSourceInfo *TSI = Function->getTypeSourceInfo()) TypeAsWritten = TSI->getType(); if (!isExternallyVisible(TypeAsWritten->getLinkage())) return LinkageInfo::uniqueExternal(); } // Consider LV from the template and the template arguments. // We're at file scope, so we do not need to worry about nested // specializations. if (FunctionTemplateSpecializationInfo *specInfo = Function->getTemplateSpecializationInfo()) { mergeTemplateLV(LV, Function, specInfo, computation); } // - a named class (Clause 9), or an unnamed class defined in a // typedef declaration in which the class has the typedef name // for linkage purposes (7.1.3); or // - a named enumeration (7.2), or an unnamed enumeration // defined in a typedef declaration in which the enumeration // has the typedef name for linkage purposes (7.1.3); or } else if (const auto *Tag = dyn_cast(D)) { // Unnamed tags have no linkage. 
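// Editorial illustration of the variable-type rule above (a sketch; the names
// are invented):
namespace { struct Internal {}; }  // a type that is not externally visible
Internal g_obj;  // g_obj's linkage is capped by its type's linkage, so it is
                 // treated as unique-external rather than a usable external
                 // symbol.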
if (!Tag->hasNameForLinkage()) return LinkageInfo::none(); // If this is a class template specialization, consider the // linkage of the template and template arguments. We're at file // scope, so we do not need to worry about nested specializations. if (const auto *spec = dyn_cast(Tag)) { mergeTemplateLV(LV, spec, computation); } // FIXME: This is not part of the C++ standard any more. // - an enumerator belonging to an enumeration with external linkage; or } else if (isa(D)) { LinkageInfo EnumLV = getLVForDecl(cast(D->getDeclContext()), computation); if (!isExternalFormalLinkage(EnumLV.getLinkage())) return LinkageInfo::none(); LV.merge(EnumLV); // - a template } else if (const auto *temp = dyn_cast(D)) { bool considerVisibility = !hasExplicitVisibilityAlready(computation); LinkageInfo tempLV = getLVForTemplateParameterList(temp->getTemplateParameters(), computation); LV.mergeMaybeWithVisibility(tempLV, considerVisibility); // An unnamed namespace or a namespace declared directly or indirectly // within an unnamed namespace has internal linkage. All other namespaces // have external linkage. // // We handled names in anonymous namespaces above. } else if (isa(D)) { return LV; // By extension, we assign external linkage to Objective-C // interfaces. } else if (isa(D)) { // fallout } else if (auto *TD = dyn_cast(D)) { // A typedef declaration has linkage if it gives a type a name for // linkage purposes. if (!TD->getAnonDeclWithTypedefName(/*AnyRedecl*/true)) return LinkageInfo::none(); } else if (isa(D)) { // A GUID behaves like an inline variable with external linkage. Fall // through. // Everything not covered here has no linkage. } else { return LinkageInfo::none(); } // If we ended up with non-externally-visible linkage, visibility should // always be default. if (!isExternallyVisible(LV.getLinkage())) return LinkageInfo(LV.getLinkage(), DefaultVisibility, false); return LV; } LinkageInfo LinkageComputer::getLVForClassMember(const NamedDecl *D, LVComputationKind computation, bool IgnoreVarTypeLinkage) { // Only certain class members have linkage. Note that fields don't // really have linkage, but it's convenient to say they do for the // purposes of calculating linkage of pointer-to-data-member // template arguments. // // Templates also don't officially have linkage, but since we ignore // the C++ standard and look at template arguments when determining // linkage and visibility of a template specialization, we might hit // a template template argument that way. If we do, we need to // consider its linkage. if (!(isa(D) || isa(D) || isa(D) || isa(D) || isa(D) || isa(D))) return LinkageInfo::none(); LinkageInfo LV; // If we have an explicit visibility attribute, merge that in. if (!hasExplicitVisibilityAlready(computation)) { if (std::optional Vis = getExplicitVisibility(D, computation)) LV.mergeVisibility(*Vis, true); // If we're paying attention to global visibility, apply // -finline-visibility-hidden if this is an inline method. // // Note that we do this before merging information about // the class visibility. if (!LV.isVisibilityExplicit() && useInlineVisibilityHidden(D)) LV.mergeVisibility(HiddenVisibility, /*visibilityExplicit=*/false); } // If this class member has an explicit visibility attribute, the only // thing that can change its visibility is the template arguments, so // only look for them when processing the class. 
LVComputationKind classComputation = computation; if (LV.isVisibilityExplicit()) classComputation = withExplicitVisibilityAlready(computation); LinkageInfo classLV = getLVForDecl(cast(D->getDeclContext()), classComputation); // The member has the same linkage as the class. If that's not externally // visible, we don't need to compute anything about the linkage. // FIXME: If we're only computing linkage, can we bail out here? if (!isExternallyVisible(classLV.getLinkage())) return classLV; // Otherwise, don't merge in classLV yet, because in certain cases // we need to completely ignore the visibility from it. // Specifically, if this decl exists and has an explicit attribute. const NamedDecl *explicitSpecSuppressor = nullptr; if (const auto *MD = dyn_cast(D)) { // Only look at the type-as-written. Otherwise, deducing the return type // of a function could change its linkage. QualType TypeAsWritten = MD->getType(); if (TypeSourceInfo *TSI = MD->getTypeSourceInfo()) TypeAsWritten = TSI->getType(); if (!isExternallyVisible(TypeAsWritten->getLinkage())) return LinkageInfo::uniqueExternal(); // If this is a method template specialization, use the linkage for // the template parameters and arguments. if (FunctionTemplateSpecializationInfo *spec = MD->getTemplateSpecializationInfo()) { mergeTemplateLV(LV, MD, spec, computation); if (spec->isExplicitSpecialization()) { explicitSpecSuppressor = MD; } else if (isExplicitMemberSpecialization(spec->getTemplate())) { explicitSpecSuppressor = spec->getTemplate()->getTemplatedDecl(); } } else if (isExplicitMemberSpecialization(MD)) { explicitSpecSuppressor = MD; } // OpenMP target declare device functions are not callable from the host so // they should not be exported from the device image. This applies to all // functions as the host-callable kernel functions are emitted at codegen. ASTContext &Context = D->getASTContext(); if (Context.getLangOpts().OpenMP && Context.getLangOpts().OpenMPIsTargetDevice && ((Context.getTargetInfo().getTriple().isAMDGPU() || Context.getTargetInfo().getTriple().isNVPTX()) || OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(MD))) LV.mergeVisibility(HiddenVisibility, /*newExplicit=*/false); } else if (const auto *RD = dyn_cast(D)) { if (const auto *spec = dyn_cast(RD)) { mergeTemplateLV(LV, spec, computation); if (spec->isExplicitSpecialization()) { explicitSpecSuppressor = spec; } else { const ClassTemplateDecl *temp = spec->getSpecializedTemplate(); if (isExplicitMemberSpecialization(temp)) { explicitSpecSuppressor = temp->getTemplatedDecl(); } } } else if (isExplicitMemberSpecialization(RD)) { explicitSpecSuppressor = RD; } // Static data members. } else if (const auto *VD = dyn_cast(D)) { if (const auto *spec = dyn_cast(VD)) mergeTemplateLV(LV, spec, computation); // Modify the variable's linkage by its type, but ignore the // type's visibility unless it's a definition. if (!IgnoreVarTypeLinkage) { LinkageInfo typeLV = getLVForType(*VD->getType(), computation); // FIXME: If the type's linkage is not externally visible, we can // give this static data member UniqueExternalLinkage. if (!LV.isVisibilityExplicit() && !classLV.isVisibilityExplicit()) LV.mergeVisibility(typeLV); LV.mergeExternalVisibility(typeLV); } if (isExplicitMemberSpecialization(VD)) { explicitSpecSuppressor = VD; } // Template members. 
} else if (const auto *temp = dyn_cast(D)) { bool considerVisibility = (!LV.isVisibilityExplicit() && !classLV.isVisibilityExplicit() && !hasExplicitVisibilityAlready(computation)); LinkageInfo tempLV = getLVForTemplateParameterList(temp->getTemplateParameters(), computation); LV.mergeMaybeWithVisibility(tempLV, considerVisibility); if (const auto *redeclTemp = dyn_cast(temp)) { if (isExplicitMemberSpecialization(redeclTemp)) { explicitSpecSuppressor = temp->getTemplatedDecl(); } } } // We should never be looking for an attribute directly on a template. assert(!explicitSpecSuppressor || !isa(explicitSpecSuppressor)); // If this member is an explicit member specialization, and it has // an explicit attribute, ignore visibility from the parent. bool considerClassVisibility = true; if (explicitSpecSuppressor && // optimization: hasDVA() is true only with explicit visibility. LV.isVisibilityExplicit() && classLV.getVisibility() != DefaultVisibility && hasDirectVisibilityAttribute(explicitSpecSuppressor, computation)) { considerClassVisibility = false; } // Finally, merge in information from the class. LV.mergeMaybeWithVisibility(classLV, considerClassVisibility); return LV; } void NamedDecl::anchor() {} bool NamedDecl::isLinkageValid() const { if (!hasCachedLinkage()) return true; Linkage L = LinkageComputer{} .computeLVForDecl(this, LVComputationKind::forLinkageOnly()) .getLinkage(); return L == getCachedLinkage(); } bool NamedDecl::isPlaceholderVar(const LangOptions &LangOpts) const { // [C++2c] [basic.scope.scope]/p5 // A declaration is name-independent if its name is _ and it declares // - a variable with automatic storage duration, // - a structured binding not inhabiting a namespace scope, // - the variable introduced by an init-capture // - or a non-static data member. if (!LangOpts.CPlusPlus || !getIdentifier() || !getIdentifier()->isPlaceholder()) return false; if (isa(this)) return true; if (const auto *IFD = dyn_cast(this)) { if (!getDeclContext()->isFunctionOrMethod() && !getDeclContext()->isRecord()) return false; const VarDecl *VD = IFD->getVarDecl(); return !VD || VD->getStorageDuration() == SD_Automatic; } // and it declares a variable with automatic storage duration if (const auto *VD = dyn_cast(this)) { if (isa(VD)) return false; if (VD->isInitCapture()) return true; return VD->getStorageDuration() == StorageDuration::SD_Automatic; } if (const auto *BD = dyn_cast(this); BD && getDeclContext()->isFunctionOrMethod()) { const VarDecl *VD = BD->getHoldingVar(); return !VD || VD->getStorageDuration() == StorageDuration::SD_Automatic; } return false; } ReservedIdentifierStatus NamedDecl::isReserved(const LangOptions &LangOpts) const { const IdentifierInfo *II = getIdentifier(); // This triggers at least for CXXLiteralIdentifiers, which we already checked // at lexing time. if (!II) return ReservedIdentifierStatus::NotReserved; ReservedIdentifierStatus Status = II->isReserved(LangOpts); if (isReservedAtGlobalScope(Status) && !isReservedInAllContexts(Status)) { // This name is only reserved at global scope. Check if this declaration // conflicts with a global scope declaration. if (isa(this) || isTemplateParameter()) return ReservedIdentifierStatus::NotReserved; // C++ [dcl.link]/7: // Two declarations [conflict] if [...] one declares a function or // variable with C language linkage, and the other declares [...] a // variable that belongs to the global scope. 
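// Editorial illustration of isPlaceholderVar above (a sketch; requires a
// compiler implementing C++26 P2169 name-independent declarations):
void placeholder_demo() {
  int xy[2] = {1, 2};
  auto [_, second] = xy;  // structured binding named _
  int _ = 0;  // a second _ in the same scope is allowed: both declarations
              // are name-independent, though any later use of _ is ill-formed
  (void)second;
}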
// // Therefore names that are reserved at global scope are also reserved as // names of variables and functions with C language linkage. const DeclContext *DC = getDeclContext()->getRedeclContext(); if (DC->isTranslationUnit()) return Status; if (auto *VD = dyn_cast(this)) if (VD->isExternC()) return ReservedIdentifierStatus::StartsWithUnderscoreAndIsExternC; if (auto *FD = dyn_cast(this)) if (FD->isExternC()) return ReservedIdentifierStatus::StartsWithUnderscoreAndIsExternC; return ReservedIdentifierStatus::NotReserved; } return Status; } ObjCStringFormatFamily NamedDecl::getObjCFStringFormattingFamily() const { StringRef name = getName(); if (name.empty()) return SFF_None; if (name.front() == 'C') if (name == "CFStringCreateWithFormat" || name == "CFStringCreateWithFormatAndArguments" || name == "CFStringAppendFormat" || name == "CFStringAppendFormatAndArguments") return SFF_CFString; return SFF_None; } Linkage NamedDecl::getLinkageInternal() const { // We don't care about visibility here, so ask for the cheapest // possible visibility analysis. return LinkageComputer{} .getLVForDecl(this, LVComputationKind::forLinkageOnly()) .getLinkage(); } static bool isExportedFromModuleInterfaceUnit(const NamedDecl *D) { // FIXME: Handle isModulePrivate. switch (D->getModuleOwnershipKind()) { case Decl::ModuleOwnershipKind::Unowned: case Decl::ModuleOwnershipKind::ReachableWhenImported: case Decl::ModuleOwnershipKind::ModulePrivate: return false; case Decl::ModuleOwnershipKind::Visible: case Decl::ModuleOwnershipKind::VisibleWhenImported: return D->isInNamedModule(); } llvm_unreachable("unexpected module ownership kind"); } /// Get the linkage from a semantic point of view. Entities in /// anonymous namespaces are external (in c++98). Linkage NamedDecl::getFormalLinkage() const { Linkage InternalLinkage = getLinkageInternal(); // C++ [basic.link]p4.8: // - if the declaration of the name is attached to a named module and is not // exported // the name has module linkage; // // [basic.namespace.general]/p2 // A namespace is never attached to a named module and never has a name with // module linkage. if (isInNamedModule() && InternalLinkage == Linkage::External && !isExportedFromModuleInterfaceUnit( cast(this->getCanonicalDecl())) && !isa(this)) InternalLinkage = Linkage::Module; return clang::getFormalLinkage(InternalLinkage); } LinkageInfo NamedDecl::getLinkageAndVisibility() const { return LinkageComputer{}.getDeclLinkageAndVisibility(this); } static std::optional getExplicitVisibilityAux(const NamedDecl *ND, NamedDecl::ExplicitVisibilityKind kind, bool IsMostRecent) { assert(!IsMostRecent || ND == ND->getMostRecentDecl()); // Check the declaration itself first. if (std::optional V = getVisibilityOf(ND, kind)) return V; // If this is a member class of a specialization of a class template // and the corresponding decl has explicit visibility, use that. if (const auto *RD = dyn_cast(ND)) { CXXRecordDecl *InstantiatedFrom = RD->getInstantiatedFromMemberClass(); if (InstantiatedFrom) return getVisibilityOf(InstantiatedFrom, kind); } // If there wasn't explicit visibility there, and this is a // specialization of a class template, check for visibility // on the pattern. if (const auto *spec = dyn_cast(ND)) { // Walk all the template decl till this point to see if there are // explicit visibility attributes. 
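// Editorial illustration of isReserved above (a sketch; the names are
// invented and would be diagnosed under -Wreserved-identifier):
int _Bad;                  // reserved in all contexts: underscore + uppercase
void rid_demo() { int _ok = 0; (void)_ok; }  // a leading underscore is only
                                             // reserved at global scope
extern "C" int _zzz;       // a global-scope-reserved name with C language
                           // linkage is still flagged (see [dcl.link]/7 above)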
const auto *TD = spec->getSpecializedTemplate()->getTemplatedDecl(); while (TD != nullptr) { auto Vis = getVisibilityOf(TD, kind); if (Vis != std::nullopt) return Vis; TD = TD->getPreviousDecl(); } return std::nullopt; } // Use the most recent declaration. if (!IsMostRecent && !isa(ND)) { const NamedDecl *MostRecent = ND->getMostRecentDecl(); if (MostRecent != ND) return getExplicitVisibilityAux(MostRecent, kind, true); } if (const auto *Var = dyn_cast(ND)) { if (Var->isStaticDataMember()) { VarDecl *InstantiatedFrom = Var->getInstantiatedFromStaticDataMember(); if (InstantiatedFrom) return getVisibilityOf(InstantiatedFrom, kind); } if (const auto *VTSD = dyn_cast(Var)) return getVisibilityOf(VTSD->getSpecializedTemplate()->getTemplatedDecl(), kind); return std::nullopt; } // Also handle function template specializations. if (const auto *fn = dyn_cast(ND)) { // If the function is a specialization of a template with an // explicit visibility attribute, use that. if (FunctionTemplateSpecializationInfo *templateInfo = fn->getTemplateSpecializationInfo()) return getVisibilityOf(templateInfo->getTemplate()->getTemplatedDecl(), kind); // If the function is a member of a specialization of a class template // and the corresponding decl has explicit visibility, use that. FunctionDecl *InstantiatedFrom = fn->getInstantiatedFromMemberFunction(); if (InstantiatedFrom) return getVisibilityOf(InstantiatedFrom, kind); return std::nullopt; } // The visibility of a template is stored in the templated decl. if (const auto *TD = dyn_cast(ND)) return getVisibilityOf(TD->getTemplatedDecl(), kind); return std::nullopt; } std::optional NamedDecl::getExplicitVisibility(ExplicitVisibilityKind kind) const { return getExplicitVisibilityAux(this, kind, false); } LinkageInfo LinkageComputer::getLVForClosure(const DeclContext *DC, Decl *ContextDecl, LVComputationKind computation) { // This lambda has its linkage/visibility determined by its owner. const NamedDecl *Owner; if (!ContextDecl) Owner = dyn_cast(DC); else if (isa(ContextDecl)) Owner = dyn_cast(ContextDecl->getDeclContext()->getRedeclContext()); else if (isa(ContextDecl)) { // Replace with the concept's owning decl, which is either a namespace or a // TU, so this needs a dyn_cast. Owner = dyn_cast(ContextDecl->getDeclContext()); } else { Owner = cast(ContextDecl); } if (!Owner) return LinkageInfo::none(); // If the owner has a deduced type, we need to skip querying the linkage and // visibility of that type, because it might involve this closure type. The // only effect of this is that we might give a lambda VisibleNoLinkage rather // than NoLinkage when we don't strictly need to, which is benign. auto *VD = dyn_cast(Owner); LinkageInfo OwnerLV = VD && VD->getType()->getContainedDeducedType() ? computeLVForDecl(Owner, computation, /*IgnoreVarTypeLinkage*/true) : getLVForDecl(Owner, computation); // A lambda never formally has linkage. But if the owner is externally // visible, then the lambda is too. We apply the same rules to blocks. if (!isExternallyVisible(OwnerLV.getLinkage())) return LinkageInfo::none(); return LinkageInfo(Linkage::VisibleNone, OwnerLV.getVisibility(), OwnerLV.isVisibilityExplicit()); } LinkageInfo LinkageComputer::getLVForLocalDecl(const NamedDecl *D, LVComputationKind computation) { if (const auto *Function = dyn_cast(D)) { if (Function->isInAnonymousNamespace() && !isFirstInExternCContext(Function)) return LinkageInfo::internal(); // This is a "void f();" which got merged with a file static. 
if (Function->getCanonicalDecl()->getStorageClass() == SC_Static) return LinkageInfo::internal(); LinkageInfo LV; if (!hasExplicitVisibilityAlready(computation)) { if (std::optional Vis = getExplicitVisibility(Function, computation)) LV.mergeVisibility(*Vis, true); } // Note that Sema::MergeCompatibleFunctionDecls already takes care of // merging storage classes and visibility attributes, so we don't have to // look at previous decls in here. return LV; } if (const auto *Var = dyn_cast(D)) { if (Var->hasExternalStorage()) { if (Var->isInAnonymousNamespace() && !isFirstInExternCContext(Var)) return LinkageInfo::internal(); LinkageInfo LV; if (Var->getStorageClass() == SC_PrivateExtern) LV.mergeVisibility(HiddenVisibility, true); else if (!hasExplicitVisibilityAlready(computation)) { if (std::optional Vis = getExplicitVisibility(Var, computation)) LV.mergeVisibility(*Vis, true); } if (const VarDecl *Prev = Var->getPreviousDecl()) { LinkageInfo PrevLV = getLVForDecl(Prev, computation); if (PrevLV.getLinkage() != Linkage::Invalid) LV.setLinkage(PrevLV.getLinkage()); LV.mergeVisibility(PrevLV); } return LV; } if (!Var->isStaticLocal()) return LinkageInfo::none(); } ASTContext &Context = D->getASTContext(); if (!Context.getLangOpts().CPlusPlus) return LinkageInfo::none(); const Decl *OuterD = getOutermostFuncOrBlockContext(D); if (!OuterD || OuterD->isInvalidDecl()) return LinkageInfo::none(); LinkageInfo LV; if (const auto *BD = dyn_cast(OuterD)) { if (!BD->getBlockManglingNumber()) return LinkageInfo::none(); LV = getLVForClosure(BD->getDeclContext()->getRedeclContext(), BD->getBlockManglingContextDecl(), computation); } else { const auto *FD = cast(OuterD); if (!FD->isInlined() && !isTemplateInstantiation(FD->getTemplateSpecializationKind())) return LinkageInfo::none(); // If a function is hidden by -fvisibility-inlines-hidden option and // is not explicitly attributed as a hidden function, // we should not make static local variables in the function hidden. LV = getLVForDecl(FD, computation); if (isa(D) && useInlineVisibilityHidden(FD) && !LV.isVisibilityExplicit() && !Context.getLangOpts().VisibilityInlinesHiddenStaticLocalVar) { assert(cast(D)->isStaticLocal()); // If this was an implicitly hidden inline method, check again for // explicit visibility on the parent class, and use that for static locals // if present. if (const auto *MD = dyn_cast(FD)) LV = getLVForDecl(MD->getParent(), computation); if (!LV.isVisibilityExplicit()) { Visibility globalVisibility = computation.isValueVisibility() ? Context.getLangOpts().getValueVisibilityMode() : Context.getLangOpts().getTypeVisibilityMode(); return LinkageInfo(Linkage::VisibleNone, globalVisibility, /*visibilityExplicit=*/false); } } } if (!isExternallyVisible(LV.getLinkage())) return LinkageInfo::none(); return LinkageInfo(Linkage::VisibleNone, LV.getVisibility(), LV.isVisibilityExplicit()); } LinkageInfo LinkageComputer::computeLVForDecl(const NamedDecl *D, LVComputationKind computation, bool IgnoreVarTypeLinkage) { // Internal_linkage attribute overrides other considerations. if (D->hasAttr()) return LinkageInfo::internal(); // Objective-C: treat all Objective-C declarations as having external // linkage. switch (D->getKind()) { default: break; // Per C++ [basic.link]p2, only the names of objects, references, // functions, types, templates, namespaces, and values ever have linkage. 
// // Note that the name of a typedef, namespace alias, using declaration, // and so on are not the name of the corresponding type, namespace, or // declaration, so they do *not* have linkage. case Decl::ImplicitParam: case Decl::Label: case Decl::NamespaceAlias: case Decl::ParmVar: case Decl::Using: case Decl::UsingEnum: case Decl::UsingShadow: case Decl::UsingDirective: return LinkageInfo::none(); case Decl::EnumConstant: // C++ [basic.link]p4: an enumerator has the linkage of its enumeration. if (D->getASTContext().getLangOpts().CPlusPlus) return getLVForDecl(cast(D->getDeclContext()), computation); return LinkageInfo::visible_none(); case Decl::Typedef: case Decl::TypeAlias: // A typedef declaration has linkage if it gives a type a name for // linkage purposes. if (!cast(D) ->getAnonDeclWithTypedefName(/*AnyRedecl*/true)) return LinkageInfo::none(); break; case Decl::TemplateTemplateParm: // count these as external case Decl::NonTypeTemplateParm: case Decl::ObjCAtDefsField: case Decl::ObjCCategory: case Decl::ObjCCategoryImpl: case Decl::ObjCCompatibleAlias: case Decl::ObjCImplementation: case Decl::ObjCMethod: case Decl::ObjCProperty: case Decl::ObjCPropertyImpl: case Decl::ObjCProtocol: return getExternalLinkageFor(D); case Decl::CXXRecord: { const auto *Record = cast(D); if (Record->isLambda()) { if (Record->hasKnownLambdaInternalLinkage() || !Record->getLambdaManglingNumber()) { // This lambda has no mangling number, so it's internal. return LinkageInfo::internal(); } return getLVForClosure( Record->getDeclContext()->getRedeclContext(), Record->getLambdaContextDecl(), computation); } break; } case Decl::TemplateParamObject: { // The template parameter object can be referenced from anywhere its type // and value can be referenced. auto *TPO = cast(D); LinkageInfo LV = getLVForType(*TPO->getType(), computation); LV.merge(getLVForValue(TPO->getValue(), computation)); return LV; } } // Handle linkage for namespace-scope names. if (D->getDeclContext()->getRedeclContext()->isFileContext()) return getLVForNamespaceScopeDecl(D, computation, IgnoreVarTypeLinkage); // C++ [basic.link]p5: // In addition, a member function, static data member, a named // class or enumeration of class scope, or an unnamed class or // enumeration defined in a class-scope typedef declaration such // that the class or enumeration has the typedef name for linkage // purposes (7.1.3), has external linkage if the name of the class // has external linkage. if (D->getDeclContext()->isRecord()) return getLVForClassMember(D, computation, IgnoreVarTypeLinkage); // C++ [basic.link]p6: // The name of a function declared in block scope and the name of // an object declared by a block scope extern declaration have // linkage. If there is a visible declaration of an entity with // linkage having the same name and type, ignoring entities // declared outside the innermost enclosing namespace scope, the // block scope declaration declares that same entity and receives // the linkage of the previous declaration. If there is more than // one such matching entity, the program is ill-formed. Otherwise, // if no matching entity is found, the block scope entity receives // external linkage. if (D->getDeclContext()->isFunctionOrMethod()) return getLVForLocalDecl(D, computation); // C++ [basic.link]p6: // Names not covered by these rules have no linkage. return LinkageInfo::none(); } /// getLVForDecl - Get the linkage and visibility for the given declaration. 
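// Editorial illustration of the lambda handling above (a sketch; names are
// invented): a closure type never has formal linkage, but it is treated as
// visible when its owning declaration is externally visible.
inline auto make_counter() {  // externally visible owner
  return [] { static int n = 0; return ++n; };  // closure: visible, no linkage
}
namespace { auto helper = [] {}; }  // internal owner: closure not visible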
LinkageInfo LinkageComputer::getLVForDecl(const NamedDecl *D, LVComputationKind computation) { // Internal_linkage attribute overrides other considerations. if (D->hasAttr()) return LinkageInfo::internal(); if (computation.IgnoreAllVisibility && D->hasCachedLinkage()) return LinkageInfo(D->getCachedLinkage(), DefaultVisibility, false); if (std::optional LI = lookup(D, computation)) return *LI; LinkageInfo LV = computeLVForDecl(D, computation); if (D->hasCachedLinkage()) assert(D->getCachedLinkage() == LV.getLinkage()); D->setCachedLinkage(LV.getLinkage()); cache(D, computation, LV); #ifndef NDEBUG // In C (because of gnu inline) and in c++ with microsoft extensions an // static can follow an extern, so we can have two decls with different // linkages. const LangOptions &Opts = D->getASTContext().getLangOpts(); if (!Opts.CPlusPlus || Opts.MicrosoftExt) return LV; // We have just computed the linkage for this decl. By induction we know // that all other computed linkages match, check that the one we just // computed also does. NamedDecl *Old = nullptr; for (auto *I : D->redecls()) { auto *T = cast(I); if (T == D) continue; if (!T->isInvalidDecl() && T->hasCachedLinkage()) { Old = T; break; } } assert(!Old || Old->getCachedLinkage() == D->getCachedLinkage()); #endif return LV; } LinkageInfo LinkageComputer::getDeclLinkageAndVisibility(const NamedDecl *D) { NamedDecl::ExplicitVisibilityKind EK = usesTypeVisibility(D) ? NamedDecl::VisibilityForType : NamedDecl::VisibilityForValue; LVComputationKind CK(EK); return getLVForDecl(D, D->getASTContext().getLangOpts().IgnoreXCOFFVisibility ? CK.forLinkageOnly() : CK); } Module *Decl::getOwningModuleForLinkage() const { if (isa(this)) // Namespaces never have module linkage. It is the entities within them // that [may] do. return nullptr; Module *M = getOwningModule(); if (!M) return nullptr; switch (M->Kind) { case Module::ModuleMapModule: // Module map modules have no special linkage semantics. return nullptr; case Module::ModuleInterfaceUnit: case Module::ModuleImplementationUnit: case Module::ModulePartitionInterface: case Module::ModulePartitionImplementation: return M; case Module::ModuleHeaderUnit: case Module::ExplicitGlobalModuleFragment: case Module::ImplicitGlobalModuleFragment: // The global module shouldn't change the linkage. return nullptr; case Module::PrivateModuleFragment: // The private module fragment is part of its containing module for linkage // purposes. return M->Parent; } llvm_unreachable("unknown module kind"); } void NamedDecl::printName(raw_ostream &OS, const PrintingPolicy &Policy) const { Name.print(OS, Policy); } void NamedDecl::printName(raw_ostream &OS) const { printName(OS, getASTContext().getPrintingPolicy()); } std::string NamedDecl::getQualifiedNameAsString() const { std::string QualName; llvm::raw_string_ostream OS(QualName); printQualifiedName(OS, getASTContext().getPrintingPolicy()); return QualName; } void NamedDecl::printQualifiedName(raw_ostream &OS) const { printQualifiedName(OS, getASTContext().getPrintingPolicy()); } void NamedDecl::printQualifiedName(raw_ostream &OS, const PrintingPolicy &P) const { if (getDeclContext()->isFunctionOrMethod()) { // We do not print '(anonymous)' for function parameters without name. printName(OS, P); return; } printNestedNameSpecifier(OS, P); if (getDeclName()) OS << *this; else { // Give the printName override a chance to pick a different name before we // fall back to "(anonymous)". 
SmallString<64> NameBuffer; llvm::raw_svector_ostream NameOS(NameBuffer); printName(NameOS, P); if (NameBuffer.empty()) OS << "(anonymous)"; else OS << NameBuffer; } } void NamedDecl::printNestedNameSpecifier(raw_ostream &OS) const { printNestedNameSpecifier(OS, getASTContext().getPrintingPolicy()); } void NamedDecl::printNestedNameSpecifier(raw_ostream &OS, const PrintingPolicy &P) const { const DeclContext *Ctx = getDeclContext(); // For ObjC methods and properties, look through categories and use the // interface as context. if (auto *MD = dyn_cast(this)) { if (auto *ID = MD->getClassInterface()) Ctx = ID; } else if (auto *PD = dyn_cast(this)) { if (auto *MD = PD->getGetterMethodDecl()) if (auto *ID = MD->getClassInterface()) Ctx = ID; } else if (auto *ID = dyn_cast(this)) { if (auto *CI = ID->getContainingInterface()) Ctx = CI; } if (Ctx->isFunctionOrMethod()) return; using ContextsTy = SmallVector; ContextsTy Contexts; // Collect named contexts. DeclarationName NameInScope = getDeclName(); for (; Ctx; Ctx = Ctx->getParent()) { // Suppress anonymous namespace if requested. if (P.SuppressUnwrittenScope && isa(Ctx) && cast(Ctx)->isAnonymousNamespace()) continue; // Suppress inline namespace if it doesn't make the result ambiguous. if (P.SuppressInlineNamespace && Ctx->isInlineNamespace() && NameInScope && cast(Ctx)->isRedundantInlineQualifierFor(NameInScope)) continue; // Skip non-named contexts such as linkage specifications and ExportDecls. const NamedDecl *ND = dyn_cast(Ctx); if (!ND) continue; Contexts.push_back(Ctx); NameInScope = ND->getDeclName(); } for (const DeclContext *DC : llvm::reverse(Contexts)) { if (const auto *Spec = dyn_cast(DC)) { OS << Spec->getName(); const TemplateArgumentList &TemplateArgs = Spec->getTemplateArgs(); printTemplateArgumentList( OS, TemplateArgs.asArray(), P, Spec->getSpecializedTemplate()->getTemplateParameters()); } else if (const auto *ND = dyn_cast(DC)) { if (ND->isAnonymousNamespace()) { OS << (P.MSVCFormatting ? "`anonymous namespace\'" : "(anonymous namespace)"); } else OS << *ND; } else if (const auto *RD = dyn_cast(DC)) { if (!RD->getIdentifier()) OS << "(anonymous " << RD->getKindName() << ')'; else OS << *RD; } else if (const auto *FD = dyn_cast(DC)) { const FunctionProtoType *FT = nullptr; if (FD->hasWrittenPrototype()) FT = dyn_cast(FD->getType()->castAs()); OS << *FD << '('; if (FT) { unsigned NumParams = FD->getNumParams(); for (unsigned i = 0; i < NumParams; ++i) { if (i) OS << ", "; OS << FD->getParamDecl(i)->getType().stream(P); } if (FT->isVariadic()) { if (NumParams > 0) OS << ", "; OS << "..."; } } OS << ')'; } else if (const auto *ED = dyn_cast(DC)) { // C++ [dcl.enum]p10: Each enum-name and each unscoped // enumerator is declared in the scope that immediately contains // the enum-specifier. Each scoped enumerator is declared in the // scope of the enumeration. // For the case of unscoped enumerator, do not include in the qualified // name any information about its enum enclosing scope, as its visibility // is global. if (ED->isScoped()) OS << *ED; else continue; } else { OS << *cast(DC); } OS << "::"; } } void NamedDecl::getNameForDiagnostic(raw_ostream &OS, const PrintingPolicy &Policy, bool Qualified) const { if (Qualified) printQualifiedName(OS, Policy); else printName(OS, Policy); } template static bool isRedeclarableImpl(Redeclarable *) { return true; } static bool isRedeclarableImpl(...) 
{ return false; } static bool isRedeclarable(Decl::Kind K) { switch (K) { #define DECL(Type, Base) \ case Decl::Type: \ return isRedeclarableImpl((Type##Decl *)nullptr); #define ABSTRACT_DECL(DECL) #include "clang/AST/DeclNodes.inc" } llvm_unreachable("unknown decl kind"); } bool NamedDecl::declarationReplaces(const NamedDecl *OldD, bool IsKnownNewer) const { assert(getDeclName() == OldD->getDeclName() && "Declaration name mismatch"); // Never replace one imported declaration with another; we need both results // when re-exporting. if (OldD->isFromASTFile() && isFromASTFile()) return false; // A kind mismatch implies that the declaration is not replaced. if (OldD->getKind() != getKind()) return false; // For method declarations, we never replace. (Why?) if (isa(this)) return false; // For parameters, pick the newer one. This is either an error or (in // Objective-C) permitted as an extension. if (isa(this)) return true; // Inline namespaces can give us two declarations with the same // name and kind in the same scope but different contexts; we should // keep both declarations in this case. if (!this->getDeclContext()->getRedeclContext()->Equals( OldD->getDeclContext()->getRedeclContext())) return false; // Using declarations can be replaced if they import the same name from the // same context. if (const auto *UD = dyn_cast(this)) { ASTContext &Context = getASTContext(); return Context.getCanonicalNestedNameSpecifier(UD->getQualifier()) == Context.getCanonicalNestedNameSpecifier( cast(OldD)->getQualifier()); } if (const auto *UUVD = dyn_cast(this)) { ASTContext &Context = getASTContext(); return Context.getCanonicalNestedNameSpecifier(UUVD->getQualifier()) == Context.getCanonicalNestedNameSpecifier( cast(OldD)->getQualifier()); } if (isRedeclarable(getKind())) { if (getCanonicalDecl() != OldD->getCanonicalDecl()) return false; if (IsKnownNewer) return true; // Check whether this is actually newer than OldD. We want to keep the // newer declaration. This loop will usually only iterate once, because // OldD is usually the previous declaration. for (const auto *D : redecls()) { if (D == OldD) break; // If we reach the canonical declaration, then OldD is not actually older // than this one. // // FIXME: In this case, we should not add this decl to the lookup table. if (D->isCanonicalDecl()) return false; } // It's a newer declaration of the same kind of declaration in the same // scope: we want this decl instead of the existing one. return true; } // In all other cases, we need to keep both declarations in case they have // different visibility. Any attempt to use the name will result in an // ambiguity if more than one is visible. 
return false; } bool NamedDecl::hasLinkage() const { switch (getFormalLinkage()) { case Linkage::Invalid: llvm_unreachable("Linkage hasn't been computed!"); case Linkage::None: return false; case Linkage::Internal: return true; case Linkage::UniqueExternal: case Linkage::VisibleNone: llvm_unreachable("Non-formal linkage is not allowed here!"); case Linkage::Module: case Linkage::External: return true; } llvm_unreachable("Unhandled Linkage enum"); } NamedDecl *NamedDecl::getUnderlyingDeclImpl() { NamedDecl *ND = this; if (auto *UD = dyn_cast(ND)) ND = UD->getTargetDecl(); if (auto *AD = dyn_cast(ND)) return AD->getClassInterface(); if (auto *AD = dyn_cast(ND)) return AD->getNamespace(); return ND; } bool NamedDecl::isCXXInstanceMember() const { if (!isCXXClassMember()) return false; const NamedDecl *D = this; if (isa(D)) D = cast(D)->getTargetDecl(); if (isa(D) || isa(D) || isa(D)) return true; if (const auto *MD = dyn_cast_if_present(D->getAsFunction())) return MD->isInstance(); return false; } //===----------------------------------------------------------------------===// // DeclaratorDecl Implementation //===----------------------------------------------------------------------===// template static SourceLocation getTemplateOrInnerLocStart(const DeclT *decl) { if (decl->getNumTemplateParameterLists() > 0) return decl->getTemplateParameterList(0)->getTemplateLoc(); return decl->getInnerLocStart(); } SourceLocation DeclaratorDecl::getTypeSpecStartLoc() const { TypeSourceInfo *TSI = getTypeSourceInfo(); if (TSI) return TSI->getTypeLoc().getBeginLoc(); return SourceLocation(); } SourceLocation DeclaratorDecl::getTypeSpecEndLoc() const { TypeSourceInfo *TSI = getTypeSourceInfo(); if (TSI) return TSI->getTypeLoc().getEndLoc(); return SourceLocation(); } void DeclaratorDecl::setQualifierInfo(NestedNameSpecifierLoc QualifierLoc) { if (QualifierLoc) { // Make sure the extended decl info is allocated. if (!hasExtInfo()) { // Save (non-extended) type source info pointer. auto *savedTInfo = DeclInfo.get(); // Allocate external info struct. DeclInfo = new (getASTContext()) ExtInfo; // Restore savedTInfo into (extended) decl info. getExtInfo()->TInfo = savedTInfo; } // Set qualifier info. getExtInfo()->QualifierLoc = QualifierLoc; } else if (hasExtInfo()) { // Here Qualifier == 0, i.e., we are removing the qualifier (if any). getExtInfo()->QualifierLoc = QualifierLoc; } } void DeclaratorDecl::setTrailingRequiresClause(Expr *TrailingRequiresClause) { assert(TrailingRequiresClause); // Make sure the extended decl info is allocated. if (!hasExtInfo()) { // Save (non-extended) type source info pointer. auto *savedTInfo = DeclInfo.get(); // Allocate external info struct. DeclInfo = new (getASTContext()) ExtInfo; // Restore savedTInfo into (extended) decl info. getExtInfo()->TInfo = savedTInfo; } // Set requires clause info. getExtInfo()->TrailingRequiresClause = TrailingRequiresClause; } void DeclaratorDecl::setTemplateParameterListsInfo( ASTContext &Context, ArrayRef TPLists) { assert(!TPLists.empty()); // Make sure the extended decl info is allocated. if (!hasExtInfo()) { // Save (non-extended) type source info pointer. auto *savedTInfo = DeclInfo.get(); // Allocate external info struct. DeclInfo = new (getASTContext()) ExtInfo; // Restore savedTInfo into (extended) decl info. getExtInfo()->TInfo = savedTInfo; } // Set the template parameter lists info. 
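  // (Editorial illustration, not part of the original source.) These are the
  // template parameter lists written on an out-of-line declarator, e.g. the
  // '<typename T>' list in
  //   template <typename T> struct S { static int x; };
  //   template <typename T> int S<T>::x = 0;
  // which is what lets getOuterLocStart() start the range at 'template'.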
getExtInfo()->setTemplateParameterListsInfo(Context, TPLists); } SourceLocation DeclaratorDecl::getOuterLocStart() const { return getTemplateOrInnerLocStart(this); } // Helper function: returns true if QT is or contains a type // having a postfix component. static bool typeIsPostfix(QualType QT) { while (true) { const Type* T = QT.getTypePtr(); switch (T->getTypeClass()) { default: return false; case Type::Pointer: QT = cast(T)->getPointeeType(); break; case Type::BlockPointer: QT = cast(T)->getPointeeType(); break; case Type::MemberPointer: QT = cast(T)->getPointeeType(); break; case Type::LValueReference: case Type::RValueReference: QT = cast(T)->getPointeeType(); break; case Type::PackExpansion: QT = cast(T)->getPattern(); break; case Type::Paren: case Type::ConstantArray: case Type::DependentSizedArray: case Type::IncompleteArray: case Type::VariableArray: case Type::FunctionProto: case Type::FunctionNoProto: return true; } } } SourceRange DeclaratorDecl::getSourceRange() const { SourceLocation RangeEnd = getLocation(); if (TypeSourceInfo *TInfo = getTypeSourceInfo()) { // If the declaration has no name or the type extends past the name take the // end location of the type. if (!getDeclName() || typeIsPostfix(TInfo->getType())) RangeEnd = TInfo->getTypeLoc().getSourceRange().getEnd(); } return SourceRange(getOuterLocStart(), RangeEnd); } void QualifierInfo::setTemplateParameterListsInfo( ASTContext &Context, ArrayRef TPLists) { // Free previous template parameters (if any). if (NumTemplParamLists > 0) { Context.Deallocate(TemplParamLists); TemplParamLists = nullptr; NumTemplParamLists = 0; } // Set info on matched template parameter lists (if any). if (!TPLists.empty()) { TemplParamLists = new (Context) TemplateParameterList *[TPLists.size()]; NumTemplParamLists = TPLists.size(); std::copy(TPLists.begin(), TPLists.end(), TemplParamLists); } } //===----------------------------------------------------------------------===// // VarDecl Implementation //===----------------------------------------------------------------------===// const char *VarDecl::getStorageClassSpecifierString(StorageClass SC) { switch (SC) { case SC_None: break; case SC_Auto: return "auto"; case SC_Extern: return "extern"; case SC_PrivateExtern: return "__private_extern__"; case SC_Register: return "register"; case SC_Static: return "static"; } llvm_unreachable("Invalid storage class"); } VarDecl::VarDecl(Kind DK, ASTContext &C, DeclContext *DC, SourceLocation StartLoc, SourceLocation IdLoc, const IdentifierInfo *Id, QualType T, TypeSourceInfo *TInfo, StorageClass SC) : DeclaratorDecl(DK, DC, IdLoc, Id, T, TInfo, StartLoc), redeclarable_base(C) { static_assert(sizeof(VarDeclBitfields) <= sizeof(unsigned), "VarDeclBitfields too large!"); static_assert(sizeof(ParmVarDeclBitfields) <= sizeof(unsigned), "ParmVarDeclBitfields too large!"); static_assert(sizeof(NonParmVarDeclBitfields) <= sizeof(unsigned), "NonParmVarDeclBitfields too large!"); AllBits = 0; VarDeclBits.SClass = SC; // Everything else is implicitly initialized to false. 
} VarDecl *VarDecl::Create(ASTContext &C, DeclContext *DC, SourceLocation StartL, SourceLocation IdL, const IdentifierInfo *Id, QualType T, TypeSourceInfo *TInfo, StorageClass S) { return new (C, DC) VarDecl(Var, C, DC, StartL, IdL, Id, T, TInfo, S); } VarDecl *VarDecl::CreateDeserialized(ASTContext &C, GlobalDeclID ID) { return new (C, ID) VarDecl(Var, C, nullptr, SourceLocation(), SourceLocation(), nullptr, QualType(), nullptr, SC_None); } void VarDecl::setStorageClass(StorageClass SC) { assert(isLegalForVariable(SC)); VarDeclBits.SClass = SC; } VarDecl::TLSKind VarDecl::getTLSKind() const { switch (VarDeclBits.TSCSpec) { case TSCS_unspecified: if (!hasAttr() && !(getASTContext().getLangOpts().OpenMPUseTLS && getASTContext().getTargetInfo().isTLSSupported() && hasAttr())) return TLS_None; return ((getASTContext().getLangOpts().isCompatibleWithMSVC( LangOptions::MSVC2015)) || hasAttr()) ? TLS_Dynamic : TLS_Static; case TSCS___thread: // Fall through. case TSCS__Thread_local: return TLS_Static; case TSCS_thread_local: return TLS_Dynamic; } llvm_unreachable("Unknown thread storage class specifier!"); } SourceRange VarDecl::getSourceRange() const { if (const Expr *Init = getInit()) { SourceLocation InitEnd = Init->getEndLoc(); // If Init is implicit, ignore its source range and fallback on // DeclaratorDecl::getSourceRange() to handle postfix elements. if (InitEnd.isValid() && InitEnd != getLocation()) return SourceRange(getOuterLocStart(), InitEnd); } return DeclaratorDecl::getSourceRange(); } template static LanguageLinkage getDeclLanguageLinkage(const T &D) { // C++ [dcl.link]p1: All function types, function names with external linkage, // and variable names with external linkage have a language linkage. if (!D.hasExternalFormalLinkage()) return NoLanguageLinkage; // Language linkage is a C++ concept, but saying that everything else in C has // C language linkage fits the implementation nicely. if (!D.getASTContext().getLangOpts().CPlusPlus) return CLanguageLinkage; // C++ [dcl.link]p4: A C language linkage is ignored in determining the // language linkage of the names of class members and the function type of // class member functions. const DeclContext *DC = D.getDeclContext(); if (DC->isRecord()) return CXXLanguageLinkage; // If the first decl is in an extern "C" context, any other redeclaration // will have C language linkage. If the first one is not in an extern "C" // context, we would have reported an error for any other decl being in one. if (isFirstInExternCContext(&D)) return CLanguageLinkage; return CXXLanguageLinkage; } template static bool isDeclExternC(const T &D) { // Since the context is ignored for class members, they can only have C++ // language linkage or no language linkage. 
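  // (Editorial illustration, not part of the original source.) For example:
  //   extern "C" { struct S { static int x; void f(); }; }
  // S::x and S::f keep C++ language linkage, so isExternC() stays false for
  // them even though they are lexically inside an extern "C" specification.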
const DeclContext *DC = D.getDeclContext(); if (DC->isRecord()) { assert(D.getASTContext().getLangOpts().CPlusPlus); return false; } return D.getLanguageLinkage() == CLanguageLinkage; } LanguageLinkage VarDecl::getLanguageLinkage() const { return getDeclLanguageLinkage(*this); } bool VarDecl::isExternC() const { return isDeclExternC(*this); } bool VarDecl::isInExternCContext() const { return getLexicalDeclContext()->isExternCContext(); } bool VarDecl::isInExternCXXContext() const { return getLexicalDeclContext()->isExternCXXContext(); } VarDecl *VarDecl::getCanonicalDecl() { return getFirstDecl(); } VarDecl::DefinitionKind VarDecl::isThisDeclarationADefinition(ASTContext &C) const { if (isThisDeclarationADemotedDefinition()) return DeclarationOnly; // C++ [basic.def]p2: // A declaration is a definition unless [...] it contains the 'extern' // specifier or a linkage-specification and neither an initializer [...], // it declares a non-inline static data member in a class declaration [...], // it declares a static data member outside a class definition and the variable // was defined within the class with the constexpr specifier [...], // C++1y [temp.expl.spec]p15: // An explicit specialization of a static data member or an explicit // specialization of a static data member template is a definition if the // declaration includes an initializer; otherwise, it is a declaration. // // FIXME: How do you declare (but not define) a partial specialization of // a static data member template outside the containing class? if (isStaticDataMember()) { if (isOutOfLine() && !(getCanonicalDecl()->isInline() && getCanonicalDecl()->isConstexpr()) && (hasInit() || // If the first declaration is out-of-line, this may be an // instantiation of an out-of-line partial specialization of a variable // template for which we have not yet instantiated the initializer. (getFirstDecl()->isOutOfLine() ? getTemplateSpecializationKind() == TSK_Undeclared : getTemplateSpecializationKind() != TSK_ExplicitSpecialization) || isa(this))) return Definition; if (!isOutOfLine() && isInline()) return Definition; return DeclarationOnly; } // C99 6.7p5: // A definition of an identifier is a declaration for that identifier that // [...] causes storage to be reserved for that object. // Note: that applies for all non-file-scope objects. // C99 6.9.2p1: // If the declaration of an identifier for an object has file scope and an // initializer, the declaration is an external definition for the identifier if (hasInit()) return Definition; if (hasDefiningAttr()) return Definition; if (const auto *SAA = getAttr()) if (!SAA->isInherited()) return Definition; // A variable template specialization (other than a static data member // template or an explicit specialization) is a declaration until we // instantiate its initializer. if (auto *VTSD = dyn_cast(this)) { if (VTSD->getTemplateSpecializationKind() != TSK_ExplicitSpecialization && !isa(VTSD) && !VTSD->IsCompleteDefinition) return DeclarationOnly; } if (hasExternalStorage()) return DeclarationOnly; // [dcl.link] p7: // A declaration directly contained in a linkage-specification is treated // as if it contains the extern specifier for the purpose of determining // the linkage of the declared name and whether it is a definition. if (isSingleLineLanguageLinkage(*this)) return DeclarationOnly; // C99 6.9.2p2: // A declaration of an object that has file scope without an initializer, // and without a storage class specifier or the scs 'static', constitutes // a tentative definition. 
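  // For example (editorial sketch, not original text):
  //   int x;        // tentative definition at file scope in C
  //   int x = 42;   // becomes the external definition for the same object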
// No such thing in C++. if (!C.getLangOpts().CPlusPlus && isFileVarDecl()) return TentativeDefinition; // What's left is (in C, block-scope) declarations without initializers or // external storage. These are definitions. return Definition; } VarDecl *VarDecl::getActingDefinition() { DefinitionKind Kind = isThisDeclarationADefinition(); if (Kind != TentativeDefinition) return nullptr; VarDecl *LastTentative = nullptr; // Loop through the declaration chain, starting with the most recent. for (VarDecl *Decl = getMostRecentDecl(); Decl; Decl = Decl->getPreviousDecl()) { Kind = Decl->isThisDeclarationADefinition(); if (Kind == Definition) return nullptr; // Record the first (most recent) TentativeDefinition that is encountered. if (Kind == TentativeDefinition && !LastTentative) LastTentative = Decl; } return LastTentative; } VarDecl *VarDecl::getDefinition(ASTContext &C) { VarDecl *First = getFirstDecl(); for (auto *I : First->redecls()) { if (I->isThisDeclarationADefinition(C) == Definition) return I; } return nullptr; } VarDecl::DefinitionKind VarDecl::hasDefinition(ASTContext &C) const { DefinitionKind Kind = DeclarationOnly; const VarDecl *First = getFirstDecl(); for (auto *I : First->redecls()) { Kind = std::max(Kind, I->isThisDeclarationADefinition(C)); if (Kind == Definition) break; } return Kind; } const Expr *VarDecl::getAnyInitializer(const VarDecl *&D) const { for (auto *I : redecls()) { if (auto Expr = I->getInit()) { D = I; return Expr; } } return nullptr; } bool VarDecl::hasInit() const { if (auto *P = dyn_cast(this)) if (P->hasUnparsedDefaultArg() || P->hasUninstantiatedDefaultArg()) return false; if (auto *Eval = getEvaluatedStmt()) return Eval->Value.isValid(); return !Init.isNull(); } Expr *VarDecl::getInit() { if (!hasInit()) return nullptr; if (auto *S = Init.dyn_cast()) return cast(S); auto *Eval = getEvaluatedStmt(); return cast(Eval->Value.get( Eval->Value.isOffset() ? getASTContext().getExternalSource() : nullptr)); } Stmt **VarDecl::getInitAddress() { if (auto *ES = Init.dyn_cast()) return ES->Value.getAddressOfPointer(getASTContext().getExternalSource()); return Init.getAddrOfPtr1(); } VarDecl *VarDecl::getInitializingDeclaration() { VarDecl *Def = nullptr; for (auto *I : redecls()) { if (I->hasInit()) return I; if (I->isThisDeclarationADefinition()) { if (isStaticDataMember()) return I; Def = I; } } return Def; } bool VarDecl::isOutOfLine() const { if (Decl::isOutOfLine()) return true; if (!isStaticDataMember()) return false; // If this static data member was instantiated from a static data member of // a class template, check whether that static data member was defined // out-of-line. if (VarDecl *VD = getInstantiatedFromStaticDataMember()) return VD->isOutOfLine(); return false; } void VarDecl::setInit(Expr *I) { if (auto *Eval = Init.dyn_cast()) { Eval->~EvaluatedStmt(); getASTContext().Deallocate(Eval); } Init = I; } bool VarDecl::mightBeUsableInConstantExpressions(const ASTContext &C) const { const LangOptions &Lang = C.getLangOpts(); // OpenCL permits const integral variables to be used in constant // expressions, like in C++98. if (!Lang.CPlusPlus && !Lang.OpenCL && !Lang.C23) return false; // Function parameters are never usable in constant expressions. if (isa(this)) return false; // The values of weak variables are never usable in constant expressions. if (isWeak()) return false; // In C++11, any variable of reference type can be used in a constant // expression if it is initialized by a constant expression. 
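  // For example (editorial sketch, not original text):
  //   constexpr int k = 10;
  //   const int &r = k;   // in C++11, r may appear in constant expressions
  //                       // because its initializer is a constant expression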
  if (Lang.CPlusPlus11 && getType()->isReferenceType())
    return true;

  // Only const objects can be used in constant expressions in C++. C++98 does
  // not require the variable to be non-volatile, but we consider this to be a
  // defect.
  if (!getType().isConstant(C) || getType().isVolatileQualified())
    return false;

  // In C++, but not in C, const, non-volatile variables of integral or
  // enumeration types can be used in constant expressions.
  if (getType()->isIntegralOrEnumerationType() && !Lang.C23)
    return true;

  // C23 6.6p7: An identifier that is:
  // ...
  // - declared with storage-class specifier constexpr and has an object type,
  // is a named constant, ... such a named constant is a constant expression
  // with the type and value of the declared object.
  // Additionally, in C++11, non-volatile constexpr variables can be used in
  // constant expressions.
  return (Lang.CPlusPlus11 || Lang.C23) && isConstexpr();
}

bool VarDecl::isUsableInConstantExpressions(const ASTContext &Context) const {
  // C++2a [expr.const]p3:
  // A variable is usable in constant expressions after its initializing
  // declaration is encountered...
  const VarDecl *DefVD = nullptr;
  const Expr *Init = getAnyInitializer(DefVD);
  if (!Init || Init->isValueDependent() || getType()->isDependentType())
    return false;
  // ... if it is a constexpr variable, or it is of reference type or of
  // const-qualified integral or enumeration type, ...
  if (!DefVD->mightBeUsableInConstantExpressions(Context))
    return false;
  // ... and its initializer is a constant initializer.
-  if (Context.getLangOpts().CPlusPlus && !DefVD->hasConstantInitialization())
+  if ((Context.getLangOpts().CPlusPlus || getLangOpts().C23) &&
+      !DefVD->hasConstantInitialization())
    return false;
  // C++98 [expr.const]p1:
  // An integral constant-expression can involve only [...] const variables
  // or static data members of integral or enumeration types initialized with
  // [integer] constant expressions (dcl.init)
  if ((Context.getLangOpts().CPlusPlus || Context.getLangOpts().OpenCL) &&
      !Context.getLangOpts().CPlusPlus11 && !DefVD->hasICEInitializer(Context))
    return false;
  return true;
}

/// Convert the initializer for this declaration to the elaborated EvaluatedStmt
/// form, which contains extra information on the evaluated value of the
/// initializer.
EvaluatedStmt *VarDecl::ensureEvaluatedStmt() const {
  auto *Eval = Init.dyn_cast<EvaluatedStmt *>();
  if (!Eval) {
    // Note: EvaluatedStmt contains an APValue, which usually holds
    // resources not allocated from the ASTContext. We need to do some
    // work to avoid leaking those, but we do so in VarDecl::evaluateValue
    // where we can detect whether there's anything to clean up or not.
    Eval = new (getASTContext()) EvaluatedStmt;
    Eval->Value = Init.get<Stmt *>();
    Init = Eval;
  }
  return Eval;
}

EvaluatedStmt *VarDecl::getEvaluatedStmt() const {
  return Init.dyn_cast<EvaluatedStmt *>();
}

APValue *VarDecl::evaluateValue() const {
  SmallVector<PartialDiagnosticAt, 8> Notes;
  return evaluateValueImpl(Notes, hasConstantInitialization());
}

APValue *VarDecl::evaluateValueImpl(SmallVectorImpl<PartialDiagnosticAt> &Notes,
                                    bool IsConstantInitialization) const {
  EvaluatedStmt *Eval = ensureEvaluatedStmt();

  const auto *Init = getInit();
  assert(!Init->isValueDependent());

  // We only produce notes indicating why an initializer is non-constant the
  // first time it is evaluated. FIXME: The notes won't always be emitted the
  // first time we try evaluation, so might not be produced at all.
  if (Eval->WasEvaluated)
    return Eval->Evaluated.isAbsent() ?
               nullptr : &Eval->Evaluated;

  if (Eval->IsEvaluating) {
    // FIXME: Produce a diagnostic for self-initialization.
    return nullptr;
  }

  Eval->IsEvaluating = true;

  ASTContext &Ctx = getASTContext();
  bool Result = Init->EvaluateAsInitializer(Eval->Evaluated, Ctx, this, Notes,
                                            IsConstantInitialization);

  // In C++, or in C23 if we're initialising a 'constexpr' variable, this isn't
  // a constant initializer if we produced notes. In that case, we can't keep
  // the result, because it may only be correct under the assumption that the
  // initializer is a constant context.
  if (IsConstantInitialization &&
      (Ctx.getLangOpts().CPlusPlus ||
       (isConstexpr() && Ctx.getLangOpts().C23)) &&
      !Notes.empty())
    Result = false;

  // Ensure the computed APValue is cleaned up later if evaluation succeeded,
  // or that it's empty (so that there's nothing to clean up) if evaluation
  // failed.
  if (!Result)
    Eval->Evaluated = APValue();
  else if (Eval->Evaluated.needsCleanup())
    Ctx.addDestruction(&Eval->Evaluated);

  Eval->IsEvaluating = false;
  Eval->WasEvaluated = true;

  return Result ? &Eval->Evaluated : nullptr;
}

APValue *VarDecl::getEvaluatedValue() const {
  if (EvaluatedStmt *Eval = getEvaluatedStmt())
    if (Eval->WasEvaluated)
      return &Eval->Evaluated;

  return nullptr;
}

bool VarDecl::hasICEInitializer(const ASTContext &Context) const {
  const Expr *Init = getInit();
  assert(Init && "no initializer");

  EvaluatedStmt *Eval = ensureEvaluatedStmt();
  if (!Eval->CheckedForICEInit) {
    Eval->CheckedForICEInit = true;
    Eval->HasICEInit = Init->isIntegerConstantExpr(Context);
  }
  return Eval->HasICEInit;
}

bool VarDecl::hasConstantInitialization() const {
-  // In C, all globals (and only globals) have constant initialization.
-  if (hasGlobalStorage() && !getASTContext().getLangOpts().CPlusPlus)
+  // In C, all globals and constexpr variables should have constant
+  // initialization. For constexpr variables in C check that initializer is a
+  // constant initializer because they can be used in constant expressions.
+  if (hasGlobalStorage() && !getASTContext().getLangOpts().CPlusPlus &&
+      !isConstexpr())
    return true;

  // In C++, it depends on whether the evaluation at the point of definition
  // was evaluatable as a constant initializer.
  if (EvaluatedStmt *Eval = getEvaluatedStmt())
    return Eval->HasConstantInitialization;

  return false;
}

bool VarDecl::checkForConstantInitialization(
    SmallVectorImpl<PartialDiagnosticAt> &Notes) const {
  EvaluatedStmt *Eval = ensureEvaluatedStmt();
  // If we ask for the value before we know whether we have a constant
  // initializer, we can compute the wrong value (for example, due to
  // std::is_constant_evaluated()).
  assert(!Eval->WasEvaluated &&
         "already evaluated var value before checking for constant init");
  assert((getASTContext().getLangOpts().CPlusPlus ||
          getASTContext().getLangOpts().C23) &&
         "only meaningful in C++/C23");

  assert(!getInit()->isValueDependent());

  // Evaluate the initializer to check whether it's a constant expression.
  Eval->HasConstantInitialization =
      evaluateValueImpl(Notes, true) && Notes.empty();

  // If evaluation as a constant initializer failed, allow re-evaluation as a
  // non-constant initializer if we later find we want the value.
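  // (Editorial sketch, not original text.) For instance, a later call such as
  //   if (APValue *Val = VD->evaluateValue())   // VD: some VarDecl*
  //     ...
  // may still fold the initializer even though it was rejected as a constant
  // initializer, which is why WasEvaluated is reset below.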
if (!Eval->HasConstantInitialization) Eval->WasEvaluated = false; return Eval->HasConstantInitialization; } bool VarDecl::isParameterPack() const { return isa(getType()); } template static DeclT *getDefinitionOrSelf(DeclT *D) { assert(D); if (auto *Def = D->getDefinition()) return Def; return D; } bool VarDecl::isEscapingByref() const { return hasAttr() && NonParmVarDeclBits.EscapingByref; } bool VarDecl::isNonEscapingByref() const { return hasAttr() && !NonParmVarDeclBits.EscapingByref; } bool VarDecl::hasDependentAlignment() const { QualType T = getType(); return T->isDependentType() || T->isUndeducedType() || llvm::any_of(specific_attrs(), [](const AlignedAttr *AA) { return AA->isAlignmentDependent(); }); } VarDecl *VarDecl::getTemplateInstantiationPattern() const { const VarDecl *VD = this; // If this is an instantiated member, walk back to the template from which // it was instantiated. if (MemberSpecializationInfo *MSInfo = VD->getMemberSpecializationInfo()) { if (isTemplateInstantiation(MSInfo->getTemplateSpecializationKind())) { VD = VD->getInstantiatedFromStaticDataMember(); while (auto *NewVD = VD->getInstantiatedFromStaticDataMember()) VD = NewVD; } } // If it's an instantiated variable template specialization, find the // template or partial specialization from which it was instantiated. if (auto *VDTemplSpec = dyn_cast(VD)) { if (isTemplateInstantiation(VDTemplSpec->getTemplateSpecializationKind())) { auto From = VDTemplSpec->getInstantiatedFrom(); if (auto *VTD = From.dyn_cast()) { while (!VTD->isMemberSpecialization()) { auto *NewVTD = VTD->getInstantiatedFromMemberTemplate(); if (!NewVTD) break; VTD = NewVTD; } return getDefinitionOrSelf(VTD->getTemplatedDecl()); } if (auto *VTPSD = From.dyn_cast()) { while (!VTPSD->isMemberSpecialization()) { auto *NewVTPSD = VTPSD->getInstantiatedFromMember(); if (!NewVTPSD) break; VTPSD = NewVTPSD; } return getDefinitionOrSelf(VTPSD); } } } // If this is the pattern of a variable template, find where it was // instantiated from. FIXME: Is this necessary? 
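  // (Editorial illustration, not part of the original source.) This covers a
  // variable template that is itself a member of a class template, e.g.
  //   template <typename T> struct S {
  //     template <typename U> static constexpr bool v = true;
  //   };
  // where the pattern should come from the member template in the primary
  // class template S<T>.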
if (VarTemplateDecl *VarTemplate = VD->getDescribedVarTemplate()) { while (!VarTemplate->isMemberSpecialization()) { auto *NewVT = VarTemplate->getInstantiatedFromMemberTemplate(); if (!NewVT) break; VarTemplate = NewVT; } return getDefinitionOrSelf(VarTemplate->getTemplatedDecl()); } if (VD == this) return nullptr; return getDefinitionOrSelf(const_cast(VD)); } VarDecl *VarDecl::getInstantiatedFromStaticDataMember() const { if (MemberSpecializationInfo *MSI = getMemberSpecializationInfo()) return cast(MSI->getInstantiatedFrom()); return nullptr; } TemplateSpecializationKind VarDecl::getTemplateSpecializationKind() const { if (const auto *Spec = dyn_cast(this)) return Spec->getSpecializationKind(); if (MemberSpecializationInfo *MSI = getMemberSpecializationInfo()) return MSI->getTemplateSpecializationKind(); return TSK_Undeclared; } TemplateSpecializationKind VarDecl::getTemplateSpecializationKindForInstantiation() const { if (MemberSpecializationInfo *MSI = getMemberSpecializationInfo()) return MSI->getTemplateSpecializationKind(); if (const auto *Spec = dyn_cast(this)) return Spec->getSpecializationKind(); return TSK_Undeclared; } SourceLocation VarDecl::getPointOfInstantiation() const { if (const auto *Spec = dyn_cast(this)) return Spec->getPointOfInstantiation(); if (MemberSpecializationInfo *MSI = getMemberSpecializationInfo()) return MSI->getPointOfInstantiation(); return SourceLocation(); } VarTemplateDecl *VarDecl::getDescribedVarTemplate() const { return getASTContext().getTemplateOrSpecializationInfo(this) .dyn_cast(); } void VarDecl::setDescribedVarTemplate(VarTemplateDecl *Template) { getASTContext().setTemplateOrSpecializationInfo(this, Template); } bool VarDecl::isKnownToBeDefined() const { const auto &LangOpts = getASTContext().getLangOpts(); // In CUDA mode without relocatable device code, variables of form 'extern // __shared__ Foo foo[]' are pointers to the base of the GPU core's shared // memory pool. These are never undefined variables, even if they appear // inside of an anon namespace or static function. // // With CUDA relocatable device code enabled, these variables don't get // special handling; they're treated like regular extern variables. 
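  // For example (editorial sketch, not original text), CUDA dynamic shared
  // memory is declared as
  //   extern __shared__ int buf[];
  // and, without relocatable device code, is treated as defined rather than
  // as an undefined external.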
if (LangOpts.CUDA && !LangOpts.GPURelocatableDeviceCode && hasExternalStorage() && hasAttr() && isa(getType())) return true; return hasDefinition(); } bool VarDecl::isNoDestroy(const ASTContext &Ctx) const { return hasGlobalStorage() && (hasAttr() || (!Ctx.getLangOpts().RegisterStaticDestructors && !hasAttr())); } QualType::DestructionKind VarDecl::needsDestruction(const ASTContext &Ctx) const { if (EvaluatedStmt *Eval = getEvaluatedStmt()) if (Eval->HasConstantDestruction) return QualType::DK_none; if (isNoDestroy(Ctx)) return QualType::DK_none; return getType().isDestructedType(); } bool VarDecl::hasFlexibleArrayInit(const ASTContext &Ctx) const { assert(hasInit() && "Expect initializer to check for flexible array init"); auto *Ty = getType()->getAs(); if (!Ty || !Ty->getDecl()->hasFlexibleArrayMember()) return false; auto *List = dyn_cast(getInit()->IgnoreParens()); if (!List) return false; const Expr *FlexibleInit = List->getInit(List->getNumInits() - 1); auto InitTy = Ctx.getAsConstantArrayType(FlexibleInit->getType()); if (!InitTy) return false; return !InitTy->isZeroSize(); } CharUnits VarDecl::getFlexibleArrayInitChars(const ASTContext &Ctx) const { assert(hasInit() && "Expect initializer to check for flexible array init"); auto *Ty = getType()->getAs(); if (!Ty || !Ty->getDecl()->hasFlexibleArrayMember()) return CharUnits::Zero(); auto *List = dyn_cast(getInit()->IgnoreParens()); if (!List || List->getNumInits() == 0) return CharUnits::Zero(); const Expr *FlexibleInit = List->getInit(List->getNumInits() - 1); auto InitTy = Ctx.getAsConstantArrayType(FlexibleInit->getType()); if (!InitTy) return CharUnits::Zero(); CharUnits FlexibleArraySize = Ctx.getTypeSizeInChars(InitTy); const ASTRecordLayout &RL = Ctx.getASTRecordLayout(Ty->getDecl()); CharUnits FlexibleArrayOffset = Ctx.toCharUnitsFromBits(RL.getFieldOffset(RL.getFieldCount() - 1)); if (FlexibleArrayOffset + FlexibleArraySize < RL.getSize()) return CharUnits::Zero(); return FlexibleArrayOffset + FlexibleArraySize - RL.getSize(); } MemberSpecializationInfo *VarDecl::getMemberSpecializationInfo() const { if (isStaticDataMember()) // FIXME: Remove ? 
// return getASTContext().getInstantiatedFromStaticDataMember(this); return getASTContext().getTemplateOrSpecializationInfo(this) .dyn_cast(); return nullptr; } void VarDecl::setTemplateSpecializationKind(TemplateSpecializationKind TSK, SourceLocation PointOfInstantiation) { assert((isa(this) || getMemberSpecializationInfo()) && "not a variable or static data member template specialization"); if (VarTemplateSpecializationDecl *Spec = dyn_cast(this)) { Spec->setSpecializationKind(TSK); if (TSK != TSK_ExplicitSpecialization && PointOfInstantiation.isValid() && Spec->getPointOfInstantiation().isInvalid()) { Spec->setPointOfInstantiation(PointOfInstantiation); if (ASTMutationListener *L = getASTContext().getASTMutationListener()) L->InstantiationRequested(this); } } else if (MemberSpecializationInfo *MSI = getMemberSpecializationInfo()) { MSI->setTemplateSpecializationKind(TSK); if (TSK != TSK_ExplicitSpecialization && PointOfInstantiation.isValid() && MSI->getPointOfInstantiation().isInvalid()) { MSI->setPointOfInstantiation(PointOfInstantiation); if (ASTMutationListener *L = getASTContext().getASTMutationListener()) L->InstantiationRequested(this); } } } void VarDecl::setInstantiationOfStaticDataMember(VarDecl *VD, TemplateSpecializationKind TSK) { assert(getASTContext().getTemplateOrSpecializationInfo(this).isNull() && "Previous template or instantiation?"); getASTContext().setInstantiatedFromStaticDataMember(this, VD, TSK); } //===----------------------------------------------------------------------===// // ParmVarDecl Implementation //===----------------------------------------------------------------------===// ParmVarDecl *ParmVarDecl::Create(ASTContext &C, DeclContext *DC, SourceLocation StartLoc, SourceLocation IdLoc, const IdentifierInfo *Id, QualType T, TypeSourceInfo *TInfo, StorageClass S, Expr *DefArg) { return new (C, DC) ParmVarDecl(ParmVar, C, DC, StartLoc, IdLoc, Id, T, TInfo, S, DefArg); } QualType ParmVarDecl::getOriginalType() const { TypeSourceInfo *TSI = getTypeSourceInfo(); QualType T = TSI ? TSI->getType() : getType(); if (const auto *DT = dyn_cast(T)) return DT->getOriginalType(); return T; } ParmVarDecl *ParmVarDecl::CreateDeserialized(ASTContext &C, GlobalDeclID ID) { return new (C, ID) ParmVarDecl(ParmVar, C, nullptr, SourceLocation(), SourceLocation(), nullptr, QualType(), nullptr, SC_None, nullptr); } SourceRange ParmVarDecl::getSourceRange() const { if (!hasInheritedDefaultArg()) { SourceRange ArgRange = getDefaultArgRange(); if (ArgRange.isValid()) return SourceRange(getOuterLocStart(), ArgRange.getEnd()); } // DeclaratorDecl considers the range of postfix types as overlapping with the // declaration name, but this is not the case with parameters in ObjC methods. 
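  // For example (editorial sketch, not original text), in
  //   - (void)setValue:(int)value;
  // the parameter's type is written entirely inside the parentheses before
  // the name, so nothing of the type can trail the name and the range for
  // such parameters ends at the name itself.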
if (isa(getDeclContext())) return SourceRange(DeclaratorDecl::getBeginLoc(), getLocation()); return DeclaratorDecl::getSourceRange(); } bool ParmVarDecl::isDestroyedInCallee() const { // ns_consumed only affects code generation in ARC if (hasAttr()) return getASTContext().getLangOpts().ObjCAutoRefCount; // FIXME: isParamDestroyedInCallee() should probably imply // isDestructedType() const auto *RT = getType()->getAs(); if (RT && RT->getDecl()->isParamDestroyedInCallee() && getType().isDestructedType()) return true; return false; } Expr *ParmVarDecl::getDefaultArg() { assert(!hasUnparsedDefaultArg() && "Default argument is not yet parsed!"); assert(!hasUninstantiatedDefaultArg() && "Default argument is not yet instantiated!"); Expr *Arg = getInit(); if (auto *E = dyn_cast_if_present(Arg)) return E->getSubExpr(); return Arg; } void ParmVarDecl::setDefaultArg(Expr *defarg) { ParmVarDeclBits.DefaultArgKind = DAK_Normal; Init = defarg; } SourceRange ParmVarDecl::getDefaultArgRange() const { switch (ParmVarDeclBits.DefaultArgKind) { case DAK_None: case DAK_Unparsed: // Nothing we can do here. return SourceRange(); case DAK_Uninstantiated: return getUninstantiatedDefaultArg()->getSourceRange(); case DAK_Normal: if (const Expr *E = getInit()) return E->getSourceRange(); // Missing an actual expression, may be invalid. return SourceRange(); } llvm_unreachable("Invalid default argument kind."); } void ParmVarDecl::setUninstantiatedDefaultArg(Expr *arg) { ParmVarDeclBits.DefaultArgKind = DAK_Uninstantiated; Init = arg; } Expr *ParmVarDecl::getUninstantiatedDefaultArg() { assert(hasUninstantiatedDefaultArg() && "Wrong kind of initialization expression!"); return cast_if_present(Init.get()); } bool ParmVarDecl::hasDefaultArg() const { // FIXME: We should just return false for DAK_None here once callers are // prepared for the case that we encountered an invalid default argument and // were unable to even build an invalid expression. 
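  // (Editorial illustration, not part of the original source.) An unparsed
  // default argument arises from delayed parsing inside a class body, e.g.
  //   struct S { void f(int x = g()); static int g(); };
  // where 'g()' is only parsed once S is complete, yet f is already known to
  // have a default argument.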
return hasUnparsedDefaultArg() || hasUninstantiatedDefaultArg() || !Init.isNull(); } void ParmVarDecl::setParameterIndexLarge(unsigned parameterIndex) { getASTContext().setParameterIndex(this, parameterIndex); ParmVarDeclBits.ParameterIndex = ParameterIndexSentinel; } unsigned ParmVarDecl::getParameterIndexLarge() const { return getASTContext().getParameterIndex(this); } //===----------------------------------------------------------------------===// // FunctionDecl Implementation //===----------------------------------------------------------------------===// FunctionDecl::FunctionDecl(Kind DK, ASTContext &C, DeclContext *DC, SourceLocation StartLoc, const DeclarationNameInfo &NameInfo, QualType T, TypeSourceInfo *TInfo, StorageClass S, bool UsesFPIntrin, bool isInlineSpecified, ConstexprSpecKind ConstexprKind, Expr *TrailingRequiresClause) : DeclaratorDecl(DK, DC, NameInfo.getLoc(), NameInfo.getName(), T, TInfo, StartLoc), DeclContext(DK), redeclarable_base(C), Body(), ODRHash(0), EndRangeLoc(NameInfo.getEndLoc()), DNLoc(NameInfo.getInfo()) { assert(T.isNull() || T->isFunctionType()); FunctionDeclBits.SClass = S; FunctionDeclBits.IsInline = isInlineSpecified; FunctionDeclBits.IsInlineSpecified = isInlineSpecified; FunctionDeclBits.IsVirtualAsWritten = false; FunctionDeclBits.IsPureVirtual = false; FunctionDeclBits.HasInheritedPrototype = false; FunctionDeclBits.HasWrittenPrototype = true; FunctionDeclBits.IsDeleted = false; FunctionDeclBits.IsTrivial = false; FunctionDeclBits.IsTrivialForCall = false; FunctionDeclBits.IsDefaulted = false; FunctionDeclBits.IsExplicitlyDefaulted = false; FunctionDeclBits.HasDefaultedOrDeletedInfo = false; FunctionDeclBits.IsIneligibleOrNotSelected = false; FunctionDeclBits.HasImplicitReturnZero = false; FunctionDeclBits.IsLateTemplateParsed = false; FunctionDeclBits.ConstexprKind = static_cast(ConstexprKind); FunctionDeclBits.BodyContainsImmediateEscalatingExpression = false; FunctionDeclBits.InstantiationIsPending = false; FunctionDeclBits.UsesSEHTry = false; FunctionDeclBits.UsesFPIntrin = UsesFPIntrin; FunctionDeclBits.HasSkippedBody = false; FunctionDeclBits.WillHaveBody = false; FunctionDeclBits.IsMultiVersion = false; FunctionDeclBits.DeductionCandidateKind = static_cast(DeductionCandidate::Normal); FunctionDeclBits.HasODRHash = false; FunctionDeclBits.FriendConstraintRefersToEnclosingTemplate = false; if (TrailingRequiresClause) setTrailingRequiresClause(TrailingRequiresClause); } void FunctionDecl::getNameForDiagnostic( raw_ostream &OS, const PrintingPolicy &Policy, bool Qualified) const { NamedDecl::getNameForDiagnostic(OS, Policy, Qualified); const TemplateArgumentList *TemplateArgs = getTemplateSpecializationArgs(); if (TemplateArgs) printTemplateArgumentList(OS, TemplateArgs->asArray(), Policy); } bool FunctionDecl::isVariadic() const { if (const auto *FT = getType()->getAs()) return FT->isVariadic(); return false; } FunctionDecl::DefaultedOrDeletedFunctionInfo * FunctionDecl::DefaultedOrDeletedFunctionInfo::Create( ASTContext &Context, ArrayRef Lookups, StringLiteral *DeletedMessage) { static constexpr size_t Alignment = std::max({alignof(DefaultedOrDeletedFunctionInfo), alignof(DeclAccessPair), alignof(StringLiteral *)}); size_t Size = totalSizeToAlloc( Lookups.size(), DeletedMessage != nullptr); DefaultedOrDeletedFunctionInfo *Info = new (Context.Allocate(Size, Alignment)) DefaultedOrDeletedFunctionInfo; Info->NumLookups = Lookups.size(); Info->HasDeletedMessage = DeletedMessage != nullptr; std::uninitialized_copy(Lookups.begin(), 
Lookups.end(), Info->getTrailingObjects()); if (DeletedMessage) *Info->getTrailingObjects() = DeletedMessage; return Info; } void FunctionDecl::setDefaultedOrDeletedInfo( DefaultedOrDeletedFunctionInfo *Info) { assert(!FunctionDeclBits.HasDefaultedOrDeletedInfo && "already have this"); assert(!Body && "can't replace function body with defaulted function info"); FunctionDeclBits.HasDefaultedOrDeletedInfo = true; DefaultedOrDeletedInfo = Info; } void FunctionDecl::setDeletedAsWritten(bool D, StringLiteral *Message) { FunctionDeclBits.IsDeleted = D; if (Message) { assert(isDeletedAsWritten() && "Function must be deleted"); if (FunctionDeclBits.HasDefaultedOrDeletedInfo) DefaultedOrDeletedInfo->setDeletedMessage(Message); else setDefaultedOrDeletedInfo(DefaultedOrDeletedFunctionInfo::Create( getASTContext(), /*Lookups=*/{}, Message)); } } void FunctionDecl::DefaultedOrDeletedFunctionInfo::setDeletedMessage( StringLiteral *Message) { // We should never get here with the DefaultedOrDeletedInfo populated, but // no space allocated for the deleted message, since that would require // recreating this, but setDefaultedOrDeletedInfo() disallows overwriting // an already existing DefaultedOrDeletedFunctionInfo. assert(HasDeletedMessage && "No space to store a delete message in this DefaultedOrDeletedInfo"); *getTrailingObjects() = Message; } FunctionDecl::DefaultedOrDeletedFunctionInfo * FunctionDecl::getDefalutedOrDeletedInfo() const { return FunctionDeclBits.HasDefaultedOrDeletedInfo ? DefaultedOrDeletedInfo : nullptr; } bool FunctionDecl::hasBody(const FunctionDecl *&Definition) const { for (const auto *I : redecls()) { if (I->doesThisDeclarationHaveABody()) { Definition = I; return true; } } return false; } bool FunctionDecl::hasTrivialBody() const { const Stmt *S = getBody(); if (!S) { // Since we don't have a body for this function, we don't know if it's // trivial or not. return false; } if (isa(S) && cast(S)->body_empty()) return true; return false; } bool FunctionDecl::isThisDeclarationInstantiatedFromAFriendDefinition() const { if (!getFriendObjectKind()) return false; // Check for a friend function instantiated from a friend function // definition in a templated class. if (const FunctionDecl *InstantiatedFrom = getInstantiatedFromMemberFunction()) return InstantiatedFrom->getFriendObjectKind() && InstantiatedFrom->isThisDeclarationADefinition(); // Check for a friend function template instantiated from a friend // function template definition in a templated class. if (const FunctionTemplateDecl *Template = getDescribedFunctionTemplate()) { if (const FunctionTemplateDecl *InstantiatedFrom = Template->getInstantiatedFromMemberTemplate()) return InstantiatedFrom->getFriendObjectKind() && InstantiatedFrom->isThisDeclarationADefinition(); } return false; } bool FunctionDecl::isDefined(const FunctionDecl *&Definition, bool CheckForPendingFriendDefinition) const { for (const FunctionDecl *FD : redecls()) { if (FD->isThisDeclarationADefinition()) { Definition = FD; return true; } // If this is a friend function defined in a class template, it does not // have a body until it is used, nevertheless it is a definition, see // [temp.inst]p2: // // ... for the purpose of determining whether an instantiated redeclaration // is valid according to [basic.def.odr] and [class.mem], a declaration that // corresponds to a definition in the template is considered to be a // definition. 
// // The following code must produce redefinition error: // // template struct C20 { friend void func_20() {} }; // C20 c20i; // void func_20() {} // if (CheckForPendingFriendDefinition && FD->isThisDeclarationInstantiatedFromAFriendDefinition()) { Definition = FD; return true; } } return false; } Stmt *FunctionDecl::getBody(const FunctionDecl *&Definition) const { if (!hasBody(Definition)) return nullptr; assert(!Definition->FunctionDeclBits.HasDefaultedOrDeletedInfo && "definition should not have a body"); if (Definition->Body) return Definition->Body.get(getASTContext().getExternalSource()); return nullptr; } void FunctionDecl::setBody(Stmt *B) { FunctionDeclBits.HasDefaultedOrDeletedInfo = false; Body = LazyDeclStmtPtr(B); if (B) EndRangeLoc = B->getEndLoc(); } void FunctionDecl::setIsPureVirtual(bool P) { FunctionDeclBits.IsPureVirtual = P; if (P) if (auto *Parent = dyn_cast(getDeclContext())) Parent->markedVirtualFunctionPure(); } template static bool isNamed(const NamedDecl *ND, const char (&Str)[Len]) { const IdentifierInfo *II = ND->getIdentifier(); return II && II->isStr(Str); } bool FunctionDecl::isImmediateEscalating() const { // C++23 [expr.const]/p17 // An immediate-escalating function is // - the call operator of a lambda that is not declared with the consteval // specifier, if (isLambdaCallOperator(this) && !isConsteval()) return true; // - a defaulted special member function that is not declared with the // consteval specifier, if (isDefaulted() && !isConsteval()) return true; // - a function that results from the instantiation of a templated entity // defined with the constexpr specifier. TemplatedKind TK = getTemplatedKind(); if (TK != TK_NonTemplate && TK != TK_DependentNonTemplate && isConstexprSpecified()) return true; return false; } bool FunctionDecl::isImmediateFunction() const { // C++23 [expr.const]/p18 // An immediate function is a function or constructor that is // - declared with the consteval specifier if (isConsteval()) return true; // - an immediate-escalating function F whose function body contains an // immediate-escalating expression if (isImmediateEscalating() && BodyContainsImmediateEscalatingExpressions()) return true; if (const auto *MD = dyn_cast(this); MD && MD->isLambdaStaticInvoker()) return MD->getParent()->getLambdaCallOperator()->isImmediateFunction(); return false; } bool FunctionDecl::isMain() const { const TranslationUnitDecl *tunit = dyn_cast(getDeclContext()->getRedeclContext()); return tunit && !tunit->getASTContext().getLangOpts().Freestanding && isNamed(this, "main"); } bool FunctionDecl::isMSVCRTEntryPoint() const { const TranslationUnitDecl *TUnit = dyn_cast(getDeclContext()->getRedeclContext()); if (!TUnit) return false; // Even though we aren't really targeting MSVCRT if we are freestanding, // semantic analysis for these functions remains the same. // MSVCRT entry points only exist on MSVCRT targets. if (!TUnit->getASTContext().getTargetInfo().getTriple().isOSMSVCRT()) return false; // Nameless functions like constructors cannot be entry points. 
if (!getIdentifier()) return false; return llvm::StringSwitch(getName()) .Cases("main", // an ANSI console app "wmain", // a Unicode console App "WinMain", // an ANSI GUI app "wWinMain", // a Unicode GUI app "DllMain", // a DLL true) .Default(false); } bool FunctionDecl::isReservedGlobalPlacementOperator() const { if (getDeclName().getNameKind() != DeclarationName::CXXOperatorName) return false; if (getDeclName().getCXXOverloadedOperator() != OO_New && getDeclName().getCXXOverloadedOperator() != OO_Delete && getDeclName().getCXXOverloadedOperator() != OO_Array_New && getDeclName().getCXXOverloadedOperator() != OO_Array_Delete) return false; if (!getDeclContext()->getRedeclContext()->isTranslationUnit()) return false; const auto *proto = getType()->castAs(); if (proto->getNumParams() != 2 || proto->isVariadic()) return false; const ASTContext &Context = cast(getDeclContext()->getRedeclContext()) ->getASTContext(); // The result type and first argument type are constant across all // these operators. The second argument must be exactly void*. return (proto->getParamType(1).getCanonicalType() == Context.VoidPtrTy); } bool FunctionDecl::isReplaceableGlobalAllocationFunction( std::optional *AlignmentParam, bool *IsNothrow) const { if (getDeclName().getNameKind() != DeclarationName::CXXOperatorName) return false; if (getDeclName().getCXXOverloadedOperator() != OO_New && getDeclName().getCXXOverloadedOperator() != OO_Delete && getDeclName().getCXXOverloadedOperator() != OO_Array_New && getDeclName().getCXXOverloadedOperator() != OO_Array_Delete) return false; if (isa(getDeclContext())) return false; // This can only fail for an invalid 'operator new' declaration. if (!getDeclContext()->getRedeclContext()->isTranslationUnit()) return false; const auto *FPT = getType()->castAs(); if (FPT->getNumParams() == 0 || FPT->getNumParams() > 4 || FPT->isVariadic()) return false; // If this is a single-parameter function, it must be a replaceable global // allocation or deallocation function. if (FPT->getNumParams() == 1) return true; unsigned Params = 1; QualType Ty = FPT->getParamType(Params); const ASTContext &Ctx = getASTContext(); auto Consume = [&] { ++Params; Ty = Params < FPT->getNumParams() ? FPT->getParamType(Params) : QualType(); }; // In C++14, the next parameter can be a 'std::size_t' for sized delete. bool IsSizedDelete = false; if (Ctx.getLangOpts().SizedDeallocation && (getDeclName().getCXXOverloadedOperator() == OO_Delete || getDeclName().getCXXOverloadedOperator() == OO_Array_Delete) && Ctx.hasSameType(Ty, Ctx.getSizeType())) { IsSizedDelete = true; Consume(); } // In C++17, the next parameter can be a 'std::align_val_t' for aligned // new/delete. if (Ctx.getLangOpts().AlignedAllocation && !Ty.isNull() && Ty->isAlignValT()) { Consume(); if (AlignmentParam) *AlignmentParam = Params; } // If this is not a sized delete, the next parameter can be a // 'const std::nothrow_t&'. if (!IsSizedDelete && !Ty.isNull() && Ty->isReferenceType()) { Ty = Ty->getPointeeType(); if (Ty.getCVRQualifiers() != Qualifiers::Const) return false; if (Ty->isNothrowT()) { if (IsNothrow) *IsNothrow = true; Consume(); } } // Finally, recognize the not yet standard versions of new that take a // hot/cold allocation hint (__hot_cold_t). These are currently supported by // tcmalloc (see // https://github.com/google/tcmalloc/blob/220043886d4e2efff7a5702d5172cb8065253664/tcmalloc/malloc_extension.h#L53). 
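  // (Editorial illustration, not part of the original source.) tcmalloc
  // declares overloads along the lines of
  //   void *operator new(std::size_t size, __hot_cold_t hint);
  // and the trailing enum parameter is consumed here so such declarations are
  // still recognized as replaceable global allocation functions.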
if (!IsSizedDelete && !Ty.isNull() && Ty->isEnumeralType()) { QualType T = Ty; while (const auto *TD = T->getAs()) T = TD->getDecl()->getUnderlyingType(); const IdentifierInfo *II = T->castAs()->getDecl()->getIdentifier(); if (II && II->isStr("__hot_cold_t")) Consume(); } return Params == FPT->getNumParams(); } bool FunctionDecl::isInlineBuiltinDeclaration() const { if (!getBuiltinID()) return false; const FunctionDecl *Definition; if (!hasBody(Definition)) return false; if (!Definition->isInlineSpecified() || !Definition->hasAttr()) return false; ASTContext &Context = getASTContext(); switch (Context.GetGVALinkageForFunction(Definition)) { case GVA_Internal: case GVA_DiscardableODR: case GVA_StrongODR: return false; case GVA_AvailableExternally: case GVA_StrongExternal: return true; } llvm_unreachable("Unknown GVALinkage"); } bool FunctionDecl::isDestroyingOperatorDelete() const { // C++ P0722: // Within a class C, a single object deallocation function with signature // (T, std::destroying_delete_t, ) // is a destroying operator delete. if (!isa(this) || getOverloadedOperator() != OO_Delete || getNumParams() < 2) return false; auto *RD = getParamDecl(1)->getType()->getAsCXXRecordDecl(); return RD && RD->isInStdNamespace() && RD->getIdentifier() && RD->getIdentifier()->isStr("destroying_delete_t"); } LanguageLinkage FunctionDecl::getLanguageLinkage() const { return getDeclLanguageLinkage(*this); } bool FunctionDecl::isExternC() const { return isDeclExternC(*this); } bool FunctionDecl::isInExternCContext() const { if (hasAttr()) return true; return getLexicalDeclContext()->isExternCContext(); } bool FunctionDecl::isInExternCXXContext() const { return getLexicalDeclContext()->isExternCXXContext(); } bool FunctionDecl::isGlobal() const { if (const auto *Method = dyn_cast(this)) return Method->isStatic(); if (getCanonicalDecl()->getStorageClass() == SC_Static) return false; for (const DeclContext *DC = getDeclContext(); DC->isNamespace(); DC = DC->getParent()) { if (const auto *Namespace = cast(DC)) { if (!Namespace->getDeclName()) return false; } } return true; } bool FunctionDecl::isNoReturn() const { if (hasAttr() || hasAttr() || hasAttr()) return true; if (auto *FnTy = getType()->getAs()) return FnTy->getNoReturnAttr(); return false; } bool FunctionDecl::isMemberLikeConstrainedFriend() const { // C++20 [temp.friend]p9: // A non-template friend declaration with a requires-clause [or] // a friend function template with a constraint that depends on a template // parameter from an enclosing template [...] does not declare the same // function or function template as a declaration in any other scope. // If this isn't a friend then it's not a member-like constrained friend. if (!getFriendObjectKind()) { return false; } if (!getDescribedFunctionTemplate()) { // If these friends don't have constraints, they aren't constrained, and // thus don't fall under temp.friend p9. Else the simple presence of a // constraint makes them unique. 
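    // For example (editorial sketch, not original text):
    //   template <typename T> struct A {
    //     friend void f(A) requires true {}
    //   };
    // The requires-clause makes the friend member-like, so A<int> and A<long>
    // each get a distinct f rather than conflicting redefinitions.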
return getTrailingRequiresClause(); } return FriendConstraintRefersToEnclosingTemplate(); } MultiVersionKind FunctionDecl::getMultiVersionKind() const { if (hasAttr()) return MultiVersionKind::Target; if (hasAttr()) return MultiVersionKind::TargetVersion; if (hasAttr()) return MultiVersionKind::CPUDispatch; if (hasAttr()) return MultiVersionKind::CPUSpecific; if (hasAttr()) return MultiVersionKind::TargetClones; return MultiVersionKind::None; } bool FunctionDecl::isCPUDispatchMultiVersion() const { return isMultiVersion() && hasAttr(); } bool FunctionDecl::isCPUSpecificMultiVersion() const { return isMultiVersion() && hasAttr(); } bool FunctionDecl::isTargetMultiVersion() const { return isMultiVersion() && (hasAttr() || hasAttr()); } bool FunctionDecl::isTargetMultiVersionDefault() const { if (!isMultiVersion()) return false; if (hasAttr()) return getAttr()->isDefaultVersion(); return hasAttr() && getAttr()->isDefaultVersion(); } bool FunctionDecl::isTargetClonesMultiVersion() const { return isMultiVersion() && hasAttr(); } bool FunctionDecl::isTargetVersionMultiVersion() const { return isMultiVersion() && hasAttr(); } void FunctionDecl::setPreviousDeclaration(FunctionDecl *PrevDecl) { redeclarable_base::setPreviousDecl(PrevDecl); if (FunctionTemplateDecl *FunTmpl = getDescribedFunctionTemplate()) { FunctionTemplateDecl *PrevFunTmpl = PrevDecl? PrevDecl->getDescribedFunctionTemplate() : nullptr; assert((!PrevDecl || PrevFunTmpl) && "Function/function template mismatch"); FunTmpl->setPreviousDecl(PrevFunTmpl); } if (PrevDecl && PrevDecl->isInlined()) setImplicitlyInline(true); } FunctionDecl *FunctionDecl::getCanonicalDecl() { return getFirstDecl(); } /// Returns a value indicating whether this function corresponds to a builtin /// function. /// /// The function corresponds to a built-in function if it is declared at /// translation scope or within an extern "C" block and its name matches with /// the name of a builtin. The returned value will be 0 for functions that do /// not correspond to a builtin, a value of type \c Builtin::ID if in the /// target-independent range \c [1,Builtin::First), or a target-specific builtin /// value. /// /// \param ConsiderWrapperFunctions If true, we should consider wrapper /// functions as their wrapped builtins. This shouldn't be done in general, but /// it's useful in Sema to diagnose calls to wrappers based on their semantics. unsigned FunctionDecl::getBuiltinID(bool ConsiderWrapperFunctions) const { unsigned BuiltinID = 0; if (const auto *ABAA = getAttr()) { BuiltinID = ABAA->getBuiltinName()->getBuiltinID(); } else if (const auto *BAA = getAttr()) { BuiltinID = BAA->getBuiltinName()->getBuiltinID(); } else if (const auto *A = getAttr()) { BuiltinID = A->getID(); } if (!BuiltinID) return 0; // If the function is marked "overloadable", it has a different mangled name // and is not the C library function. if (!ConsiderWrapperFunctions && hasAttr() && (!hasAttr() && !hasAttr())) return 0; const ASTContext &Context = getASTContext(); if (!Context.BuiltinInfo.isPredefinedLibFunction(BuiltinID)) return BuiltinID; // This function has the name of a known C library // function. Determine whether it actually refers to the C library // function or whether it just has the same name. // If this is a static function, it's not a builtin. if (!ConsiderWrapperFunctions && getStorageClass() == SC_Static) return 0; // OpenCL v1.2 s6.9.f - The library functions defined in // the C99 standard headers are not available. 
if (Context.getLangOpts().OpenCL && Context.BuiltinInfo.isPredefinedLibFunction(BuiltinID)) return 0; // CUDA does not have device-side standard library. printf and malloc are the // only special cases that are supported by device-side runtime. if (Context.getLangOpts().CUDA && hasAttr() && !hasAttr() && !(BuiltinID == Builtin::BIprintf || BuiltinID == Builtin::BImalloc)) return 0; // As AMDGCN implementation of OpenMP does not have a device-side standard // library, none of the predefined library functions except printf and malloc // should be treated as a builtin i.e. 0 should be returned for them. if (Context.getTargetInfo().getTriple().isAMDGCN() && Context.getLangOpts().OpenMPIsTargetDevice && Context.BuiltinInfo.isPredefinedLibFunction(BuiltinID) && !(BuiltinID == Builtin::BIprintf || BuiltinID == Builtin::BImalloc)) return 0; return BuiltinID; } /// getNumParams - Return the number of parameters this function must have /// based on its FunctionType. This is the length of the ParamInfo array /// after it has been created. unsigned FunctionDecl::getNumParams() const { const auto *FPT = getType()->getAs(); return FPT ? FPT->getNumParams() : 0; } void FunctionDecl::setParams(ASTContext &C, ArrayRef NewParamInfo) { assert(!ParamInfo && "Already has param info!"); assert(NewParamInfo.size() == getNumParams() && "Parameter count mismatch!"); // Zero params -> null pointer. if (!NewParamInfo.empty()) { ParamInfo = new (C) ParmVarDecl*[NewParamInfo.size()]; std::copy(NewParamInfo.begin(), NewParamInfo.end(), ParamInfo); } } /// getMinRequiredArguments - Returns the minimum number of arguments /// needed to call this function. This may be fewer than the number of /// function parameters, if some of the parameters have default /// arguments (in C++) or are parameter packs (C++11). unsigned FunctionDecl::getMinRequiredArguments() const { if (!getASTContext().getLangOpts().CPlusPlus) return getNumParams(); // Note that it is possible for a parameter with no default argument to // follow a parameter with a default argument. unsigned NumRequiredArgs = 0; unsigned MinParamsSoFar = 0; for (auto *Param : parameters()) { if (!Param->isParameterPack()) { ++MinParamsSoFar; if (!Param->hasDefaultArg()) NumRequiredArgs = MinParamsSoFar; } } return NumRequiredArgs; } bool FunctionDecl::hasCXXExplicitFunctionObjectParameter() const { return getNumParams() != 0 && getParamDecl(0)->isExplicitObjectParameter(); } unsigned FunctionDecl::getNumNonObjectParams() const { return getNumParams() - static_cast(hasCXXExplicitFunctionObjectParameter()); } unsigned FunctionDecl::getMinRequiredExplicitArguments() const { return getMinRequiredArguments() - static_cast(hasCXXExplicitFunctionObjectParameter()); } bool FunctionDecl::hasOneParamOrDefaultArgs() const { return getNumParams() == 1 || (getNumParams() > 1 && llvm::all_of(llvm::drop_begin(parameters()), [](ParmVarDecl *P) { return P->hasDefaultArg(); })); } /// The combination of the extern and inline keywords under MSVC forces /// the function to be required. /// /// Note: This function assumes that we will only get called when isInlined() /// would return true for this FunctionDecl. 
bool FunctionDecl::isMSExternInline() const { assert(isInlined() && "expected to get called on an inlined function!"); const ASTContext &Context = getASTContext(); if (!Context.getTargetInfo().getCXXABI().isMicrosoft() && !hasAttr()) return false; for (const FunctionDecl *FD = getMostRecentDecl(); FD; FD = FD->getPreviousDecl()) if (!FD->isImplicit() && FD->getStorageClass() == SC_Extern) return true; return false; } static bool redeclForcesDefMSVC(const FunctionDecl *Redecl) { if (Redecl->getStorageClass() != SC_Extern) return false; for (const FunctionDecl *FD = Redecl->getPreviousDecl(); FD; FD = FD->getPreviousDecl()) if (!FD->isImplicit() && FD->getStorageClass() == SC_Extern) return false; return true; } static bool RedeclForcesDefC99(const FunctionDecl *Redecl) { // Only consider file-scope declarations in this test. if (!Redecl->getLexicalDeclContext()->isTranslationUnit()) return false; // Only consider explicit declarations; the presence of a builtin for a // libcall shouldn't affect whether a definition is externally visible. if (Redecl->isImplicit()) return false; if (!Redecl->isInlineSpecified() || Redecl->getStorageClass() == SC_Extern) return true; // Not an inline definition return false; } /// For a function declaration in C or C++, determine whether this /// declaration causes the definition to be externally visible. /// /// For instance, this determines if adding the current declaration to the set /// of redeclarations of the given functions causes /// isInlineDefinitionExternallyVisible to change from false to true. bool FunctionDecl::doesDeclarationForceExternallyVisibleDefinition() const { assert(!doesThisDeclarationHaveABody() && "Must have a declaration without a body."); const ASTContext &Context = getASTContext(); if (Context.getLangOpts().MSVCCompat) { const FunctionDecl *Definition; if (hasBody(Definition) && Definition->isInlined() && redeclForcesDefMSVC(this)) return true; } if (Context.getLangOpts().CPlusPlus) return false; if (Context.getLangOpts().GNUInline || hasAttr()) { // With GNU inlining, a declaration with 'inline' but not 'extern', forces // an externally visible definition. // // FIXME: What happens if gnu_inline gets added on after the first // declaration? if (!isInlineSpecified() || getStorageClass() == SC_Extern) return false; const FunctionDecl *Prev = this; bool FoundBody = false; while ((Prev = Prev->getPreviousDecl())) { FoundBody |= Prev->doesThisDeclarationHaveABody(); if (Prev->doesThisDeclarationHaveABody()) { // If it's not the case that both 'inline' and 'extern' are // specified on the definition, then it is always externally visible. if (!Prev->isInlineSpecified() || Prev->getStorageClass() != SC_Extern) return false; } else if (Prev->isInlineSpecified() && Prev->getStorageClass() != SC_Extern) { return false; } } return FoundBody; } // C99 6.7.4p6: // [...] If all of the file scope declarations for a function in a // translation unit include the inline function specifier without extern, // then the definition in that translation unit is an inline definition. if (isInlineSpecified() && getStorageClass() != SC_Extern) return false; const FunctionDecl *Prev = this; bool FoundBody = false; while ((Prev = Prev->getPreviousDecl())) { FoundBody |= Prev->doesThisDeclarationHaveABody(); if (RedeclForcesDefC99(Prev)) return false; } return FoundBody; } FunctionTypeLoc FunctionDecl::getFunctionTypeLoc() const { const TypeSourceInfo *TSI = getTypeSourceInfo(); return TSI ? 
TSI->getTypeLoc().IgnoreParens().getAs() : FunctionTypeLoc(); } SourceRange FunctionDecl::getReturnTypeSourceRange() const { FunctionTypeLoc FTL = getFunctionTypeLoc(); if (!FTL) return SourceRange(); // Skip self-referential return types. const SourceManager &SM = getASTContext().getSourceManager(); SourceRange RTRange = FTL.getReturnLoc().getSourceRange(); SourceLocation Boundary = getNameInfo().getBeginLoc(); if (RTRange.isInvalid() || Boundary.isInvalid() || !SM.isBeforeInTranslationUnit(RTRange.getEnd(), Boundary)) return SourceRange(); return RTRange; } SourceRange FunctionDecl::getParametersSourceRange() const { unsigned NP = getNumParams(); SourceLocation EllipsisLoc = getEllipsisLoc(); if (NP == 0 && EllipsisLoc.isInvalid()) return SourceRange(); SourceLocation Begin = NP > 0 ? ParamInfo[0]->getSourceRange().getBegin() : EllipsisLoc; SourceLocation End = EllipsisLoc.isValid() ? EllipsisLoc : ParamInfo[NP - 1]->getSourceRange().getEnd(); return SourceRange(Begin, End); } SourceRange FunctionDecl::getExceptionSpecSourceRange() const { FunctionTypeLoc FTL = getFunctionTypeLoc(); return FTL ? FTL.getExceptionSpecRange() : SourceRange(); } /// For an inline function definition in C, or for a gnu_inline function /// in C++, determine whether the definition will be externally visible. /// /// Inline function definitions are always available for inlining optimizations. /// However, depending on the language dialect, declaration specifiers, and /// attributes, the definition of an inline function may or may not be /// "externally" visible to other translation units in the program. /// /// In C99, inline definitions are not externally visible by default. However, /// if even one of the global-scope declarations is marked "extern inline", the /// inline definition becomes externally visible (C99 6.7.4p6). /// /// In GNU89 mode, or if the gnu_inline attribute is attached to the function /// definition, we use the GNU semantics for inline, which are nearly the /// opposite of C99 semantics. In particular, "inline" by itself will create /// an externally visible symbol, but "extern inline" will not create an /// externally visible symbol. bool FunctionDecl::isInlineDefinitionExternallyVisible() const { assert((doesThisDeclarationHaveABody() || willHaveBody() || hasAttr()) && "Must be a function definition"); assert(isInlined() && "Function must be inline"); ASTContext &Context = getASTContext(); if (Context.getLangOpts().GNUInline || hasAttr()) { // Note: If you change the logic here, please change // doesDeclarationForceExternallyVisibleDefinition as well. // // If it's not the case that both 'inline' and 'extern' are // specified on the definition, then this inline definition is // externally visible. if (Context.getLangOpts().CPlusPlus) return false; if (!(isInlineSpecified() && getStorageClass() == SC_Extern)) return true; // If any declaration is 'inline' but not 'extern', then this definition // is externally visible. for (auto *Redecl : redecls()) { if (Redecl->isInlineSpecified() && Redecl->getStorageClass() != SC_Extern) return true; } return false; } // The rest of this function is C-only. assert(!Context.getLangOpts().CPlusPlus && "should not use C inline rules in C++"); // C99 6.7.4p6: // [...] If all of the file scope declarations for a function in a // translation unit include the inline function specifier without extern, // then the definition in that translation unit is an inline definition. 
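  //
  // For example (illustrative, plain C99 without gnu_inline):
  //
  //   inline int f(void) { return 0; }  // inline definition only
  //   extern inline int f(void);        // makes the definition external
  //
  // The loop below looks for any file-scope redeclaration that is either
  // non-inline or declared 'extern'; one such redeclaration is enough to make
  // this inline definition externally visible.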
for (auto *Redecl : redecls()) { if (RedeclForcesDefC99(Redecl)) return true; } // C99 6.7.4p6: // An inline definition does not provide an external definition for the // function, and does not forbid an external definition in another // translation unit. return false; } /// getOverloadedOperator - Which C++ overloaded operator this /// function represents, if any. OverloadedOperatorKind FunctionDecl::getOverloadedOperator() const { if (getDeclName().getNameKind() == DeclarationName::CXXOperatorName) return getDeclName().getCXXOverloadedOperator(); return OO_None; } /// getLiteralIdentifier - The literal suffix identifier this function /// represents, if any. const IdentifierInfo *FunctionDecl::getLiteralIdentifier() const { if (getDeclName().getNameKind() == DeclarationName::CXXLiteralOperatorName) return getDeclName().getCXXLiteralIdentifier(); return nullptr; } FunctionDecl::TemplatedKind FunctionDecl::getTemplatedKind() const { if (TemplateOrSpecialization.isNull()) return TK_NonTemplate; if (const auto *ND = TemplateOrSpecialization.dyn_cast()) { if (isa(ND)) return TK_DependentNonTemplate; assert(isa(ND) && "No other valid types in NamedDecl"); return TK_FunctionTemplate; } if (TemplateOrSpecialization.is()) return TK_MemberSpecialization; if (TemplateOrSpecialization.is()) return TK_FunctionTemplateSpecialization; if (TemplateOrSpecialization.is ()) return TK_DependentFunctionTemplateSpecialization; llvm_unreachable("Did we miss a TemplateOrSpecialization type?"); } FunctionDecl *FunctionDecl::getInstantiatedFromMemberFunction() const { if (MemberSpecializationInfo *Info = getMemberSpecializationInfo()) return cast(Info->getInstantiatedFrom()); return nullptr; } MemberSpecializationInfo *FunctionDecl::getMemberSpecializationInfo() const { if (auto *MSI = TemplateOrSpecialization.dyn_cast()) return MSI; if (auto *FTSI = TemplateOrSpecialization .dyn_cast()) return FTSI->getMemberSpecializationInfo(); return nullptr; } void FunctionDecl::setInstantiationOfMemberFunction(ASTContext &C, FunctionDecl *FD, TemplateSpecializationKind TSK) { assert(TemplateOrSpecialization.isNull() && "Member function is already a specialization"); MemberSpecializationInfo *Info = new (C) MemberSpecializationInfo(FD, TSK); TemplateOrSpecialization = Info; } FunctionTemplateDecl *FunctionDecl::getDescribedFunctionTemplate() const { return dyn_cast_if_present( TemplateOrSpecialization.dyn_cast()); } void FunctionDecl::setDescribedFunctionTemplate( FunctionTemplateDecl *Template) { assert(TemplateOrSpecialization.isNull() && "Member function is already a specialization"); TemplateOrSpecialization = Template; } bool FunctionDecl::isFunctionTemplateSpecialization() const { return TemplateOrSpecialization.is() || TemplateOrSpecialization .is(); } void FunctionDecl::setInstantiatedFromDecl(FunctionDecl *FD) { assert(TemplateOrSpecialization.isNull() && "Function is already a specialization"); TemplateOrSpecialization = FD; } FunctionDecl *FunctionDecl::getInstantiatedFromDecl() const { return dyn_cast_if_present( TemplateOrSpecialization.dyn_cast()); } bool FunctionDecl::isImplicitlyInstantiable() const { // If the function is invalid, it can't be implicitly instantiated. if (isInvalidDecl()) return false; switch (getTemplateSpecializationKindForInstantiation()) { case TSK_Undeclared: case TSK_ExplicitInstantiationDefinition: case TSK_ExplicitSpecialization: return false; case TSK_ImplicitInstantiation: return true; case TSK_ExplicitInstantiationDeclaration: // Handled below. 
    break;
  }

  // Find the actual template from which we will instantiate.
  const FunctionDecl *PatternDecl = getTemplateInstantiationPattern();
  bool HasPattern = false;
  if (PatternDecl)
    HasPattern = PatternDecl->hasBody(PatternDecl);

  // C++0x [temp.explicit]p9:
  //   Except for inline functions, other explicit instantiation declarations
  //   have the effect of suppressing the implicit instantiation of the entity
  //   to which they refer.
  if (!HasPattern || !PatternDecl)
    return true;

  return PatternDecl->isInlined();
}

bool FunctionDecl::isTemplateInstantiation() const {
  // FIXME: Remove this, it's not clear what it means. (Which template
  // specialization kind?)
  return clang::isTemplateInstantiation(getTemplateSpecializationKind());
}

FunctionDecl *
FunctionDecl::getTemplateInstantiationPattern(bool ForDefinition) const {
  // If this is a generic lambda call operator specialization, its
  // instantiation pattern is always its primary template's pattern
  // even if its primary template was instantiated from another
  // member template (which happens with nested generic lambdas).
  // Since a lambda's call operator's body is transformed eagerly,
  // we don't have to go hunting for a prototype definition template
  // (i.e. instantiated-from-member-template) to use as an instantiation
  // pattern.
  if (isGenericLambdaCallOperatorSpecialization(
          dyn_cast<CXXMethodDecl>(this))) {
    assert(getPrimaryTemplate() && "not a generic lambda call operator?");
    return getDefinitionOrSelf(getPrimaryTemplate()->getTemplatedDecl());
  }

  // Check for a declaration of this function that was instantiated from a
  // friend definition.
  const FunctionDecl *FD = nullptr;
  if (!isDefined(FD, /*CheckForPendingFriendDefinition=*/true))
    FD = this;

  if (MemberSpecializationInfo *Info = FD->getMemberSpecializationInfo()) {
    if (ForDefinition &&
        !clang::isTemplateInstantiation(Info->getTemplateSpecializationKind()))
      return nullptr;
    return getDefinitionOrSelf(cast<FunctionDecl>(Info->getInstantiatedFrom()));
  }

  if (ForDefinition &&
      !clang::isTemplateInstantiation(getTemplateSpecializationKind()))
    return nullptr;

  if (FunctionTemplateDecl *Primary = getPrimaryTemplate()) {
    // If we hit a point where the user provided a specialization of this
    // template, we're done looking.
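    //
    // For instance (illustrative):
    //
    //   template<typename T> struct S {
    //     template<typename U> void f();
    //   };
    //   template<> template<typename U> void S<int>::f() {}
    //
    // Here S<int>::f is a member specialization, so the walk below stops at it
    // instead of continuing on to the original member template S<T>::f.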
while (!ForDefinition || !Primary->isMemberSpecialization()) { auto *NewPrimary = Primary->getInstantiatedFromMemberTemplate(); if (!NewPrimary) break; Primary = NewPrimary; } return getDefinitionOrSelf(Primary->getTemplatedDecl()); } return nullptr; } FunctionTemplateDecl *FunctionDecl::getPrimaryTemplate() const { if (FunctionTemplateSpecializationInfo *Info = TemplateOrSpecialization .dyn_cast()) { return Info->getTemplate(); } return nullptr; } FunctionTemplateSpecializationInfo * FunctionDecl::getTemplateSpecializationInfo() const { return TemplateOrSpecialization .dyn_cast(); } const TemplateArgumentList * FunctionDecl::getTemplateSpecializationArgs() const { if (FunctionTemplateSpecializationInfo *Info = TemplateOrSpecialization .dyn_cast()) { return Info->TemplateArguments; } return nullptr; } const ASTTemplateArgumentListInfo * FunctionDecl::getTemplateSpecializationArgsAsWritten() const { if (FunctionTemplateSpecializationInfo *Info = TemplateOrSpecialization .dyn_cast()) { return Info->TemplateArgumentsAsWritten; } if (DependentFunctionTemplateSpecializationInfo *Info = TemplateOrSpecialization .dyn_cast()) { return Info->TemplateArgumentsAsWritten; } return nullptr; } void FunctionDecl::setFunctionTemplateSpecialization( ASTContext &C, FunctionTemplateDecl *Template, TemplateArgumentList *TemplateArgs, void *InsertPos, TemplateSpecializationKind TSK, const TemplateArgumentListInfo *TemplateArgsAsWritten, SourceLocation PointOfInstantiation) { assert((TemplateOrSpecialization.isNull() || TemplateOrSpecialization.is()) && "Member function is already a specialization"); assert(TSK != TSK_Undeclared && "Must specify the type of function template specialization"); assert((TemplateOrSpecialization.isNull() || getFriendObjectKind() != FOK_None || TSK == TSK_ExplicitSpecialization) && "Member specialization must be an explicit specialization"); FunctionTemplateSpecializationInfo *Info = FunctionTemplateSpecializationInfo::Create( C, this, Template, TSK, TemplateArgs, TemplateArgsAsWritten, PointOfInstantiation, TemplateOrSpecialization.dyn_cast()); TemplateOrSpecialization = Info; Template->addSpecialization(Info, InsertPos); } void FunctionDecl::setDependentTemplateSpecialization( ASTContext &Context, const UnresolvedSetImpl &Templates, const TemplateArgumentListInfo *TemplateArgs) { assert(TemplateOrSpecialization.isNull()); DependentFunctionTemplateSpecializationInfo *Info = DependentFunctionTemplateSpecializationInfo::Create(Context, Templates, TemplateArgs); TemplateOrSpecialization = Info; } DependentFunctionTemplateSpecializationInfo * FunctionDecl::getDependentSpecializationInfo() const { return TemplateOrSpecialization .dyn_cast(); } DependentFunctionTemplateSpecializationInfo * DependentFunctionTemplateSpecializationInfo::Create( ASTContext &Context, const UnresolvedSetImpl &Candidates, const TemplateArgumentListInfo *TArgs) { const auto *TArgsWritten = TArgs ? 
ASTTemplateArgumentListInfo::Create(Context, *TArgs) : nullptr; return new (Context.Allocate( totalSizeToAlloc(Candidates.size()))) DependentFunctionTemplateSpecializationInfo(Candidates, TArgsWritten); } DependentFunctionTemplateSpecializationInfo:: DependentFunctionTemplateSpecializationInfo( const UnresolvedSetImpl &Candidates, const ASTTemplateArgumentListInfo *TemplateArgsWritten) : NumCandidates(Candidates.size()), TemplateArgumentsAsWritten(TemplateArgsWritten) { std::transform(Candidates.begin(), Candidates.end(), getTrailingObjects(), [](NamedDecl *ND) { return cast(ND->getUnderlyingDecl()); }); } TemplateSpecializationKind FunctionDecl::getTemplateSpecializationKind() const { // For a function template specialization, query the specialization // information object. if (FunctionTemplateSpecializationInfo *FTSInfo = TemplateOrSpecialization .dyn_cast()) return FTSInfo->getTemplateSpecializationKind(); if (MemberSpecializationInfo *MSInfo = TemplateOrSpecialization.dyn_cast()) return MSInfo->getTemplateSpecializationKind(); // A dependent function template specialization is an explicit specialization, // except when it's a friend declaration. if (TemplateOrSpecialization .is() && getFriendObjectKind() == FOK_None) return TSK_ExplicitSpecialization; return TSK_Undeclared; } TemplateSpecializationKind FunctionDecl::getTemplateSpecializationKindForInstantiation() const { // This is the same as getTemplateSpecializationKind(), except that for a // function that is both a function template specialization and a member // specialization, we prefer the member specialization information. Eg: // // template struct A { // template void f() {} // template<> void f() {} // }; // // Within the templated CXXRecordDecl, A::f is a dependent function // template specialization; both getTemplateSpecializationKind() and // getTemplateSpecializationKindForInstantiation() will return // TSK_ExplicitSpecialization. // // For A::f(): // * getTemplateSpecializationKind() will return TSK_ExplicitSpecialization // * getTemplateSpecializationKindForInstantiation() will return // TSK_ImplicitInstantiation // // This reflects the facts that A::f is an explicit specialization // of A::f, and that A::f should be implicitly instantiated // from A::f if a definition is needed. 
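  //
  // Spelled out with the template arguments the example above presumably uses:
  //
  //   template<typename T> struct A {
  //     template<typename U> void f() {}
  //     template<> void f<int>() {}
  //   };
  //
  // i.e. A<T>::f<int> is the dependent explicit specialization, and
  // A<int>::f<int> is what gets implicitly instantiated from it on demand.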
if (FunctionTemplateSpecializationInfo *FTSInfo = TemplateOrSpecialization .dyn_cast()) { if (auto *MSInfo = FTSInfo->getMemberSpecializationInfo()) return MSInfo->getTemplateSpecializationKind(); return FTSInfo->getTemplateSpecializationKind(); } if (MemberSpecializationInfo *MSInfo = TemplateOrSpecialization.dyn_cast()) return MSInfo->getTemplateSpecializationKind(); if (TemplateOrSpecialization .is() && getFriendObjectKind() == FOK_None) return TSK_ExplicitSpecialization; return TSK_Undeclared; } void FunctionDecl::setTemplateSpecializationKind(TemplateSpecializationKind TSK, SourceLocation PointOfInstantiation) { if (FunctionTemplateSpecializationInfo *FTSInfo = TemplateOrSpecialization.dyn_cast< FunctionTemplateSpecializationInfo*>()) { FTSInfo->setTemplateSpecializationKind(TSK); if (TSK != TSK_ExplicitSpecialization && PointOfInstantiation.isValid() && FTSInfo->getPointOfInstantiation().isInvalid()) { FTSInfo->setPointOfInstantiation(PointOfInstantiation); if (ASTMutationListener *L = getASTContext().getASTMutationListener()) L->InstantiationRequested(this); } } else if (MemberSpecializationInfo *MSInfo = TemplateOrSpecialization.dyn_cast()) { MSInfo->setTemplateSpecializationKind(TSK); if (TSK != TSK_ExplicitSpecialization && PointOfInstantiation.isValid() && MSInfo->getPointOfInstantiation().isInvalid()) { MSInfo->setPointOfInstantiation(PointOfInstantiation); if (ASTMutationListener *L = getASTContext().getASTMutationListener()) L->InstantiationRequested(this); } } else llvm_unreachable("Function cannot have a template specialization kind"); } SourceLocation FunctionDecl::getPointOfInstantiation() const { if (FunctionTemplateSpecializationInfo *FTSInfo = TemplateOrSpecialization.dyn_cast< FunctionTemplateSpecializationInfo*>()) return FTSInfo->getPointOfInstantiation(); if (MemberSpecializationInfo *MSInfo = TemplateOrSpecialization.dyn_cast()) return MSInfo->getPointOfInstantiation(); return SourceLocation(); } bool FunctionDecl::isOutOfLine() const { if (Decl::isOutOfLine()) return true; // If this function was instantiated from a member function of a // class template, check whether that member function was defined out-of-line. if (FunctionDecl *FD = getInstantiatedFromMemberFunction()) { const FunctionDecl *Definition; if (FD->hasBody(Definition)) return Definition->isOutOfLine(); } // If this function was instantiated from a function template, // check whether that function template was defined out-of-line. if (FunctionTemplateDecl *FunTmpl = getPrimaryTemplate()) { const FunctionDecl *Definition; if (FunTmpl->getTemplatedDecl()->hasBody(Definition)) return Definition->isOutOfLine(); } return false; } SourceRange FunctionDecl::getSourceRange() const { return SourceRange(getOuterLocStart(), EndRangeLoc); } unsigned FunctionDecl::getMemoryFunctionKind() const { IdentifierInfo *FnInfo = getIdentifier(); if (!FnInfo) return 0; // Builtin handling. 
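  // Both the library spelling and the builtin spellings map to the same kind,
  // e.g. (illustrative):
  //
  //   memcpy(dst, src, n);            // -> Builtin::BImemcpy
  //   __builtin_memcpy(dst, src, n);  // -> Builtin::BImemcpy
  //
  // so callers can check memory functions without caring which form was used.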
switch (getBuiltinID()) { case Builtin::BI__builtin_memset: case Builtin::BI__builtin___memset_chk: case Builtin::BImemset: return Builtin::BImemset; case Builtin::BI__builtin_memcpy: case Builtin::BI__builtin___memcpy_chk: case Builtin::BImemcpy: return Builtin::BImemcpy; case Builtin::BI__builtin_mempcpy: case Builtin::BI__builtin___mempcpy_chk: case Builtin::BImempcpy: return Builtin::BImempcpy; case Builtin::BI__builtin_memmove: case Builtin::BI__builtin___memmove_chk: case Builtin::BImemmove: return Builtin::BImemmove; case Builtin::BIstrlcpy: case Builtin::BI__builtin___strlcpy_chk: return Builtin::BIstrlcpy; case Builtin::BIstrlcat: case Builtin::BI__builtin___strlcat_chk: return Builtin::BIstrlcat; case Builtin::BI__builtin_memcmp: case Builtin::BImemcmp: return Builtin::BImemcmp; case Builtin::BI__builtin_bcmp: case Builtin::BIbcmp: return Builtin::BIbcmp; case Builtin::BI__builtin_strncpy: case Builtin::BI__builtin___strncpy_chk: case Builtin::BIstrncpy: return Builtin::BIstrncpy; case Builtin::BI__builtin_strncmp: case Builtin::BIstrncmp: return Builtin::BIstrncmp; case Builtin::BI__builtin_strncasecmp: case Builtin::BIstrncasecmp: return Builtin::BIstrncasecmp; case Builtin::BI__builtin_strncat: case Builtin::BI__builtin___strncat_chk: case Builtin::BIstrncat: return Builtin::BIstrncat; case Builtin::BI__builtin_strndup: case Builtin::BIstrndup: return Builtin::BIstrndup; case Builtin::BI__builtin_strlen: case Builtin::BIstrlen: return Builtin::BIstrlen; case Builtin::BI__builtin_bzero: case Builtin::BIbzero: return Builtin::BIbzero; case Builtin::BI__builtin_bcopy: case Builtin::BIbcopy: return Builtin::BIbcopy; case Builtin::BIfree: return Builtin::BIfree; default: if (isExternC()) { if (FnInfo->isStr("memset")) return Builtin::BImemset; if (FnInfo->isStr("memcpy")) return Builtin::BImemcpy; if (FnInfo->isStr("mempcpy")) return Builtin::BImempcpy; if (FnInfo->isStr("memmove")) return Builtin::BImemmove; if (FnInfo->isStr("memcmp")) return Builtin::BImemcmp; if (FnInfo->isStr("bcmp")) return Builtin::BIbcmp; if (FnInfo->isStr("strncpy")) return Builtin::BIstrncpy; if (FnInfo->isStr("strncmp")) return Builtin::BIstrncmp; if (FnInfo->isStr("strncasecmp")) return Builtin::BIstrncasecmp; if (FnInfo->isStr("strncat")) return Builtin::BIstrncat; if (FnInfo->isStr("strndup")) return Builtin::BIstrndup; if (FnInfo->isStr("strlen")) return Builtin::BIstrlen; if (FnInfo->isStr("bzero")) return Builtin::BIbzero; if (FnInfo->isStr("bcopy")) return Builtin::BIbcopy; } else if (isInStdNamespace()) { if (FnInfo->isStr("free")) return Builtin::BIfree; } break; } return 0; } unsigned FunctionDecl::getODRHash() const { assert(hasODRHash()); return ODRHash; } unsigned FunctionDecl::getODRHash() { if (hasODRHash()) return ODRHash; if (auto *FT = getInstantiatedFromMemberFunction()) { setHasODRHash(true); ODRHash = FT->getODRHash(); return ODRHash; } class ODRHash Hash; Hash.AddFunctionDecl(this); setHasODRHash(true); ODRHash = Hash.CalculateHash(); return ODRHash; } //===----------------------------------------------------------------------===// // FieldDecl Implementation //===----------------------------------------------------------------------===// FieldDecl *FieldDecl::Create(const ASTContext &C, DeclContext *DC, SourceLocation StartLoc, SourceLocation IdLoc, const IdentifierInfo *Id, QualType T, TypeSourceInfo *TInfo, Expr *BW, bool Mutable, InClassInitStyle InitStyle) { return new (C, DC) FieldDecl(Decl::Field, DC, StartLoc, IdLoc, Id, T, TInfo, BW, Mutable, InitStyle); } FieldDecl 
*FieldDecl::CreateDeserialized(ASTContext &C, GlobalDeclID ID) { return new (C, ID) FieldDecl(Field, nullptr, SourceLocation(), SourceLocation(), nullptr, QualType(), nullptr, nullptr, false, ICIS_NoInit); } bool FieldDecl::isAnonymousStructOrUnion() const { if (!isImplicit() || getDeclName()) return false; if (const auto *Record = getType()->getAs()) return Record->getDecl()->isAnonymousStructOrUnion(); return false; } Expr *FieldDecl::getInClassInitializer() const { if (!hasInClassInitializer()) return nullptr; LazyDeclStmtPtr InitPtr = BitField ? InitAndBitWidth->Init : Init; return cast_if_present( InitPtr.isOffset() ? InitPtr.get(getASTContext().getExternalSource()) : InitPtr.get(nullptr)); } void FieldDecl::setInClassInitializer(Expr *NewInit) { setLazyInClassInitializer(LazyDeclStmtPtr(NewInit)); } void FieldDecl::setLazyInClassInitializer(LazyDeclStmtPtr NewInit) { assert(hasInClassInitializer() && !getInClassInitializer()); if (BitField) InitAndBitWidth->Init = NewInit; else Init = NewInit; } unsigned FieldDecl::getBitWidthValue(const ASTContext &Ctx) const { assert(isBitField() && "not a bitfield"); return getBitWidth()->EvaluateKnownConstInt(Ctx).getZExtValue(); } bool FieldDecl::isZeroLengthBitField(const ASTContext &Ctx) const { return isUnnamedBitField() && !getBitWidth()->isValueDependent() && getBitWidthValue(Ctx) == 0; } bool FieldDecl::isZeroSize(const ASTContext &Ctx) const { if (isZeroLengthBitField(Ctx)) return true; // C++2a [intro.object]p7: // An object has nonzero size if it // -- is not a potentially-overlapping subobject, or if (!hasAttr()) return false; // -- is not of class type, or const auto *RT = getType()->getAs(); if (!RT) return false; const RecordDecl *RD = RT->getDecl()->getDefinition(); if (!RD) { assert(isInvalidDecl() && "valid field has incomplete type"); return false; } // -- [has] virtual member functions or virtual base classes, or // -- has subobjects of nonzero size or bit-fields of nonzero length const auto *CXXRD = cast(RD); if (!CXXRD->isEmpty()) return false; // Otherwise, [...] the circumstances under which the object has zero size // are implementation-defined. 
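  //
  // For example (illustrative):
  //
  //   struct Empty {};
  //   struct S {
  //     [[no_unique_address]] Empty e;  // may be a zero-size subobject
  //     int x;
  //   };
  //
  // Under the Itanium ABI 'e' can occupy no storage at all; the Microsoft ABI
  // check below applies additional restrictions.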
if (!Ctx.getTargetInfo().getCXXABI().isMicrosoft()) return true; // MS ABI: has nonzero size if it is a class type with class type fields, // whether or not they have nonzero size return !llvm::any_of(CXXRD->fields(), [](const FieldDecl *Field) { return Field->getType()->getAs(); }); } bool FieldDecl::isPotentiallyOverlapping() const { return hasAttr() && getType()->getAsCXXRecordDecl(); } unsigned FieldDecl::getFieldIndex() const { const FieldDecl *Canonical = getCanonicalDecl(); if (Canonical != this) return Canonical->getFieldIndex(); if (CachedFieldIndex) return CachedFieldIndex - 1; unsigned Index = 0; const RecordDecl *RD = getParent()->getDefinition(); assert(RD && "requested index for field of struct with no definition"); for (auto *Field : RD->fields()) { Field->getCanonicalDecl()->CachedFieldIndex = Index + 1; assert(Field->getCanonicalDecl()->CachedFieldIndex == Index + 1 && "overflow in field numbering"); ++Index; } assert(CachedFieldIndex && "failed to find field in parent"); return CachedFieldIndex - 1; } SourceRange FieldDecl::getSourceRange() const { const Expr *FinalExpr = getInClassInitializer(); if (!FinalExpr) FinalExpr = getBitWidth(); if (FinalExpr) return SourceRange(getInnerLocStart(), FinalExpr->getEndLoc()); return DeclaratorDecl::getSourceRange(); } void FieldDecl::setCapturedVLAType(const VariableArrayType *VLAType) { assert((getParent()->isLambda() || getParent()->isCapturedRecord()) && "capturing type in non-lambda or captured record."); assert(StorageKind == ISK_NoInit && !BitField && "bit-field or field with default member initializer cannot capture " "VLA type"); StorageKind = ISK_CapturedVLAType; CapturedVLAType = VLAType; } void FieldDecl::printName(raw_ostream &OS, const PrintingPolicy &Policy) const { // Print unnamed members using name of their type. if (isAnonymousStructOrUnion()) { this->getType().print(OS, Policy); return; } // Otherwise, do the normal printing. DeclaratorDecl::printName(OS, Policy); } //===----------------------------------------------------------------------===// // TagDecl Implementation //===----------------------------------------------------------------------===// TagDecl::TagDecl(Kind DK, TagKind TK, const ASTContext &C, DeclContext *DC, SourceLocation L, IdentifierInfo *Id, TagDecl *PrevDecl, SourceLocation StartL) : TypeDecl(DK, DC, L, Id, StartL), DeclContext(DK), redeclarable_base(C), TypedefNameDeclOrQualifier((TypedefNameDecl *)nullptr) { assert((DK != Enum || TK == TagTypeKind::Enum) && "EnumDecl not matched with TagTypeKind::Enum"); setPreviousDecl(PrevDecl); setTagKind(TK); setCompleteDefinition(false); setBeingDefined(false); setEmbeddedInDeclarator(false); setFreeStanding(false); setCompleteDefinitionRequired(false); TagDeclBits.IsThisDeclarationADemotedDefinition = false; } SourceLocation TagDecl::getOuterLocStart() const { return getTemplateOrInnerLocStart(this); } SourceRange TagDecl::getSourceRange() const { SourceLocation RBraceLoc = BraceRange.getEnd(); SourceLocation E = RBraceLoc.isValid() ? 
RBraceLoc : getLocation(); return SourceRange(getOuterLocStart(), E); } TagDecl *TagDecl::getCanonicalDecl() { return getFirstDecl(); } void TagDecl::setTypedefNameForAnonDecl(TypedefNameDecl *TDD) { TypedefNameDeclOrQualifier = TDD; if (const Type *T = getTypeForDecl()) { (void)T; assert(T->isLinkageValid()); } assert(isLinkageValid()); } void TagDecl::startDefinition() { setBeingDefined(true); if (auto *D = dyn_cast(this)) { struct CXXRecordDecl::DefinitionData *Data = new (getASTContext()) struct CXXRecordDecl::DefinitionData(D); for (auto *I : redecls()) cast(I)->DefinitionData = Data; } } void TagDecl::completeDefinition() { assert((!isa(this) || cast(this)->hasDefinition()) && "definition completed but not started"); setCompleteDefinition(true); setBeingDefined(false); if (ASTMutationListener *L = getASTMutationListener()) L->CompletedTagDefinition(this); } TagDecl *TagDecl::getDefinition() const { if (isCompleteDefinition()) return const_cast(this); // If it's possible for us to have an out-of-date definition, check now. if (mayHaveOutOfDateDef()) { if (IdentifierInfo *II = getIdentifier()) { if (II->isOutOfDate()) { updateOutOfDate(*II); } } } if (const auto *CXXRD = dyn_cast(this)) return CXXRD->getDefinition(); for (auto *R : redecls()) if (R->isCompleteDefinition()) return R; return nullptr; } void TagDecl::setQualifierInfo(NestedNameSpecifierLoc QualifierLoc) { if (QualifierLoc) { // Make sure the extended qualifier info is allocated. if (!hasExtInfo()) TypedefNameDeclOrQualifier = new (getASTContext()) ExtInfo; // Set qualifier info. getExtInfo()->QualifierLoc = QualifierLoc; } else { // Here Qualifier == 0, i.e., we are removing the qualifier (if any). if (hasExtInfo()) { if (getExtInfo()->NumTemplParamLists == 0) { getASTContext().Deallocate(getExtInfo()); TypedefNameDeclOrQualifier = (TypedefNameDecl *)nullptr; } else getExtInfo()->QualifierLoc = QualifierLoc; } } } void TagDecl::printName(raw_ostream &OS, const PrintingPolicy &Policy) const { DeclarationName Name = getDeclName(); // If the name is supposed to have an identifier but does not have one, then // the tag is anonymous and we should print it differently. if (Name.isIdentifier() && !Name.getAsIdentifierInfo()) { // If the caller wanted to print a qualified name, they've already printed // the scope. And if the caller doesn't want that, the scope information // is already printed as part of the type. PrintingPolicy Copy(Policy); Copy.SuppressScope = true; getASTContext().getTagDeclType(this).print(OS, Copy); return; } // Otherwise, do the normal printing. Name.print(OS, Policy); } void TagDecl::setTemplateParameterListsInfo( ASTContext &Context, ArrayRef TPLists) { assert(!TPLists.empty()); // Make sure the extended decl info is allocated. if (!hasExtInfo()) // Allocate external info struct. TypedefNameDeclOrQualifier = new (getASTContext()) ExtInfo; // Set the template parameter lists info. 
getExtInfo()->setTemplateParameterListsInfo(Context, TPLists); } //===----------------------------------------------------------------------===// // EnumDecl Implementation //===----------------------------------------------------------------------===// EnumDecl::EnumDecl(ASTContext &C, DeclContext *DC, SourceLocation StartLoc, SourceLocation IdLoc, IdentifierInfo *Id, EnumDecl *PrevDecl, bool Scoped, bool ScopedUsingClassTag, bool Fixed) : TagDecl(Enum, TagTypeKind::Enum, C, DC, IdLoc, Id, PrevDecl, StartLoc) { assert(Scoped || !ScopedUsingClassTag); IntegerType = nullptr; setNumPositiveBits(0); setNumNegativeBits(0); setScoped(Scoped); setScopedUsingClassTag(ScopedUsingClassTag); setFixed(Fixed); setHasODRHash(false); ODRHash = 0; } void EnumDecl::anchor() {} EnumDecl *EnumDecl::Create(ASTContext &C, DeclContext *DC, SourceLocation StartLoc, SourceLocation IdLoc, IdentifierInfo *Id, EnumDecl *PrevDecl, bool IsScoped, bool IsScopedUsingClassTag, bool IsFixed) { auto *Enum = new (C, DC) EnumDecl(C, DC, StartLoc, IdLoc, Id, PrevDecl, IsScoped, IsScopedUsingClassTag, IsFixed); Enum->setMayHaveOutOfDateDef(C.getLangOpts().Modules); C.getTypeDeclType(Enum, PrevDecl); return Enum; } EnumDecl *EnumDecl::CreateDeserialized(ASTContext &C, GlobalDeclID ID) { EnumDecl *Enum = new (C, ID) EnumDecl(C, nullptr, SourceLocation(), SourceLocation(), nullptr, nullptr, false, false, false); Enum->setMayHaveOutOfDateDef(C.getLangOpts().Modules); return Enum; } SourceRange EnumDecl::getIntegerTypeRange() const { if (const TypeSourceInfo *TI = getIntegerTypeSourceInfo()) return TI->getTypeLoc().getSourceRange(); return SourceRange(); } void EnumDecl::completeDefinition(QualType NewType, QualType NewPromotionType, unsigned NumPositiveBits, unsigned NumNegativeBits) { assert(!isCompleteDefinition() && "Cannot redefine enums!"); if (!IntegerType) IntegerType = NewType.getTypePtr(); PromotionType = NewPromotionType; setNumPositiveBits(NumPositiveBits); setNumNegativeBits(NumNegativeBits); TagDecl::completeDefinition(); } bool EnumDecl::isClosed() const { if (const auto *A = getAttr()) return A->getExtensibility() == EnumExtensibilityAttr::Closed; return true; } bool EnumDecl::isClosedFlag() const { return isClosed() && hasAttr(); } bool EnumDecl::isClosedNonFlag() const { return isClosed() && !hasAttr(); } TemplateSpecializationKind EnumDecl::getTemplateSpecializationKind() const { if (MemberSpecializationInfo *MSI = getMemberSpecializationInfo()) return MSI->getTemplateSpecializationKind(); return TSK_Undeclared; } void EnumDecl::setTemplateSpecializationKind(TemplateSpecializationKind TSK, SourceLocation PointOfInstantiation) { MemberSpecializationInfo *MSI = getMemberSpecializationInfo(); assert(MSI && "Not an instantiated member enumeration?"); MSI->setTemplateSpecializationKind(TSK); if (TSK != TSK_ExplicitSpecialization && PointOfInstantiation.isValid() && MSI->getPointOfInstantiation().isInvalid()) MSI->setPointOfInstantiation(PointOfInstantiation); } EnumDecl *EnumDecl::getTemplateInstantiationPattern() const { if (MemberSpecializationInfo *MSInfo = getMemberSpecializationInfo()) { if (isTemplateInstantiation(MSInfo->getTemplateSpecializationKind())) { EnumDecl *ED = getInstantiatedFromMemberEnum(); while (auto *NewED = ED->getInstantiatedFromMemberEnum()) ED = NewED; return getDefinitionOrSelf(ED); } } assert(!isTemplateInstantiation(getTemplateSpecializationKind()) && "couldn't find pattern for enum instantiation"); return nullptr; } EnumDecl *EnumDecl::getInstantiatedFromMemberEnum() const { if 
(SpecializationInfo) return cast(SpecializationInfo->getInstantiatedFrom()); return nullptr; } void EnumDecl::setInstantiationOfMemberEnum(ASTContext &C, EnumDecl *ED, TemplateSpecializationKind TSK) { assert(!SpecializationInfo && "Member enum is already a specialization"); SpecializationInfo = new (C) MemberSpecializationInfo(ED, TSK); } unsigned EnumDecl::getODRHash() { if (hasODRHash()) return ODRHash; class ODRHash Hash; Hash.AddEnumDecl(this); setHasODRHash(true); ODRHash = Hash.CalculateHash(); return ODRHash; } SourceRange EnumDecl::getSourceRange() const { auto Res = TagDecl::getSourceRange(); // Set end-point to enum-base, e.g. enum foo : ^bar if (auto *TSI = getIntegerTypeSourceInfo()) { // TagDecl doesn't know about the enum base. if (!getBraceRange().getEnd().isValid()) Res.setEnd(TSI->getTypeLoc().getEndLoc()); } return Res; } void EnumDecl::getValueRange(llvm::APInt &Max, llvm::APInt &Min) const { unsigned Bitwidth = getASTContext().getIntWidth(getIntegerType()); unsigned NumNegativeBits = getNumNegativeBits(); unsigned NumPositiveBits = getNumPositiveBits(); if (NumNegativeBits) { unsigned NumBits = std::max(NumNegativeBits, NumPositiveBits + 1); Max = llvm::APInt(Bitwidth, 1) << (NumBits - 1); Min = -Max; } else { Max = llvm::APInt(Bitwidth, 1) << NumPositiveBits; Min = llvm::APInt::getZero(Bitwidth); } } //===----------------------------------------------------------------------===// // RecordDecl Implementation //===----------------------------------------------------------------------===// RecordDecl::RecordDecl(Kind DK, TagKind TK, const ASTContext &C, DeclContext *DC, SourceLocation StartLoc, SourceLocation IdLoc, IdentifierInfo *Id, RecordDecl *PrevDecl) : TagDecl(DK, TK, C, DC, IdLoc, Id, PrevDecl, StartLoc) { assert(classof(static_cast(this)) && "Invalid Kind!"); setHasFlexibleArrayMember(false); setAnonymousStructOrUnion(false); setHasObjectMember(false); setHasVolatileMember(false); setHasLoadedFieldsFromExternalStorage(false); setNonTrivialToPrimitiveDefaultInitialize(false); setNonTrivialToPrimitiveCopy(false); setNonTrivialToPrimitiveDestroy(false); setHasNonTrivialToPrimitiveDefaultInitializeCUnion(false); setHasNonTrivialToPrimitiveDestructCUnion(false); setHasNonTrivialToPrimitiveCopyCUnion(false); setParamDestroyedInCallee(false); setArgPassingRestrictions(RecordArgPassingKind::CanPassInRegs); setIsRandomized(false); setODRHash(0); } RecordDecl *RecordDecl::Create(const ASTContext &C, TagKind TK, DeclContext *DC, SourceLocation StartLoc, SourceLocation IdLoc, IdentifierInfo *Id, RecordDecl* PrevDecl) { RecordDecl *R = new (C, DC) RecordDecl(Record, TK, C, DC, StartLoc, IdLoc, Id, PrevDecl); R->setMayHaveOutOfDateDef(C.getLangOpts().Modules); C.getTypeDeclType(R, PrevDecl); return R; } RecordDecl *RecordDecl::CreateDeserialized(const ASTContext &C, GlobalDeclID ID) { RecordDecl *R = new (C, ID) RecordDecl(Record, TagTypeKind::Struct, C, nullptr, SourceLocation(), SourceLocation(), nullptr, nullptr); R->setMayHaveOutOfDateDef(C.getLangOpts().Modules); return R; } bool RecordDecl::isInjectedClassName() const { return isImplicit() && getDeclName() && getDeclContext()->isRecord() && cast(getDeclContext())->getDeclName() == getDeclName(); } bool RecordDecl::isLambda() const { if (auto RD = dyn_cast(this)) return RD->isLambda(); return false; } bool RecordDecl::isCapturedRecord() const { return hasAttr(); } void RecordDecl::setCapturedRecord() { addAttr(CapturedRecordAttr::CreateImplicit(getASTContext())); } bool RecordDecl::isOrContainsUnion() const { if 
(isUnion()) return true; if (const RecordDecl *Def = getDefinition()) { for (const FieldDecl *FD : Def->fields()) { const RecordType *RT = FD->getType()->getAs(); if (RT && RT->getDecl()->isOrContainsUnion()) return true; } } return false; } RecordDecl::field_iterator RecordDecl::field_begin() const { if (hasExternalLexicalStorage() && !hasLoadedFieldsFromExternalStorage()) LoadFieldsFromExternalStorage(); // This is necessary for correctness for C++ with modules. // FIXME: Come up with a test case that breaks without definition. if (RecordDecl *D = getDefinition(); D && D != this) return D->field_begin(); return field_iterator(decl_iterator(FirstDecl)); } /// completeDefinition - Notes that the definition of this type is now /// complete. void RecordDecl::completeDefinition() { assert(!isCompleteDefinition() && "Cannot redefine record!"); TagDecl::completeDefinition(); ASTContext &Ctx = getASTContext(); // Layouts are dumped when computed, so if we are dumping for all complete // types, we need to force usage to get types that wouldn't be used elsewhere. // // If the type is dependent, then we can't compute its layout because there // is no way for us to know the size or alignment of a dependent type. Also // ignore declarations marked as invalid since 'getASTRecordLayout()' asserts // on that. if (Ctx.getLangOpts().DumpRecordLayoutsComplete && !isDependentType() && !isInvalidDecl()) (void)Ctx.getASTRecordLayout(this); } /// isMsStruct - Get whether or not this record uses ms_struct layout. /// This which can be turned on with an attribute, pragma, or the /// -mms-bitfields command-line option. bool RecordDecl::isMsStruct(const ASTContext &C) const { return hasAttr() || C.getLangOpts().MSBitfields == 1; } void RecordDecl::reorderDecls(const SmallVectorImpl &Decls) { std::tie(FirstDecl, LastDecl) = DeclContext::BuildDeclChain(Decls, false); LastDecl->NextInContextAndBits.setPointer(nullptr); setIsRandomized(true); } void RecordDecl::LoadFieldsFromExternalStorage() const { ExternalASTSource *Source = getASTContext().getExternalSource(); assert(hasExternalLexicalStorage() && Source && "No external storage?"); // Notify that we have a RecordDecl doing some initialization. ExternalASTSource::Deserializing TheFields(Source); SmallVector Decls; setHasLoadedFieldsFromExternalStorage(true); Source->FindExternalLexicalDecls(this, [](Decl::Kind K) { return FieldDecl::classofKind(K) || IndirectFieldDecl::classofKind(K); }, Decls); #ifndef NDEBUG // Check that all decls we got were FieldDecls. for (unsigned i=0, e=Decls.size(); i != e; ++i) assert(isa(Decls[i]) || isa(Decls[i])); #endif if (Decls.empty()) return; auto [ExternalFirst, ExternalLast] = BuildDeclChain(Decls, /*FieldsAlreadyLoaded=*/false); ExternalLast->NextInContextAndBits.setPointer(FirstDecl); FirstDecl = ExternalFirst; if (!LastDecl) LastDecl = ExternalLast; } bool RecordDecl::mayInsertExtraPadding(bool EmitRemark) const { ASTContext &Context = getASTContext(); const SanitizerMask EnabledAsanMask = Context.getLangOpts().Sanitize.Mask & (SanitizerKind::Address | SanitizerKind::KernelAddress); if (!EnabledAsanMask || !Context.getLangOpts().SanitizeAddressFieldPadding) return false; const auto &NoSanitizeList = Context.getNoSanitizeList(); const auto *CXXRD = dyn_cast(this); // We may be able to relax some of these requirements. int ReasonToReject = -1; if (!CXXRD || CXXRD->isExternCContext()) ReasonToReject = 0; // is not C++. else if (CXXRD->hasAttr()) ReasonToReject = 1; // is packed. 
else if (CXXRD->isUnion()) ReasonToReject = 2; // is a union. else if (CXXRD->isTriviallyCopyable()) ReasonToReject = 3; // is trivially copyable. else if (CXXRD->hasTrivialDestructor()) ReasonToReject = 4; // has trivial destructor. else if (CXXRD->isStandardLayout()) ReasonToReject = 5; // is standard layout. else if (NoSanitizeList.containsLocation(EnabledAsanMask, getLocation(), "field-padding")) ReasonToReject = 6; // is in an excluded file. else if (NoSanitizeList.containsType( EnabledAsanMask, getQualifiedNameAsString(), "field-padding")) ReasonToReject = 7; // The type is excluded. if (EmitRemark) { if (ReasonToReject >= 0) Context.getDiagnostics().Report( getLocation(), diag::remark_sanitize_address_insert_extra_padding_rejected) << getQualifiedNameAsString() << ReasonToReject; else Context.getDiagnostics().Report( getLocation(), diag::remark_sanitize_address_insert_extra_padding_accepted) << getQualifiedNameAsString(); } return ReasonToReject < 0; } const FieldDecl *RecordDecl::findFirstNamedDataMember() const { for (const auto *I : fields()) { if (I->getIdentifier()) return I; if (const auto *RT = I->getType()->getAs()) if (const FieldDecl *NamedDataMember = RT->getDecl()->findFirstNamedDataMember()) return NamedDataMember; } // We didn't find a named data member. return nullptr; } unsigned RecordDecl::getODRHash() { if (hasODRHash()) return RecordDeclBits.ODRHash; // Only calculate hash on first call of getODRHash per record. ODRHash Hash; Hash.AddRecordDecl(this); // For RecordDecl the ODRHash is stored in the remaining 26 // bit of RecordDeclBits, adjust the hash to accomodate. setODRHash(Hash.CalculateHash() >> 6); return RecordDeclBits.ODRHash; } //===----------------------------------------------------------------------===// // BlockDecl Implementation //===----------------------------------------------------------------------===// BlockDecl::BlockDecl(DeclContext *DC, SourceLocation CaretLoc) : Decl(Block, DC, CaretLoc), DeclContext(Block) { setIsVariadic(false); setCapturesCXXThis(false); setBlockMissingReturnType(true); setIsConversionFromLambda(false); setDoesNotEscape(false); setCanAvoidCopyToHeap(false); } void BlockDecl::setParams(ArrayRef NewParamInfo) { assert(!ParamInfo && "Already has param info!"); // Zero params -> null pointer. if (!NewParamInfo.empty()) { NumParams = NewParamInfo.size(); ParamInfo = new (getASTContext()) ParmVarDecl*[NewParamInfo.size()]; std::copy(NewParamInfo.begin(), NewParamInfo.end(), ParamInfo); } } void BlockDecl::setCaptures(ASTContext &Context, ArrayRef Captures, bool CapturesCXXThis) { this->setCapturesCXXThis(CapturesCXXThis); this->NumCaptures = Captures.size(); if (Captures.empty()) { this->Captures = nullptr; return; } this->Captures = Captures.copy(Context).data(); } bool BlockDecl::capturesVariable(const VarDecl *variable) const { for (const auto &I : captures()) // Only auto vars can be captured, so no redeclaration worries. if (I.getVariable() == variable) return true; return false; } SourceRange BlockDecl::getSourceRange() const { return SourceRange(getLocation(), Body ? 
Body->getEndLoc() : getLocation()); } //===----------------------------------------------------------------------===// // Other Decl Allocation/Deallocation Method Implementations //===----------------------------------------------------------------------===// void TranslationUnitDecl::anchor() {} TranslationUnitDecl *TranslationUnitDecl::Create(ASTContext &C) { return new (C, (DeclContext *)nullptr) TranslationUnitDecl(C); } void TranslationUnitDecl::setAnonymousNamespace(NamespaceDecl *D) { AnonymousNamespace = D; if (ASTMutationListener *Listener = Ctx.getASTMutationListener()) Listener->AddedAnonymousNamespace(this, D); } void PragmaCommentDecl::anchor() {} PragmaCommentDecl *PragmaCommentDecl::Create(const ASTContext &C, TranslationUnitDecl *DC, SourceLocation CommentLoc, PragmaMSCommentKind CommentKind, StringRef Arg) { PragmaCommentDecl *PCD = new (C, DC, additionalSizeToAlloc(Arg.size() + 1)) PragmaCommentDecl(DC, CommentLoc, CommentKind); memcpy(PCD->getTrailingObjects(), Arg.data(), Arg.size()); PCD->getTrailingObjects()[Arg.size()] = '\0'; return PCD; } PragmaCommentDecl *PragmaCommentDecl::CreateDeserialized(ASTContext &C, GlobalDeclID ID, unsigned ArgSize) { return new (C, ID, additionalSizeToAlloc(ArgSize + 1)) PragmaCommentDecl(nullptr, SourceLocation(), PCK_Unknown); } void PragmaDetectMismatchDecl::anchor() {} PragmaDetectMismatchDecl * PragmaDetectMismatchDecl::Create(const ASTContext &C, TranslationUnitDecl *DC, SourceLocation Loc, StringRef Name, StringRef Value) { size_t ValueStart = Name.size() + 1; PragmaDetectMismatchDecl *PDMD = new (C, DC, additionalSizeToAlloc(ValueStart + Value.size() + 1)) PragmaDetectMismatchDecl(DC, Loc, ValueStart); memcpy(PDMD->getTrailingObjects(), Name.data(), Name.size()); PDMD->getTrailingObjects()[Name.size()] = '\0'; memcpy(PDMD->getTrailingObjects() + ValueStart, Value.data(), Value.size()); PDMD->getTrailingObjects()[ValueStart + Value.size()] = '\0'; return PDMD; } PragmaDetectMismatchDecl * PragmaDetectMismatchDecl::CreateDeserialized(ASTContext &C, GlobalDeclID ID, unsigned NameValueSize) { return new (C, ID, additionalSizeToAlloc(NameValueSize + 1)) PragmaDetectMismatchDecl(nullptr, SourceLocation(), 0); } void ExternCContextDecl::anchor() {} ExternCContextDecl *ExternCContextDecl::Create(const ASTContext &C, TranslationUnitDecl *DC) { return new (C, DC) ExternCContextDecl(DC); } void LabelDecl::anchor() {} LabelDecl *LabelDecl::Create(ASTContext &C, DeclContext *DC, SourceLocation IdentL, IdentifierInfo *II) { return new (C, DC) LabelDecl(DC, IdentL, II, nullptr, IdentL); } LabelDecl *LabelDecl::Create(ASTContext &C, DeclContext *DC, SourceLocation IdentL, IdentifierInfo *II, SourceLocation GnuLabelL) { assert(GnuLabelL != IdentL && "Use this only for GNU local labels"); return new (C, DC) LabelDecl(DC, IdentL, II, nullptr, GnuLabelL); } LabelDecl *LabelDecl::CreateDeserialized(ASTContext &C, GlobalDeclID ID) { return new (C, ID) LabelDecl(nullptr, SourceLocation(), nullptr, nullptr, SourceLocation()); } void LabelDecl::setMSAsmLabel(StringRef Name) { char *Buffer = new (getASTContext(), 1) char[Name.size() + 1]; memcpy(Buffer, Name.data(), Name.size()); Buffer[Name.size()] = '\0'; MSAsmName = Buffer; } void ValueDecl::anchor() {} bool ValueDecl::isWeak() const { auto *MostRecent = getMostRecentDecl(); return MostRecent->hasAttr() || MostRecent->hasAttr() || isWeakImported(); } bool ValueDecl::isInitCapture() const { if (auto *Var = llvm::dyn_cast(this)) return Var->isInitCapture(); return false; } void 
ImplicitParamDecl::anchor() {} ImplicitParamDecl *ImplicitParamDecl::Create(ASTContext &C, DeclContext *DC, SourceLocation IdLoc, IdentifierInfo *Id, QualType Type, ImplicitParamKind ParamKind) { return new (C, DC) ImplicitParamDecl(C, DC, IdLoc, Id, Type, ParamKind); } ImplicitParamDecl *ImplicitParamDecl::Create(ASTContext &C, QualType Type, ImplicitParamKind ParamKind) { return new (C, nullptr) ImplicitParamDecl(C, Type, ParamKind); } ImplicitParamDecl *ImplicitParamDecl::CreateDeserialized(ASTContext &C, GlobalDeclID ID) { return new (C, ID) ImplicitParamDecl(C, QualType(), ImplicitParamKind::Other); } FunctionDecl * FunctionDecl::Create(ASTContext &C, DeclContext *DC, SourceLocation StartLoc, const DeclarationNameInfo &NameInfo, QualType T, TypeSourceInfo *TInfo, StorageClass SC, bool UsesFPIntrin, bool isInlineSpecified, bool hasWrittenPrototype, ConstexprSpecKind ConstexprKind, Expr *TrailingRequiresClause) { FunctionDecl *New = new (C, DC) FunctionDecl( Function, C, DC, StartLoc, NameInfo, T, TInfo, SC, UsesFPIntrin, isInlineSpecified, ConstexprKind, TrailingRequiresClause); New->setHasWrittenPrototype(hasWrittenPrototype); return New; } FunctionDecl *FunctionDecl::CreateDeserialized(ASTContext &C, GlobalDeclID ID) { return new (C, ID) FunctionDecl( Function, C, nullptr, SourceLocation(), DeclarationNameInfo(), QualType(), nullptr, SC_None, false, false, ConstexprSpecKind::Unspecified, nullptr); } BlockDecl *BlockDecl::Create(ASTContext &C, DeclContext *DC, SourceLocation L) { return new (C, DC) BlockDecl(DC, L); } BlockDecl *BlockDecl::CreateDeserialized(ASTContext &C, GlobalDeclID ID) { return new (C, ID) BlockDecl(nullptr, SourceLocation()); } CapturedDecl::CapturedDecl(DeclContext *DC, unsigned NumParams) : Decl(Captured, DC, SourceLocation()), DeclContext(Captured), NumParams(NumParams), ContextParam(0), BodyAndNothrow(nullptr, false) {} CapturedDecl *CapturedDecl::Create(ASTContext &C, DeclContext *DC, unsigned NumParams) { return new (C, DC, additionalSizeToAlloc(NumParams)) CapturedDecl(DC, NumParams); } CapturedDecl *CapturedDecl::CreateDeserialized(ASTContext &C, GlobalDeclID ID, unsigned NumParams) { return new (C, ID, additionalSizeToAlloc(NumParams)) CapturedDecl(nullptr, NumParams); } Stmt *CapturedDecl::getBody() const { return BodyAndNothrow.getPointer(); } void CapturedDecl::setBody(Stmt *B) { BodyAndNothrow.setPointer(B); } bool CapturedDecl::isNothrow() const { return BodyAndNothrow.getInt(); } void CapturedDecl::setNothrow(bool Nothrow) { BodyAndNothrow.setInt(Nothrow); } EnumConstantDecl::EnumConstantDecl(const ASTContext &C, DeclContext *DC, SourceLocation L, IdentifierInfo *Id, QualType T, Expr *E, const llvm::APSInt &V) : ValueDecl(EnumConstant, DC, L, Id, T), Init((Stmt *)E) { setInitVal(C, V); } EnumConstantDecl *EnumConstantDecl::Create(ASTContext &C, EnumDecl *CD, SourceLocation L, IdentifierInfo *Id, QualType T, Expr *E, const llvm::APSInt &V) { return new (C, CD) EnumConstantDecl(C, CD, L, Id, T, E, V); } EnumConstantDecl *EnumConstantDecl::CreateDeserialized(ASTContext &C, GlobalDeclID ID) { return new (C, ID) EnumConstantDecl(C, nullptr, SourceLocation(), nullptr, QualType(), nullptr, llvm::APSInt()); } void IndirectFieldDecl::anchor() {} IndirectFieldDecl::IndirectFieldDecl(ASTContext &C, DeclContext *DC, SourceLocation L, DeclarationName N, QualType T, MutableArrayRef CH) : ValueDecl(IndirectField, DC, L, N, T), Chaining(CH.data()), ChainingSize(CH.size()) { // In C++, indirect field declarations conflict with tag declarations in the // same scope, 
so add them to IDNS_Tag so that tag redeclaration finds them. if (C.getLangOpts().CPlusPlus) IdentifierNamespace |= IDNS_Tag; } IndirectFieldDecl * IndirectFieldDecl::Create(ASTContext &C, DeclContext *DC, SourceLocation L, const IdentifierInfo *Id, QualType T, llvm::MutableArrayRef CH) { return new (C, DC) IndirectFieldDecl(C, DC, L, Id, T, CH); } IndirectFieldDecl *IndirectFieldDecl::CreateDeserialized(ASTContext &C, GlobalDeclID ID) { return new (C, ID) IndirectFieldDecl(C, nullptr, SourceLocation(), DeclarationName(), QualType(), std::nullopt); } SourceRange EnumConstantDecl::getSourceRange() const { SourceLocation End = getLocation(); if (Init) End = Init->getEndLoc(); return SourceRange(getLocation(), End); } void TypeDecl::anchor() {} TypedefDecl *TypedefDecl::Create(ASTContext &C, DeclContext *DC, SourceLocation StartLoc, SourceLocation IdLoc, const IdentifierInfo *Id, TypeSourceInfo *TInfo) { return new (C, DC) TypedefDecl(C, DC, StartLoc, IdLoc, Id, TInfo); } void TypedefNameDecl::anchor() {} TagDecl *TypedefNameDecl::getAnonDeclWithTypedefName(bool AnyRedecl) const { if (auto *TT = getTypeSourceInfo()->getType()->getAs()) { auto *OwningTypedef = TT->getDecl()->getTypedefNameForAnonDecl(); auto *ThisTypedef = this; if (AnyRedecl && OwningTypedef) { OwningTypedef = OwningTypedef->getCanonicalDecl(); ThisTypedef = ThisTypedef->getCanonicalDecl(); } if (OwningTypedef == ThisTypedef) return TT->getDecl(); } return nullptr; } bool TypedefNameDecl::isTransparentTagSlow() const { auto determineIsTransparent = [&]() { if (auto *TT = getUnderlyingType()->getAs()) { if (auto *TD = TT->getDecl()) { if (TD->getName() != getName()) return false; SourceLocation TTLoc = getLocation(); SourceLocation TDLoc = TD->getLocation(); if (!TTLoc.isMacroID() || !TDLoc.isMacroID()) return false; SourceManager &SM = getASTContext().getSourceManager(); return SM.getSpellingLoc(TTLoc) == SM.getSpellingLoc(TDLoc); } } return false; }; bool isTransparent = determineIsTransparent(); MaybeModedTInfo.setInt((isTransparent << 1) | 1); return isTransparent; } TypedefDecl *TypedefDecl::CreateDeserialized(ASTContext &C, GlobalDeclID ID) { return new (C, ID) TypedefDecl(C, nullptr, SourceLocation(), SourceLocation(), nullptr, nullptr); } TypeAliasDecl *TypeAliasDecl::Create(ASTContext &C, DeclContext *DC, SourceLocation StartLoc, SourceLocation IdLoc, const IdentifierInfo *Id, TypeSourceInfo *TInfo) { return new (C, DC) TypeAliasDecl(C, DC, StartLoc, IdLoc, Id, TInfo); } TypeAliasDecl *TypeAliasDecl::CreateDeserialized(ASTContext &C, GlobalDeclID ID) { return new (C, ID) TypeAliasDecl(C, nullptr, SourceLocation(), SourceLocation(), nullptr, nullptr); } SourceRange TypedefDecl::getSourceRange() const { SourceLocation RangeEnd = getLocation(); if (TypeSourceInfo *TInfo = getTypeSourceInfo()) { if (typeIsPostfix(TInfo->getType())) RangeEnd = TInfo->getTypeLoc().getSourceRange().getEnd(); } return SourceRange(getBeginLoc(), RangeEnd); } SourceRange TypeAliasDecl::getSourceRange() const { SourceLocation RangeEnd = getBeginLoc(); if (TypeSourceInfo *TInfo = getTypeSourceInfo()) RangeEnd = TInfo->getTypeLoc().getSourceRange().getEnd(); return SourceRange(getBeginLoc(), RangeEnd); } void FileScopeAsmDecl::anchor() {} FileScopeAsmDecl *FileScopeAsmDecl::Create(ASTContext &C, DeclContext *DC, StringLiteral *Str, SourceLocation AsmLoc, SourceLocation RParenLoc) { return new (C, DC) FileScopeAsmDecl(DC, Str, AsmLoc, RParenLoc); } FileScopeAsmDecl *FileScopeAsmDecl::CreateDeserialized(ASTContext &C, GlobalDeclID ID) { return new 
(C, ID) FileScopeAsmDecl(nullptr, nullptr, SourceLocation(), SourceLocation()); } void TopLevelStmtDecl::anchor() {} TopLevelStmtDecl *TopLevelStmtDecl::Create(ASTContext &C, Stmt *Statement) { assert(C.getLangOpts().IncrementalExtensions && "Must be used only in incremental mode"); SourceLocation Loc = Statement ? Statement->getBeginLoc() : SourceLocation(); DeclContext *DC = C.getTranslationUnitDecl(); return new (C, DC) TopLevelStmtDecl(DC, Loc, Statement); } TopLevelStmtDecl *TopLevelStmtDecl::CreateDeserialized(ASTContext &C, GlobalDeclID ID) { return new (C, ID) TopLevelStmtDecl(/*DC=*/nullptr, SourceLocation(), /*S=*/nullptr); } SourceRange TopLevelStmtDecl::getSourceRange() const { return SourceRange(getLocation(), Statement->getEndLoc()); } void TopLevelStmtDecl::setStmt(Stmt *S) { assert(S); Statement = S; setLocation(Statement->getBeginLoc()); } void EmptyDecl::anchor() {} EmptyDecl *EmptyDecl::Create(ASTContext &C, DeclContext *DC, SourceLocation L) { return new (C, DC) EmptyDecl(DC, L); } EmptyDecl *EmptyDecl::CreateDeserialized(ASTContext &C, GlobalDeclID ID) { return new (C, ID) EmptyDecl(nullptr, SourceLocation()); } HLSLBufferDecl::HLSLBufferDecl(DeclContext *DC, bool CBuffer, SourceLocation KwLoc, IdentifierInfo *ID, SourceLocation IDLoc, SourceLocation LBrace) : NamedDecl(Decl::Kind::HLSLBuffer, DC, IDLoc, DeclarationName(ID)), DeclContext(Decl::Kind::HLSLBuffer), LBraceLoc(LBrace), KwLoc(KwLoc), IsCBuffer(CBuffer) {} HLSLBufferDecl *HLSLBufferDecl::Create(ASTContext &C, DeclContext *LexicalParent, bool CBuffer, SourceLocation KwLoc, IdentifierInfo *ID, SourceLocation IDLoc, SourceLocation LBrace) { // For hlsl like this // cbuffer A { // cbuffer B { // } // } // compiler should treat it as // cbuffer A { // } // cbuffer B { // } // FIXME: support nested buffers if required for back-compat. DeclContext *DC = LexicalParent; HLSLBufferDecl *Result = new (C, DC) HLSLBufferDecl(DC, CBuffer, KwLoc, ID, IDLoc, LBrace); return Result; } HLSLBufferDecl *HLSLBufferDecl::CreateDeserialized(ASTContext &C, GlobalDeclID ID) { return new (C, ID) HLSLBufferDecl(nullptr, false, SourceLocation(), nullptr, SourceLocation(), SourceLocation()); } //===----------------------------------------------------------------------===// // ImportDecl Implementation //===----------------------------------------------------------------------===// /// Retrieve the number of module identifiers needed to name the given /// module. 
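///
/// For example (illustrative):
///
///   @import A.B.C;   // three identifiers -> returns 3
///   @import A;       // one identifier    -> returns 1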
static unsigned getNumModuleIdentifiers(Module *Mod) { unsigned Result = 1; while (Mod->Parent) { Mod = Mod->Parent; ++Result; } return Result; } ImportDecl::ImportDecl(DeclContext *DC, SourceLocation StartLoc, Module *Imported, ArrayRef IdentifierLocs) : Decl(Import, DC, StartLoc), ImportedModule(Imported), NextLocalImportAndComplete(nullptr, true) { assert(getNumModuleIdentifiers(Imported) == IdentifierLocs.size()); auto *StoredLocs = getTrailingObjects(); std::uninitialized_copy(IdentifierLocs.begin(), IdentifierLocs.end(), StoredLocs); } ImportDecl::ImportDecl(DeclContext *DC, SourceLocation StartLoc, Module *Imported, SourceLocation EndLoc) : Decl(Import, DC, StartLoc), ImportedModule(Imported), NextLocalImportAndComplete(nullptr, false) { *getTrailingObjects() = EndLoc; } ImportDecl *ImportDecl::Create(ASTContext &C, DeclContext *DC, SourceLocation StartLoc, Module *Imported, ArrayRef IdentifierLocs) { return new (C, DC, additionalSizeToAlloc(IdentifierLocs.size())) ImportDecl(DC, StartLoc, Imported, IdentifierLocs); } ImportDecl *ImportDecl::CreateImplicit(ASTContext &C, DeclContext *DC, SourceLocation StartLoc, Module *Imported, SourceLocation EndLoc) { ImportDecl *Import = new (C, DC, additionalSizeToAlloc(1)) ImportDecl(DC, StartLoc, Imported, EndLoc); Import->setImplicit(); return Import; } ImportDecl *ImportDecl::CreateDeserialized(ASTContext &C, GlobalDeclID ID, unsigned NumLocations) { return new (C, ID, additionalSizeToAlloc(NumLocations)) ImportDecl(EmptyShell()); } ArrayRef ImportDecl::getIdentifierLocs() const { if (!isImportComplete()) return std::nullopt; const auto *StoredLocs = getTrailingObjects(); return llvm::ArrayRef(StoredLocs, getNumModuleIdentifiers(getImportedModule())); } SourceRange ImportDecl::getSourceRange() const { if (!isImportComplete()) return SourceRange(getLocation(), *getTrailingObjects()); return SourceRange(getLocation(), getIdentifierLocs().back()); } //===----------------------------------------------------------------------===// // ExportDecl Implementation //===----------------------------------------------------------------------===// void ExportDecl::anchor() {} ExportDecl *ExportDecl::Create(ASTContext &C, DeclContext *DC, SourceLocation ExportLoc) { return new (C, DC) ExportDecl(DC, ExportLoc); } ExportDecl *ExportDecl::CreateDeserialized(ASTContext &C, GlobalDeclID ID) { return new (C, ID) ExportDecl(nullptr, SourceLocation()); } bool clang::IsArmStreamingFunction(const FunctionDecl *FD, bool IncludeLocallyStreaming) { if (IncludeLocallyStreaming) if (FD->hasAttr()) return true; if (const Type *Ty = FD->getType().getTypePtrOrNull()) if (const auto *FPT = Ty->getAs()) if (FPT->getAArch64SMEAttributes() & FunctionType::SME_PStateSMEnabledMask) return true; return false; } diff --git a/clang/lib/AST/Interp/Interp.cpp b/clang/lib/AST/Interp/Interp.cpp index 6fcd90e5f584..0f9eedc3f38e 100644 --- a/clang/lib/AST/Interp/Interp.cpp +++ b/clang/lib/AST/Interp/Interp.cpp @@ -1,954 +1,962 @@ //===------- Interp.cpp - Interpreter for the constexpr VM ------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #include "Interp.h" #include "Function.h" #include "InterpFrame.h" #include "InterpShared.h" #include "InterpStack.h" #include "Opcode.h" #include "PrimType.h" #include "Program.h" #include "State.h" #include "clang/AST/ASTContext.h" #include "clang/AST/ASTDiagnostic.h" #include "clang/AST/CXXInheritance.h" #include "clang/AST/DeclObjC.h" #include "clang/AST/Expr.h" #include "clang/AST/ExprCXX.h" #include "llvm/ADT/APSInt.h" #include "llvm/ADT/StringExtras.h" #include #include using namespace clang; using namespace clang; using namespace clang::interp; static bool RetValue(InterpState &S, CodePtr &Pt, APValue &Result) { llvm::report_fatal_error("Interpreter cannot return values"); } //===----------------------------------------------------------------------===// // Jmp, Jt, Jf //===----------------------------------------------------------------------===// static bool Jmp(InterpState &S, CodePtr &PC, int32_t Offset) { PC += Offset; return true; } static bool Jt(InterpState &S, CodePtr &PC, int32_t Offset) { if (S.Stk.pop()) { PC += Offset; } return true; } static bool Jf(InterpState &S, CodePtr &PC, int32_t Offset) { if (!S.Stk.pop()) { PC += Offset; } return true; } static void diagnoseMissingInitializer(InterpState &S, CodePtr OpPC, const ValueDecl *VD) { const SourceInfo &E = S.Current->getSource(OpPC); S.FFDiag(E, diag::note_constexpr_var_init_unknown, 1) << VD; S.Note(VD->getLocation(), diag::note_declared_at) << VD->getSourceRange(); } static void diagnoseNonConstVariable(InterpState &S, CodePtr OpPC, const ValueDecl *VD); static bool diagnoseUnknownDecl(InterpState &S, CodePtr OpPC, const ValueDecl *D) { const SourceInfo &E = S.Current->getSource(OpPC); if (isa(D)) { if (S.getLangOpts().CPlusPlus11) { S.FFDiag(E, diag::note_constexpr_function_param_value_unknown) << D; S.Note(D->getLocation(), diag::note_declared_at) << D->getSourceRange(); } else { S.FFDiag(E); } return false; } if (!D->getType().isConstQualified()) diagnoseNonConstVariable(S, OpPC, D); else if (const auto *VD = dyn_cast(D); VD && !VD->getAnyInitializer()) diagnoseMissingInitializer(S, OpPC, VD); return false; } static void diagnoseNonConstVariable(InterpState &S, CodePtr OpPC, const ValueDecl *VD) { if (!S.getLangOpts().CPlusPlus) return; const SourceInfo &Loc = S.Current->getSource(OpPC); if (const auto *VarD = dyn_cast(VD); VarD && VarD->getType().isConstQualified() && !VarD->getAnyInitializer()) { diagnoseMissingInitializer(S, OpPC, VD); return; } // Rather random, but this is to match the diagnostic output of the current // interpreter. if (isa(VD)) return; if (VD->getType()->isIntegralOrEnumerationType()) { S.FFDiag(Loc, diag::note_constexpr_ltor_non_const_int, 1) << VD; S.Note(VD->getLocation(), diag::note_declared_at); return; } S.FFDiag(Loc, S.getLangOpts().CPlusPlus11 ? diag::note_constexpr_ltor_non_constexpr : diag::note_constexpr_ltor_non_integral, 1) << VD << VD->getType(); S.Note(VD->getLocation(), diag::note_declared_at); } static bool CheckActive(InterpState &S, CodePtr OpPC, const Pointer &Ptr, AccessKinds AK) { if (Ptr.isActive()) return true; // Get the inactive field descriptor. const FieldDecl *InactiveField = Ptr.getField(); // Walk up the pointer chain to find the union which is not active. Pointer U = Ptr.getBase(); while (!U.isActive()) { U = U.getBase(); } // Find the active field of the union. 
const Record *R = U.getRecord(); assert(R && R->isUnion() && "Not a union"); const FieldDecl *ActiveField = nullptr; for (unsigned I = 0, N = R->getNumFields(); I < N; ++I) { const Pointer &Field = U.atField(R->getField(I)->Offset); if (Field.isActive()) { ActiveField = Field.getField(); break; } } const SourceInfo &Loc = S.Current->getSource(OpPC); S.FFDiag(Loc, diag::note_constexpr_access_inactive_union_member) << AK << InactiveField << !ActiveField << ActiveField; return false; } static bool CheckTemporary(InterpState &S, CodePtr OpPC, const Pointer &Ptr, AccessKinds AK) { if (auto ID = Ptr.getDeclID()) { if (!Ptr.isStaticTemporary()) return true; if (Ptr.getDeclDesc()->getType().isConstQualified()) return true; if (S.P.getCurrentDecl() == ID) return true; const SourceInfo &E = S.Current->getSource(OpPC); S.FFDiag(E, diag::note_constexpr_access_static_temporary, 1) << AK; S.Note(Ptr.getDeclLoc(), diag::note_constexpr_temporary_here); return false; } return true; } static bool CheckGlobal(InterpState &S, CodePtr OpPC, const Pointer &Ptr) { if (auto ID = Ptr.getDeclID()) { if (!Ptr.isStatic()) return true; if (S.P.getCurrentDecl() == ID) return true; S.FFDiag(S.Current->getLocation(OpPC), diag::note_constexpr_modify_global); return false; } return true; } namespace clang { namespace interp { static void popArg(InterpState &S, const Expr *Arg) { PrimType Ty = S.getContext().classify(Arg).value_or(PT_Ptr); TYPE_SWITCH(Ty, S.Stk.discard()); } void cleanupAfterFunctionCall(InterpState &S, CodePtr OpPC) { assert(S.Current); const Function *CurFunc = S.Current->getFunction(); assert(CurFunc); if (CurFunc->isUnevaluatedBuiltin()) return; // Some builtin functions require us to only look at the call site, since // the classified parameter types do not match. if (CurFunc->isBuiltin()) { const auto *CE = cast(S.Current->Caller->getExpr(S.Current->getRetPC())); for (int32_t I = CE->getNumArgs() - 1; I >= 0; --I) { const Expr *A = CE->getArg(I); popArg(S, A); } return; } if (S.Current->Caller && CurFunc->isVariadic()) { // CallExpr we're look for is at the return PC of the current function, i.e. // in the caller. // This code path should be executed very rarely. unsigned NumVarArgs; const Expr *const *Args = nullptr; unsigned NumArgs = 0; const Expr *CallSite = S.Current->Caller->getExpr(S.Current->getRetPC()); if (const auto *CE = dyn_cast(CallSite)) { Args = CE->getArgs(); NumArgs = CE->getNumArgs(); } else if (const auto *CE = dyn_cast(CallSite)) { Args = CE->getArgs(); NumArgs = CE->getNumArgs(); } else assert(false && "Can't get arguments from that expression type"); assert(NumArgs >= CurFunc->getNumWrittenParams()); NumVarArgs = NumArgs - CurFunc->getNumWrittenParams(); for (unsigned I = 0; I != NumVarArgs; ++I) { const Expr *A = Args[NumArgs - 1 - I]; popArg(S, A); } } // And in any case, remove the fixed parameters (the non-variadic ones) // at the end. 
S.Current->popArgs(); } bool CheckExtern(InterpState &S, CodePtr OpPC, const Pointer &Ptr) { if (!Ptr.isExtern()) return true; if (Ptr.isInitialized() || (Ptr.getDeclDesc()->asVarDecl() == S.EvaluatingDecl)) return true; if (!S.checkingPotentialConstantExpression() && S.getLangOpts().CPlusPlus) { const auto *VD = Ptr.getDeclDesc()->asValueDecl(); diagnoseNonConstVariable(S, OpPC, VD); } return false; } bool CheckArray(InterpState &S, CodePtr OpPC, const Pointer &Ptr) { if (!Ptr.isUnknownSizeArray()) return true; const SourceInfo &E = S.Current->getSource(OpPC); S.FFDiag(E, diag::note_constexpr_unsized_array_indexed); return false; } bool CheckLive(InterpState &S, CodePtr OpPC, const Pointer &Ptr, AccessKinds AK) { if (Ptr.isZero()) { const auto &Src = S.Current->getSource(OpPC); if (Ptr.isField()) S.FFDiag(Src, diag::note_constexpr_null_subobject) << CSK_Field; else S.FFDiag(Src, diag::note_constexpr_access_null) << AK; return false; } if (!Ptr.isLive()) { const auto &Src = S.Current->getSource(OpPC); bool IsTemp = Ptr.isTemporary(); S.FFDiag(Src, diag::note_constexpr_lifetime_ended, 1) << AK << !IsTemp; if (IsTemp) S.Note(Ptr.getDeclLoc(), diag::note_constexpr_temporary_here); else S.Note(Ptr.getDeclLoc(), diag::note_declared_at); return false; } return true; } bool CheckConstant(InterpState &S, CodePtr OpPC, const Descriptor *Desc) { assert(Desc); auto IsConstType = [&S](const VarDecl *VD) -> bool { if (VD->isConstexpr()) return true; QualType T = VD->getType(); if (S.getLangOpts().CPlusPlus && !S.getLangOpts().CPlusPlus11) return (T->isSignedIntegerOrEnumerationType() || T->isUnsignedIntegerOrEnumerationType()) && T.isConstQualified(); if (T.isConstQualified()) return true; if (const auto *RT = T->getAs()) return RT->getPointeeType().isConstQualified(); if (const auto *PT = T->getAs()) return PT->getPointeeType().isConstQualified(); return false; }; if (const auto *D = Desc->asVarDecl(); D && D->hasGlobalStorage() && D != S.EvaluatingDecl && !IsConstType(D)) { diagnoseNonConstVariable(S, OpPC, D); return S.inConstantContext(); } return true; } static bool CheckConstant(InterpState &S, CodePtr OpPC, const Pointer &Ptr) { if (Ptr.isIntegralPointer()) return true; return CheckConstant(S, OpPC, Ptr.getDeclDesc()); } bool CheckNull(InterpState &S, CodePtr OpPC, const Pointer &Ptr, CheckSubobjectKind CSK) { if (!Ptr.isZero()) return true; const SourceInfo &Loc = S.Current->getSource(OpPC); S.FFDiag(Loc, diag::note_constexpr_null_subobject) << CSK << S.Current->getRange(OpPC); return false; } bool CheckRange(InterpState &S, CodePtr OpPC, const Pointer &Ptr, AccessKinds AK) { if (!Ptr.isOnePastEnd()) return true; const SourceInfo &Loc = S.Current->getSource(OpPC); S.FFDiag(Loc, diag::note_constexpr_access_past_end) << AK << S.Current->getRange(OpPC); return false; } bool CheckRange(InterpState &S, CodePtr OpPC, const Pointer &Ptr, CheckSubobjectKind CSK) { if (!Ptr.isElementPastEnd()) return true; const SourceInfo &Loc = S.Current->getSource(OpPC); S.FFDiag(Loc, diag::note_constexpr_past_end_subobject) << CSK << S.Current->getRange(OpPC); return false; } bool CheckSubobject(InterpState &S, CodePtr OpPC, const Pointer &Ptr, CheckSubobjectKind CSK) { if (!Ptr.isOnePastEnd()) return true; const SourceInfo &Loc = S.Current->getSource(OpPC); S.FFDiag(Loc, diag::note_constexpr_past_end_subobject) << CSK << S.Current->getRange(OpPC); return false; } bool CheckDowncast(InterpState &S, CodePtr OpPC, const Pointer &Ptr, uint32_t Offset) { uint32_t MinOffset = Ptr.getDeclDesc()->getMetadataSize(); 
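CheckDowncast, whose body continues below, rejects a base-to-derived cast whose target object does not actually exist. At the source level this is the familiar invalid static_cast in a constant expression; a short sketch of the kind of cast it diagnoses (illustrative only, not part of the patch):

struct Base { int b = 1; };
struct Derived : Base { int d = 2; };

constexpr int badDowncast() {
  Base B;
  // B is a complete Base object, not the base subobject of a Derived, so the
  // cast below is not permitted in a constant expression.
  const Derived &D = static_cast<const Derived &>(B);
  return D.d;
}
// static_assert(badDowncast() == 2); // error: not a constant expression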
uint32_t PtrOffset = Ptr.getByteOffset(); // We subtract Offset from PtrOffset. The result must be at least // MinOffset. if (Offset < PtrOffset && (PtrOffset - Offset) >= MinOffset) return true; const auto *E = cast(S.Current->getExpr(OpPC)); QualType TargetQT = E->getType()->getPointeeType(); QualType MostDerivedQT = Ptr.getDeclPtr().getType(); S.CCEDiag(E, diag::note_constexpr_invalid_downcast) << MostDerivedQT << TargetQT; return false; } bool CheckConst(InterpState &S, CodePtr OpPC, const Pointer &Ptr) { assert(Ptr.isLive() && "Pointer is not live"); if (!Ptr.isConst() || Ptr.isMutable()) return true; // The This pointer is writable in constructors and destructors, // even if isConst() returns true. // TODO(perf): We could be hitting this code path quite a lot in complex // constructors. Is there a better way to do this? if (S.Current->getFunction()) { for (const InterpFrame *Frame = S.Current; Frame; Frame = Frame->Caller) { if (const Function *Func = Frame->getFunction(); Func && (Func->isConstructor() || Func->isDestructor()) && Ptr.block() == Frame->getThis().block()) { return true; } } } if (!Ptr.isBlockPointer()) return false; const QualType Ty = Ptr.getType(); const SourceInfo &Loc = S.Current->getSource(OpPC); S.FFDiag(Loc, diag::note_constexpr_modify_const_type) << Ty; return false; } bool CheckMutable(InterpState &S, CodePtr OpPC, const Pointer &Ptr) { assert(Ptr.isLive() && "Pointer is not live"); if (!Ptr.isMutable()) return true; // In C++14 onwards, it is permitted to read a mutable member whose // lifetime began within the evaluation. if (S.getLangOpts().CPlusPlus14 && Ptr.block()->getEvalID() == S.Ctx.getEvalID()) return true; const SourceInfo &Loc = S.Current->getSource(OpPC); const FieldDecl *Field = Ptr.getField(); S.FFDiag(Loc, diag::note_constexpr_access_mutable, 1) << AK_Read << Field; S.Note(Field->getLocation(), diag::note_declared_at); return false; } bool CheckVolatile(InterpState &S, CodePtr OpPC, const Pointer &Ptr, AccessKinds AK) { assert(Ptr.isLive()); // FIXME: This check here might be kinda expensive. Maybe it would be better // to have another field in InlineDescriptor for this? 
if (!Ptr.isBlockPointer()) return true; QualType PtrType = Ptr.getType(); if (!PtrType.isVolatileQualified()) return true; const SourceInfo &Loc = S.Current->getSource(OpPC); if (S.getLangOpts().CPlusPlus) S.FFDiag(Loc, diag::note_constexpr_access_volatile_type) << AK << PtrType; else S.FFDiag(Loc); return false; } bool CheckInitialized(InterpState &S, CodePtr OpPC, const Pointer &Ptr, AccessKinds AK) { assert(Ptr.isLive()); if (Ptr.isInitialized()) return true; if (const auto *VD = Ptr.getDeclDesc()->asVarDecl(); VD && VD->hasGlobalStorage()) { const SourceInfo &Loc = S.Current->getSource(OpPC); if (VD->getAnyInitializer()) { S.FFDiag(Loc, diag::note_constexpr_var_init_non_constant, 1) << VD; S.Note(VD->getLocation(), diag::note_declared_at); } else { diagnoseMissingInitializer(S, OpPC, VD); } return false; } if (!S.checkingPotentialConstantExpression()) { S.FFDiag(S.Current->getSource(OpPC), diag::note_constexpr_access_uninit) << AK << /*uninitialized=*/true << S.Current->getRange(OpPC); } return false; } bool CheckGlobalInitialized(InterpState &S, CodePtr OpPC, const Pointer &Ptr) { if (Ptr.isInitialized()) return true; assert(S.getLangOpts().CPlusPlus); const auto *VD = cast(Ptr.getDeclDesc()->asValueDecl()); if ((!VD->hasConstantInitialization() && VD->mightBeUsableInConstantExpressions(S.getCtx())) || (S.getLangOpts().OpenCL && !S.getLangOpts().CPlusPlus11 && !VD->hasICEInitializer(S.getCtx()))) { const SourceInfo &Loc = S.Current->getSource(OpPC); S.FFDiag(Loc, diag::note_constexpr_var_init_non_constant, 1) << VD; S.Note(VD->getLocation(), diag::note_declared_at); } return false; } bool CheckLoad(InterpState &S, CodePtr OpPC, const Pointer &Ptr, AccessKinds AK) { if (!CheckLive(S, OpPC, Ptr, AK)) return false; if (!CheckConstant(S, OpPC, Ptr)) return false; if (!CheckDummy(S, OpPC, Ptr, AK)) return false; if (!CheckExtern(S, OpPC, Ptr)) return false; if (!CheckRange(S, OpPC, Ptr, AK)) return false; if (!CheckActive(S, OpPC, Ptr, AK)) return false; if (!CheckInitialized(S, OpPC, Ptr, AK)) return false; if (!CheckTemporary(S, OpPC, Ptr, AK)) return false; if (!CheckMutable(S, OpPC, Ptr)) return false; if (!CheckVolatile(S, OpPC, Ptr, AK)) return false; return true; } bool CheckStore(InterpState &S, CodePtr OpPC, const Pointer &Ptr) { if (!CheckLive(S, OpPC, Ptr, AK_Assign)) return false; if (!CheckDummy(S, OpPC, Ptr, AK_Assign)) return false; if (!CheckExtern(S, OpPC, Ptr)) return false; if (!CheckRange(S, OpPC, Ptr, AK_Assign)) return false; if (!CheckGlobal(S, OpPC, Ptr)) return false; if (!CheckConst(S, OpPC, Ptr)) return false; return true; } bool CheckInvoke(InterpState &S, CodePtr OpPC, const Pointer &Ptr) { if (!CheckLive(S, OpPC, Ptr, AK_MemberCall)) return false; if (!Ptr.isDummy()) { if (!CheckExtern(S, OpPC, Ptr)) return false; if (!CheckRange(S, OpPC, Ptr, AK_MemberCall)) return false; } return true; } bool CheckInit(InterpState &S, CodePtr OpPC, const Pointer &Ptr) { if (!CheckLive(S, OpPC, Ptr, AK_Assign)) return false; if (!CheckRange(S, OpPC, Ptr, AK_Assign)) return false; return true; } bool CheckCallable(InterpState &S, CodePtr OpPC, const Function *F) { if (F->isVirtual() && !S.getLangOpts().CPlusPlus20) { const SourceLocation &Loc = S.Current->getLocation(OpPC); S.CCEDiag(Loc, diag::note_constexpr_virtual_call); return false; } if (F->isConstexpr() && F->hasBody() && (F->getDecl()->isConstexpr() || F->getDecl()->hasAttr())) return true; // Implicitly constexpr. 
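The first test in CheckCallable above mirrors the language rule that virtual dispatch only became usable in constant expressions in C++20. A small illustration of that rule, assuming -std=c++20 (not part of the patch; before C++20 both the constexpr virtual declarations and the call would be rejected):

struct Shape {
  constexpr virtual int sides() const { return 0; }
};
struct Triangle : Shape {
  constexpr int sides() const override { return 3; }
};

// Virtual dispatch inside a constant expression is a C++20 feature.
constexpr int count(const Shape &S) { return S.sides(); }
static_assert(count(Triangle{}) == 3);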
if (F->isLambdaStaticInvoker()) return true; const SourceLocation &Loc = S.Current->getLocation(OpPC); if (S.getLangOpts().CPlusPlus11) { const FunctionDecl *DiagDecl = F->getDecl(); // Invalid decls have been diagnosed before. if (DiagDecl->isInvalidDecl()) return false; // If this function is not constexpr because it is an inherited // non-constexpr constructor, diagnose that directly. const auto *CD = dyn_cast(DiagDecl); if (CD && CD->isInheritingConstructor()) { const auto *Inherited = CD->getInheritedConstructor().getConstructor(); if (!Inherited->isConstexpr()) DiagDecl = CD = Inherited; } // FIXME: If DiagDecl is an implicitly-declared special member function // or an inheriting constructor, we should be much more explicit about why // it's not constexpr. if (CD && CD->isInheritingConstructor()) { S.FFDiag(Loc, diag::note_constexpr_invalid_inhctor, 1) << CD->getInheritedConstructor().getConstructor()->getParent(); S.Note(DiagDecl->getLocation(), diag::note_declared_at); } else { // Don't emit anything if the function isn't defined and we're checking // for a constant expression. It might be defined at the point we're // actually calling it. bool IsExtern = DiagDecl->getStorageClass() == SC_Extern; if (!DiagDecl->isDefined() && !IsExtern && DiagDecl->isConstexpr() && S.checkingPotentialConstantExpression()) return false; // If the declaration is defined, declared 'constexpr' _and_ has a body, // the below diagnostic doesn't add anything useful. if (DiagDecl->isDefined() && DiagDecl->isConstexpr() && DiagDecl->hasBody()) return false; S.FFDiag(Loc, diag::note_constexpr_invalid_function, 1) << DiagDecl->isConstexpr() << (bool)CD << DiagDecl; S.Note(DiagDecl->getLocation(), diag::note_declared_at); } } else { S.FFDiag(Loc, diag::note_invalid_subexpr_in_const_expr); } return false; } bool CheckCallDepth(InterpState &S, CodePtr OpPC) { if ((S.Current->getDepth() + 1) > S.getLangOpts().ConstexprCallDepth) { S.FFDiag(S.Current->getSource(OpPC), diag::note_constexpr_depth_limit_exceeded) << S.getLangOpts().ConstexprCallDepth; return false; } return true; } bool CheckThis(InterpState &S, CodePtr OpPC, const Pointer &This) { if (!This.isZero()) return true; const SourceInfo &Loc = S.Current->getSource(OpPC); bool IsImplicit = false; if (const auto *E = dyn_cast_if_present(Loc.asExpr())) IsImplicit = E->isImplicit(); if (S.getLangOpts().CPlusPlus11) S.FFDiag(Loc, diag::note_constexpr_this) << IsImplicit; else S.FFDiag(Loc); return false; } bool CheckPure(InterpState &S, CodePtr OpPC, const CXXMethodDecl *MD) { if (!MD->isPureVirtual()) return true; const SourceInfo &E = S.Current->getSource(OpPC); S.FFDiag(E, diag::note_constexpr_pure_virtual_call, 1) << MD; S.Note(MD->getLocation(), diag::note_declared_at); return false; } bool CheckFloatResult(InterpState &S, CodePtr OpPC, const Floating &Result, APFloat::opStatus Status) { const SourceInfo &E = S.Current->getSource(OpPC); // [expr.pre]p4: // If during the evaluation of an expression, the result is not // mathematically defined [...], the behavior is undefined. // FIXME: C++ rules require us to not conform to IEEE 754 here. if (Result.isNan()) { S.CCEDiag(E, diag::note_constexpr_float_arithmetic) << /*NaN=*/true << S.Current->getRange(OpPC); return S.noteUndefinedBehavior(); } // In a constant context, assume that any dynamic rounding mode or FP // exception state matches the default floating-point environment. 
if (S.inConstantContext()) return true; FPOptions FPO = E.asExpr()->getFPFeaturesInEffect(S.Ctx.getLangOpts()); if ((Status & APFloat::opInexact) && FPO.getRoundingMode() == llvm::RoundingMode::Dynamic) { // Inexact result means that it depends on rounding mode. If the requested // mode is dynamic, the evaluation cannot be made in compile time. S.FFDiag(E, diag::note_constexpr_dynamic_rounding); return false; } if ((Status != APFloat::opOK) && (FPO.getRoundingMode() == llvm::RoundingMode::Dynamic || FPO.getExceptionMode() != LangOptions::FPE_Ignore || FPO.getAllowFEnvAccess())) { S.FFDiag(E, diag::note_constexpr_float_arithmetic_strict); return false; } if ((Status & APFloat::opStatus::opInvalidOp) && FPO.getExceptionMode() != LangOptions::FPE_Ignore) { // There is no usefully definable result. S.FFDiag(E); return false; } return true; } bool CheckDynamicMemoryAllocation(InterpState &S, CodePtr OpPC) { if (S.getLangOpts().CPlusPlus20) return true; const SourceInfo &E = S.Current->getSource(OpPC); S.CCEDiag(E, diag::note_constexpr_new); return true; } bool CheckNewDeleteForms(InterpState &S, CodePtr OpPC, bool NewWasArray, bool DeleteIsArray, const Descriptor *D, const Expr *NewExpr) { if (NewWasArray == DeleteIsArray) return true; QualType TypeToDiagnose; // We need to shuffle things around a bit here to get a better diagnostic, // because the expression we allocated the block for was of type int*, // but we want to get the array size right. if (D->isArray()) { QualType ElemQT = D->getType()->getPointeeType(); TypeToDiagnose = S.getCtx().getConstantArrayType( ElemQT, APInt(64, static_cast(D->getNumElems()), false), nullptr, ArraySizeModifier::Normal, 0); } else TypeToDiagnose = D->getType()->getPointeeType(); const SourceInfo &E = S.Current->getSource(OpPC); S.FFDiag(E, diag::note_constexpr_new_delete_mismatch) << DeleteIsArray << 0 << TypeToDiagnose; S.Note(NewExpr->getExprLoc(), diag::note_constexpr_dynamic_alloc_here) << NewExpr->getSourceRange(); return false; } bool CheckDeleteSource(InterpState &S, CodePtr OpPC, const Expr *Source, const Pointer &Ptr) { if (Source && isa(Source)) return true; // Whatever this is, we didn't heap allocate it. const SourceInfo &Loc = S.Current->getSource(OpPC); S.FFDiag(Loc, diag::note_constexpr_delete_not_heap_alloc) << Ptr.toDiagnosticString(S.getCtx()); if (Ptr.isTemporary()) S.Note(Ptr.getDeclLoc(), diag::note_constexpr_temporary_here); else S.Note(Ptr.getDeclLoc(), diag::note_declared_at); return false; } /// We aleady know the given DeclRefExpr is invalid for some reason, /// now figure out why and print appropriate diagnostics. 
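CheckNewDeleteForms above enforces the constant-evaluation rule that an allocation must be released with the matching form of delete. A short example of the mismatch it reports, assuming -std=c++20 where new and delete are allowed in constant expressions (illustrative only, not part of the patch):

constexpr bool mismatchedForms() {
  int *P = new int[3]{1, 2, 3}; // array form of new
  delete P;                     // non-array delete; should be: delete[] P
  return true;
}
// static_assert(mismatchedForms()); // error: new[]/delete forms do not match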
bool CheckDeclRef(InterpState &S, CodePtr OpPC, const DeclRefExpr *DR) { const ValueDecl *D = DR->getDecl(); return diagnoseUnknownDecl(S, OpPC, D); } bool CheckDummy(InterpState &S, CodePtr OpPC, const Pointer &Ptr, AccessKinds AK) { if (!Ptr.isDummy()) return true; const Descriptor *Desc = Ptr.getDeclDesc(); const ValueDecl *D = Desc->asValueDecl(); if (!D) return false; if (AK == AK_Read || AK == AK_Increment || AK == AK_Decrement) return diagnoseUnknownDecl(S, OpPC, D); assert(AK == AK_Assign); if (S.getLangOpts().CPlusPlus11) { const SourceInfo &E = S.Current->getSource(OpPC); S.FFDiag(E, diag::note_constexpr_modify_global); } return false; } bool CheckNonNullArgs(InterpState &S, CodePtr OpPC, const Function *F, const CallExpr *CE, unsigned ArgSize) { auto Args = llvm::ArrayRef(CE->getArgs(), CE->getNumArgs()); auto NonNullArgs = collectNonNullArgs(F->getDecl(), Args); unsigned Offset = 0; unsigned Index = 0; for (const Expr *Arg : Args) { if (NonNullArgs[Index] && Arg->getType()->isPointerType()) { const Pointer &ArgPtr = S.Stk.peek(ArgSize - Offset); if (ArgPtr.isZero()) { const SourceLocation &Loc = S.Current->getLocation(OpPC); S.CCEDiag(Loc, diag::note_non_null_attribute_failed); return false; } } Offset += align(primSize(S.Ctx.classify(Arg).value_or(PT_Ptr))); ++Index; } return true; } // FIXME: This is similar to code we already have in Compiler.cpp. // I think it makes sense to instead add the field and base destruction stuff // to the destructor Function itself. Then destroying a record would really // _just_ be calling its destructor. That would also help with the diagnostic // difference when the destructor or a field/base fails. static bool runRecordDestructor(InterpState &S, CodePtr OpPC, const Pointer &BasePtr, const Descriptor *Desc) { assert(Desc->isRecord()); const Record *R = Desc->ElemRecord; assert(R); // Fields. for (const Record::Field &Field : llvm::reverse(R->fields())) { const Descriptor *D = Field.Desc; if (D->isRecord()) { if (!runRecordDestructor(S, OpPC, BasePtr.atField(Field.Offset), D)) return false; } else if (D->isCompositeArray()) { const Descriptor *ElemDesc = Desc->ElemDesc; assert(ElemDesc->isRecord()); for (unsigned I = 0; I != Desc->getNumElems(); ++I) { if (!runRecordDestructor(S, OpPC, BasePtr.atIndex(I).narrow(), ElemDesc)) return false; } } } // Destructor of this record. if (const CXXDestructorDecl *Dtor = R->getDestructor(); Dtor && !Dtor->isTrivial()) { const Function *DtorFunc = S.getContext().getOrCreateFunction(Dtor); if (!DtorFunc) return false; S.Stk.push(BasePtr); if (!Call(S, OpPC, DtorFunc, 0)) return false; } // Bases. 
for (const Record::Base &Base : llvm::reverse(R->bases())) { if (!runRecordDestructor(S, OpPC, BasePtr.atField(Base.Offset), Base.Desc)) return false; } return true; } bool RunDestructors(InterpState &S, CodePtr OpPC, const Block *B) { assert(B); const Descriptor *Desc = B->getDescriptor(); if (Desc->isPrimitive() || Desc->isPrimitiveArray()) return true; assert(Desc->isRecord() || Desc->isCompositeArray()); if (Desc->isCompositeArray()) { const Descriptor *ElemDesc = Desc->ElemDesc; assert(ElemDesc->isRecord()); Pointer RP(const_cast(B)); for (unsigned I = 0; I != Desc->getNumElems(); ++I) { if (!runRecordDestructor(S, OpPC, RP.atIndex(I).narrow(), ElemDesc)) return false; } return true; } assert(Desc->isRecord()); return runRecordDestructor(S, OpPC, Pointer(const_cast(B)), Desc); } void diagnoseEnumValue(InterpState &S, CodePtr OpPC, const EnumDecl *ED, const APSInt &Value) { llvm::APInt Min; llvm::APInt Max; if (S.EvaluatingDecl && !S.EvaluatingDecl->isConstexpr()) return; ED->getValueRange(Max, Min); --Max; if (ED->getNumNegativeBits() && (Max.slt(Value.getSExtValue()) || Min.sgt(Value.getSExtValue()))) { const SourceLocation &Loc = S.Current->getLocation(OpPC); S.report(Loc, diag::warn_constexpr_unscoped_enum_out_of_range) << llvm::toString(Value, 10) << Min.getSExtValue() << Max.getSExtValue() << ED; } else if (!ED->getNumNegativeBits() && Max.ult(Value.getZExtValue())) { const SourceLocation &Loc = S.Current->getLocation(OpPC); S.report(Loc, diag::warn_constexpr_unscoped_enum_out_of_range) << llvm::toString(Value, 10) << Min.getZExtValue() << Max.getZExtValue() << ED; } } +// https://github.com/llvm/llvm-project/issues/102513 +#if defined(_WIN32) && !defined(__clang__) && !defined(NDEBUG) +#pragma optimize("", off) +#endif bool Interpret(InterpState &S, APValue &Result) { // The current stack frame when we started Interpret(). // This is being used by the ops to determine wheter // to return from this function and thus terminate // interpretation. const InterpFrame *StartFrame = S.Current; assert(!S.Current->isRoot()); CodePtr PC = S.Current->getPC(); // Empty program. if (!PC) return true; for (;;) { auto Op = PC.read(); CodePtr OpPC = PC; switch (Op) { #define GET_INTERP #include "Opcodes.inc" #undef GET_INTERP } } } +// https://github.com/llvm/llvm-project/issues/102513 +#if defined(_WIN32) && !defined(__clang__) && !defined(NDEBUG) +#pragma optimize("", on) +#endif } // namespace interp } // namespace clang diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 151505baf38d..2a5d5f9083ae 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -1,7795 +1,7795 @@ //===--- CodeGenModule.cpp - Emit LLVM Code from ASTs for a Module --------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This coordinates the per-module state used while generating code. 
// //===----------------------------------------------------------------------===// #include "CodeGenModule.h" #include "ABIInfo.h" #include "CGBlocks.h" #include "CGCUDARuntime.h" #include "CGCXXABI.h" #include "CGCall.h" #include "CGDebugInfo.h" #include "CGHLSLRuntime.h" #include "CGObjCRuntime.h" #include "CGOpenCLRuntime.h" #include "CGOpenMPRuntime.h" #include "CGOpenMPRuntimeGPU.h" #include "CodeGenFunction.h" #include "CodeGenPGO.h" #include "ConstantEmitter.h" #include "CoverageMappingGen.h" #include "TargetInfo.h" #include "clang/AST/ASTContext.h" #include "clang/AST/ASTLambda.h" #include "clang/AST/CharUnits.h" #include "clang/AST/Decl.h" #include "clang/AST/DeclCXX.h" #include "clang/AST/DeclObjC.h" #include "clang/AST/DeclTemplate.h" #include "clang/AST/Mangle.h" #include "clang/AST/RecursiveASTVisitor.h" #include "clang/AST/StmtVisitor.h" #include "clang/Basic/Builtins.h" #include "clang/Basic/CharInfo.h" #include "clang/Basic/CodeGenOptions.h" #include "clang/Basic/Diagnostic.h" #include "clang/Basic/FileManager.h" #include "clang/Basic/Module.h" #include "clang/Basic/SourceManager.h" #include "clang/Basic/TargetInfo.h" #include "clang/Basic/Version.h" #include "clang/CodeGen/BackendUtil.h" #include "clang/CodeGen/ConstantInitBuilder.h" #include "clang/Frontend/FrontendDiagnostic.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/BinaryFormat/ELF.h" #include "llvm/Frontend/OpenMP/OMPIRBuilder.h" #include "llvm/IR/AttributeMask.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/IR/ProfileSummary.h" #include "llvm/ProfileData/InstrProfReader.h" #include "llvm/ProfileData/SampleProf.h" #include "llvm/Support/CRC.h" #include "llvm/Support/CodeGen.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ConvertUTF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/TimeProfiler.h" #include "llvm/Support/xxhash.h" #include "llvm/TargetParser/RISCVISAInfo.h" #include "llvm/TargetParser/Triple.h" #include "llvm/TargetParser/X86TargetParser.h" #include "llvm/Transforms/Utils/BuildLibCalls.h" #include using namespace clang; using namespace CodeGen; static llvm::cl::opt LimitedCoverage( "limited-coverage-experimental", llvm::cl::Hidden, llvm::cl::desc("Emit limited coverage mapping information (experimental)")); static const char AnnotationSection[] = "llvm.metadata"; static CGCXXABI *createCXXABI(CodeGenModule &CGM) { switch (CGM.getContext().getCXXABIKind()) { case TargetCXXABI::AppleARM64: case TargetCXXABI::Fuchsia: case TargetCXXABI::GenericAArch64: case TargetCXXABI::GenericARM: case TargetCXXABI::iOS: case TargetCXXABI::WatchOS: case TargetCXXABI::GenericMIPS: case TargetCXXABI::GenericItanium: case TargetCXXABI::WebAssembly: case TargetCXXABI::XL: return CreateItaniumCXXABI(CGM); case TargetCXXABI::Microsoft: return CreateMicrosoftCXXABI(CGM); } llvm_unreachable("invalid C++ ABI kind"); } static std::unique_ptr createTargetCodeGenInfo(CodeGenModule &CGM) { const TargetInfo &Target = CGM.getTarget(); const llvm::Triple &Triple = Target.getTriple(); const CodeGenOptions &CodeGenOpts = CGM.getCodeGenOpts(); switch (Triple.getArch()) { default: return createDefaultTargetCodeGenInfo(CGM); case llvm::Triple::le32: return createPNaClTargetCodeGenInfo(CGM); case llvm::Triple::m68k: return createM68kTargetCodeGenInfo(CGM); case 
llvm::Triple::mips: case llvm::Triple::mipsel: if (Triple.getOS() == llvm::Triple::NaCl) return createPNaClTargetCodeGenInfo(CGM); return createMIPSTargetCodeGenInfo(CGM, /*IsOS32=*/true); case llvm::Triple::mips64: case llvm::Triple::mips64el: return createMIPSTargetCodeGenInfo(CGM, /*IsOS32=*/false); case llvm::Triple::avr: { // For passing parameters, R8~R25 are used on avr, and R18~R25 are used // on avrtiny. For passing return value, R18~R25 are used on avr, and // R22~R25 are used on avrtiny. unsigned NPR = Target.getABI() == "avrtiny" ? 6 : 18; unsigned NRR = Target.getABI() == "avrtiny" ? 4 : 8; return createAVRTargetCodeGenInfo(CGM, NPR, NRR); } case llvm::Triple::aarch64: case llvm::Triple::aarch64_32: case llvm::Triple::aarch64_be: { AArch64ABIKind Kind = AArch64ABIKind::AAPCS; if (Target.getABI() == "darwinpcs") Kind = AArch64ABIKind::DarwinPCS; else if (Triple.isOSWindows()) return createWindowsAArch64TargetCodeGenInfo(CGM, AArch64ABIKind::Win64); else if (Target.getABI() == "aapcs-soft") Kind = AArch64ABIKind::AAPCSSoft; else if (Target.getABI() == "pauthtest") Kind = AArch64ABIKind::PAuthTest; return createAArch64TargetCodeGenInfo(CGM, Kind); } case llvm::Triple::wasm32: case llvm::Triple::wasm64: { WebAssemblyABIKind Kind = WebAssemblyABIKind::MVP; if (Target.getABI() == "experimental-mv") Kind = WebAssemblyABIKind::ExperimentalMV; return createWebAssemblyTargetCodeGenInfo(CGM, Kind); } case llvm::Triple::arm: case llvm::Triple::armeb: case llvm::Triple::thumb: case llvm::Triple::thumbeb: { if (Triple.getOS() == llvm::Triple::Win32) return createWindowsARMTargetCodeGenInfo(CGM, ARMABIKind::AAPCS_VFP); ARMABIKind Kind = ARMABIKind::AAPCS; StringRef ABIStr = Target.getABI(); if (ABIStr == "apcs-gnu") Kind = ARMABIKind::APCS; else if (ABIStr == "aapcs16") Kind = ARMABIKind::AAPCS16_VFP; else if (CodeGenOpts.FloatABI == "hard" || (CodeGenOpts.FloatABI != "soft" && Triple.isHardFloatABI())) Kind = ARMABIKind::AAPCS_VFP; return createARMTargetCodeGenInfo(CGM, Kind); } case llvm::Triple::ppc: { if (Triple.isOSAIX()) return createAIXTargetCodeGenInfo(CGM, /*Is64Bit=*/false); bool IsSoftFloat = CodeGenOpts.FloatABI == "soft" || Target.hasFeature("spe"); return createPPC32TargetCodeGenInfo(CGM, IsSoftFloat); } case llvm::Triple::ppcle: { bool IsSoftFloat = CodeGenOpts.FloatABI == "soft"; return createPPC32TargetCodeGenInfo(CGM, IsSoftFloat); } case llvm::Triple::ppc64: if (Triple.isOSAIX()) return createAIXTargetCodeGenInfo(CGM, /*Is64Bit=*/true); if (Triple.isOSBinFormatELF()) { PPC64_SVR4_ABIKind Kind = PPC64_SVR4_ABIKind::ELFv1; if (Target.getABI() == "elfv2") Kind = PPC64_SVR4_ABIKind::ELFv2; bool IsSoftFloat = CodeGenOpts.FloatABI == "soft"; return createPPC64_SVR4_TargetCodeGenInfo(CGM, Kind, IsSoftFloat); } return createPPC64TargetCodeGenInfo(CGM); case llvm::Triple::ppc64le: { assert(Triple.isOSBinFormatELF() && "PPC64 LE non-ELF not supported!"); PPC64_SVR4_ABIKind Kind = PPC64_SVR4_ABIKind::ELFv2; if (Target.getABI() == "elfv1") Kind = PPC64_SVR4_ABIKind::ELFv1; bool IsSoftFloat = CodeGenOpts.FloatABI == "soft"; return createPPC64_SVR4_TargetCodeGenInfo(CGM, Kind, IsSoftFloat); } case llvm::Triple::nvptx: case llvm::Triple::nvptx64: return createNVPTXTargetCodeGenInfo(CGM); case llvm::Triple::msp430: return createMSP430TargetCodeGenInfo(CGM); case llvm::Triple::riscv32: case llvm::Triple::riscv64: { StringRef ABIStr = Target.getABI(); unsigned XLen = Target.getPointerWidth(LangAS::Default); unsigned ABIFLen = 0; if (ABIStr.ends_with("f")) ABIFLen = 32; else if 
(ABIStr.ends_with("d")) ABIFLen = 64; bool EABI = ABIStr.ends_with("e"); return createRISCVTargetCodeGenInfo(CGM, XLen, ABIFLen, EABI); } case llvm::Triple::systemz: { bool SoftFloat = CodeGenOpts.FloatABI == "soft"; bool HasVector = !SoftFloat && Target.getABI() == "vector"; return createSystemZTargetCodeGenInfo(CGM, HasVector, SoftFloat); } case llvm::Triple::tce: case llvm::Triple::tcele: return createTCETargetCodeGenInfo(CGM); case llvm::Triple::x86: { bool IsDarwinVectorABI = Triple.isOSDarwin(); bool IsWin32FloatStructABI = Triple.isOSWindows() && !Triple.isOSCygMing(); if (Triple.getOS() == llvm::Triple::Win32) { return createWinX86_32TargetCodeGenInfo( CGM, IsDarwinVectorABI, IsWin32FloatStructABI, CodeGenOpts.NumRegisterParameters); } return createX86_32TargetCodeGenInfo( CGM, IsDarwinVectorABI, IsWin32FloatStructABI, CodeGenOpts.NumRegisterParameters, CodeGenOpts.FloatABI == "soft"); } case llvm::Triple::x86_64: { StringRef ABI = Target.getABI(); X86AVXABILevel AVXLevel = (ABI == "avx512" ? X86AVXABILevel::AVX512 : ABI == "avx" ? X86AVXABILevel::AVX : X86AVXABILevel::None); switch (Triple.getOS()) { case llvm::Triple::Win32: return createWinX86_64TargetCodeGenInfo(CGM, AVXLevel); default: return createX86_64TargetCodeGenInfo(CGM, AVXLevel); } } case llvm::Triple::hexagon: return createHexagonTargetCodeGenInfo(CGM); case llvm::Triple::lanai: return createLanaiTargetCodeGenInfo(CGM); case llvm::Triple::r600: return createAMDGPUTargetCodeGenInfo(CGM); case llvm::Triple::amdgcn: return createAMDGPUTargetCodeGenInfo(CGM); case llvm::Triple::sparc: return createSparcV8TargetCodeGenInfo(CGM); case llvm::Triple::sparcv9: return createSparcV9TargetCodeGenInfo(CGM); case llvm::Triple::xcore: return createXCoreTargetCodeGenInfo(CGM); case llvm::Triple::arc: return createARCTargetCodeGenInfo(CGM); case llvm::Triple::spir: case llvm::Triple::spir64: return createCommonSPIRTargetCodeGenInfo(CGM); case llvm::Triple::spirv32: case llvm::Triple::spirv64: return createSPIRVTargetCodeGenInfo(CGM); case llvm::Triple::ve: return createVETargetCodeGenInfo(CGM); case llvm::Triple::csky: { bool IsSoftFloat = !Target.hasFeature("hard-float-abi"); bool hasFP64 = Target.hasFeature("fpuv2_df") || Target.hasFeature("fpuv3_df"); return createCSKYTargetCodeGenInfo(CGM, IsSoftFloat ? 0 : hasFP64 ? 64 : 32); } case llvm::Triple::bpfeb: case llvm::Triple::bpfel: return createBPFTargetCodeGenInfo(CGM); case llvm::Triple::loongarch32: case llvm::Triple::loongarch64: { StringRef ABIStr = Target.getABI(); unsigned ABIFRLen = 0; if (ABIStr.ends_with("f")) ABIFRLen = 32; else if (ABIStr.ends_with("d")) ABIFRLen = 64; return createLoongArchTargetCodeGenInfo( CGM, Target.getPointerWidth(LangAS::Default), ABIFRLen); } } } const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() { if (!TheTargetCodeGenInfo) TheTargetCodeGenInfo = createTargetCodeGenInfo(*this); return *TheTargetCodeGenInfo; } CodeGenModule::CodeGenModule(ASTContext &C, IntrusiveRefCntPtr FS, const HeaderSearchOptions &HSO, const PreprocessorOptions &PPO, const CodeGenOptions &CGO, llvm::Module &M, DiagnosticsEngine &diags, CoverageSourceInfo *CoverageInfo) : Context(C), LangOpts(C.getLangOpts()), FS(FS), HeaderSearchOpts(HSO), PreprocessorOpts(PPO), CodeGenOpts(CGO), TheModule(M), Diags(diags), Target(C.getTargetInfo()), ABI(createCXXABI(*this)), VMContext(M.getContext()), VTables(*this), SanitizerMD(new SanitizerMetadata(*this)) { // Initialize the type cache. 
Types.reset(new CodeGenTypes(*this)); llvm::LLVMContext &LLVMContext = M.getContext(); VoidTy = llvm::Type::getVoidTy(LLVMContext); Int8Ty = llvm::Type::getInt8Ty(LLVMContext); Int16Ty = llvm::Type::getInt16Ty(LLVMContext); Int32Ty = llvm::Type::getInt32Ty(LLVMContext); Int64Ty = llvm::Type::getInt64Ty(LLVMContext); HalfTy = llvm::Type::getHalfTy(LLVMContext); BFloatTy = llvm::Type::getBFloatTy(LLVMContext); FloatTy = llvm::Type::getFloatTy(LLVMContext); DoubleTy = llvm::Type::getDoubleTy(LLVMContext); PointerWidthInBits = C.getTargetInfo().getPointerWidth(LangAS::Default); PointerAlignInBytes = C.toCharUnitsFromBits(C.getTargetInfo().getPointerAlign(LangAS::Default)) .getQuantity(); SizeSizeInBytes = C.toCharUnitsFromBits(C.getTargetInfo().getMaxPointerWidth()).getQuantity(); IntAlignInBytes = C.toCharUnitsFromBits(C.getTargetInfo().getIntAlign()).getQuantity(); CharTy = llvm::IntegerType::get(LLVMContext, C.getTargetInfo().getCharWidth()); IntTy = llvm::IntegerType::get(LLVMContext, C.getTargetInfo().getIntWidth()); IntPtrTy = llvm::IntegerType::get(LLVMContext, C.getTargetInfo().getMaxPointerWidth()); Int8PtrTy = llvm::PointerType::get(LLVMContext, C.getTargetAddressSpace(LangAS::Default)); const llvm::DataLayout &DL = M.getDataLayout(); AllocaInt8PtrTy = llvm::PointerType::get(LLVMContext, DL.getAllocaAddrSpace()); GlobalsInt8PtrTy = llvm::PointerType::get(LLVMContext, DL.getDefaultGlobalsAddressSpace()); ConstGlobalsPtrTy = llvm::PointerType::get( LLVMContext, C.getTargetAddressSpace(GetGlobalConstantAddressSpace())); ASTAllocaAddressSpace = getTargetCodeGenInfo().getASTAllocaAddressSpace(); // Build C++20 Module initializers. // TODO: Add Microsoft here once we know the mangling required for the // initializers. CXX20ModuleInits = LangOpts.CPlusPlusModules && getCXXABI().getMangleContext().getKind() == ItaniumMangleContext::MK_Itanium; RuntimeCC = getTargetCodeGenInfo().getABIInfo().getRuntimeCC(); if (LangOpts.ObjC) createObjCRuntime(); if (LangOpts.OpenCL) createOpenCLRuntime(); if (LangOpts.OpenMP) createOpenMPRuntime(); if (LangOpts.CUDA) createCUDARuntime(); if (LangOpts.HLSL) createHLSLRuntime(); // Enable TBAA unless it's suppressed. ThreadSanitizer needs TBAA even at O0. if (LangOpts.Sanitize.has(SanitizerKind::Thread) || (!CodeGenOpts.RelaxedAliasing && CodeGenOpts.OptimizationLevel > 0)) TBAA.reset(new CodeGenTBAA(Context, getTypes(), TheModule, CodeGenOpts, getLangOpts())); // If debug info or coverage generation is enabled, create the CGDebugInfo // object. if (CodeGenOpts.getDebugInfo() != llvm::codegenoptions::NoDebugInfo || CodeGenOpts.CoverageNotesFile.size() || CodeGenOpts.CoverageDataFile.size()) DebugInfo.reset(new CGDebugInfo(*this)); Block.GlobalUniqueCount = 0; if (C.getLangOpts().ObjC) ObjCData.reset(new ObjCEntrypoints()); if (CodeGenOpts.hasProfileClangUse()) { auto ReaderOrErr = llvm::IndexedInstrProfReader::create( CodeGenOpts.ProfileInstrumentUsePath, *FS, CodeGenOpts.ProfileRemappingFile); // We're checking for profile read errors in CompilerInvocation, so if // there was an error it should've already been caught. If it hasn't been // somehow, trip an assertion. assert(ReaderOrErr); PGOReader = std::move(ReaderOrErr.get()); } // If coverage mapping generation is enabled, create the // CoverageMappingModuleGen object. if (CodeGenOpts.CoverageMapping) CoverageMapping.reset(new CoverageMappingModuleGen(*this, *CoverageInfo)); // Generate the module name hash here if needed. 
if (CodeGenOpts.UniqueInternalLinkageNames && !getModule().getSourceFileName().empty()) { std::string Path = getModule().getSourceFileName(); // Check if a path substitution is needed from the MacroPrefixMap. for (const auto &Entry : LangOpts.MacroPrefixMap) if (Path.rfind(Entry.first, 0) != std::string::npos) { Path = Entry.second + Path.substr(Entry.first.size()); break; } ModuleNameHash = llvm::getUniqueInternalLinkagePostfix(Path); } // Record mregparm value now so it is visible through all of codegen. if (Context.getTargetInfo().getTriple().getArch() == llvm::Triple::x86) getModule().addModuleFlag(llvm::Module::Error, "NumRegisterParameters", CodeGenOpts.NumRegisterParameters); } CodeGenModule::~CodeGenModule() {} void CodeGenModule::createObjCRuntime() { // This is just isGNUFamily(), but we want to force implementors of // new ABIs to decide how best to do this. switch (LangOpts.ObjCRuntime.getKind()) { case ObjCRuntime::GNUstep: case ObjCRuntime::GCC: case ObjCRuntime::ObjFW: ObjCRuntime.reset(CreateGNUObjCRuntime(*this)); return; case ObjCRuntime::FragileMacOSX: case ObjCRuntime::MacOSX: case ObjCRuntime::iOS: case ObjCRuntime::WatchOS: ObjCRuntime.reset(CreateMacObjCRuntime(*this)); return; } llvm_unreachable("bad runtime kind"); } void CodeGenModule::createOpenCLRuntime() { OpenCLRuntime.reset(new CGOpenCLRuntime(*this)); } void CodeGenModule::createOpenMPRuntime() { // Select a specialized code generation class based on the target, if any. // If it does not exist use the default implementation. switch (getTriple().getArch()) { case llvm::Triple::nvptx: case llvm::Triple::nvptx64: case llvm::Triple::amdgcn: assert(getLangOpts().OpenMPIsTargetDevice && "OpenMP AMDGPU/NVPTX is only prepared to deal with device code."); OpenMPRuntime.reset(new CGOpenMPRuntimeGPU(*this)); break; default: if (LangOpts.OpenMPSimd) OpenMPRuntime.reset(new CGOpenMPSIMDRuntime(*this)); else OpenMPRuntime.reset(new CGOpenMPRuntime(*this)); break; } } void CodeGenModule::createCUDARuntime() { CUDARuntime.reset(CreateNVCUDARuntime(*this)); } void CodeGenModule::createHLSLRuntime() { HLSLRuntime.reset(new CGHLSLRuntime(*this)); } void CodeGenModule::addReplacement(StringRef Name, llvm::Constant *C) { Replacements[Name] = C; } void CodeGenModule::applyReplacements() { for (auto &I : Replacements) { StringRef MangledName = I.first; llvm::Constant *Replacement = I.second; llvm::GlobalValue *Entry = GetGlobalValue(MangledName); if (!Entry) continue; auto *OldF = cast(Entry); auto *NewF = dyn_cast(Replacement); if (!NewF) { if (auto *Alias = dyn_cast(Replacement)) { NewF = dyn_cast(Alias->getAliasee()); } else { auto *CE = cast(Replacement); assert(CE->getOpcode() == llvm::Instruction::BitCast || CE->getOpcode() == llvm::Instruction::GetElementPtr); NewF = dyn_cast(CE->getOperand(0)); } } // Replace old with new, but keep the old order. OldF->replaceAllUsesWith(Replacement); if (NewF) { NewF->removeFromParent(); OldF->getParent()->getFunctionList().insertAfter(OldF->getIterator(), NewF); } OldF->eraseFromParent(); } } void CodeGenModule::addGlobalValReplacement(llvm::GlobalValue *GV, llvm::Constant *C) { GlobalValReplacements.push_back(std::make_pair(GV, C)); } void CodeGenModule::applyGlobalValReplacements() { for (auto &I : GlobalValReplacements) { llvm::GlobalValue *GV = I.first; llvm::Constant *C = I.second; GV->replaceAllUsesWith(C); GV->eraseFromParent(); } } // This is only used in aliases that we created and we know they have a // linear structure. 
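The alias checking that follows (getAliasedGlobal, checkAliasedGlobal, checkAliases) validates GNU alias and ifunc attributes: the target must resolve, non-cyclically, to a definition in the same TU named by its mangled symbol, and an ifunc resolver must return a pointer. A source-level sketch of well-formed uses on an ELF target (illustrative only, not part of the patch):

extern "C" int impl() { return 42; }

// 'alias' must name the mangled symbol of a definition in this TU; with
// extern "C" that is simply "impl".
extern "C" int wrapper() __attribute__((alias("impl")));

// An ifunc resolver runs at load time and must return a pointer to the
// implementation; its return type is checked to be a pointer type.
extern "C" int (*pick())() { return impl; }
extern "C" int dispatched() __attribute__((ifunc("pick")));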
static const llvm::GlobalValue *getAliasedGlobal(const llvm::GlobalValue *GV) { const llvm::Constant *C; if (auto *GA = dyn_cast(GV)) C = GA->getAliasee(); else if (auto *GI = dyn_cast(GV)) C = GI->getResolver(); else return GV; const auto *AliaseeGV = dyn_cast(C->stripPointerCasts()); if (!AliaseeGV) return nullptr; const llvm::GlobalValue *FinalGV = AliaseeGV->getAliaseeObject(); if (FinalGV == GV) return nullptr; return FinalGV; } static bool checkAliasedGlobal( const ASTContext &Context, DiagnosticsEngine &Diags, SourceLocation Location, bool IsIFunc, const llvm::GlobalValue *Alias, const llvm::GlobalValue *&GV, const llvm::MapVector &MangledDeclNames, SourceRange AliasRange) { GV = getAliasedGlobal(Alias); if (!GV) { Diags.Report(Location, diag::err_cyclic_alias) << IsIFunc; return false; } if (GV->hasCommonLinkage()) { const llvm::Triple &Triple = Context.getTargetInfo().getTriple(); if (Triple.getObjectFormat() == llvm::Triple::XCOFF) { Diags.Report(Location, diag::err_alias_to_common); return false; } } if (GV->isDeclaration()) { Diags.Report(Location, diag::err_alias_to_undefined) << IsIFunc << IsIFunc; Diags.Report(Location, diag::note_alias_requires_mangled_name) << IsIFunc << IsIFunc; // Provide a note if the given function is not found and exists as a // mangled name. for (const auto &[Decl, Name] : MangledDeclNames) { if (const auto *ND = dyn_cast(Decl.getDecl())) { if (ND->getName() == GV->getName()) { Diags.Report(Location, diag::note_alias_mangled_name_alternative) << Name << FixItHint::CreateReplacement( AliasRange, (Twine(IsIFunc ? "ifunc" : "alias") + "(\"" + Name + "\")") .str()); } } } return false; } if (IsIFunc) { // Check resolver function type. const auto *F = dyn_cast(GV); if (!F) { Diags.Report(Location, diag::err_alias_to_undefined) << IsIFunc << IsIFunc; return false; } llvm::FunctionType *FTy = F->getFunctionType(); if (!FTy->getReturnType()->isPointerTy()) { Diags.Report(Location, diag::err_ifunc_resolver_return); return false; } } return true; } // Emit a warning if toc-data attribute is requested for global variables that // have aliases and remove the toc-data attribute. static void checkAliasForTocData(llvm::GlobalVariable *GVar, const CodeGenOptions &CodeGenOpts, DiagnosticsEngine &Diags, SourceLocation Location) { if (GVar->hasAttribute("toc-data")) { auto GVId = GVar->getName(); // Is this a global variable specified by the user as local? if ((llvm::binary_search(CodeGenOpts.TocDataVarsUserSpecified, GVId))) { Diags.Report(Location, diag::warn_toc_unsupported_type) << GVId << "the variable has an alias"; } llvm::AttributeSet CurrAttributes = GVar->getAttributes(); llvm::AttributeSet NewAttributes = CurrAttributes.removeAttribute(GVar->getContext(), "toc-data"); GVar->setAttributes(NewAttributes); } } void CodeGenModule::checkAliases() { // Check if the constructed aliases are well formed. It is really unfortunate // that we have to do this in CodeGen, but we only construct mangled names // and aliases during codegen. 
bool Error = false; DiagnosticsEngine &Diags = getDiags(); for (const GlobalDecl &GD : Aliases) { const auto *D = cast(GD.getDecl()); SourceLocation Location; SourceRange Range; bool IsIFunc = D->hasAttr(); if (const Attr *A = D->getDefiningAttr()) { Location = A->getLocation(); Range = A->getRange(); } else llvm_unreachable("Not an alias or ifunc?"); StringRef MangledName = getMangledName(GD); llvm::GlobalValue *Alias = GetGlobalValue(MangledName); const llvm::GlobalValue *GV = nullptr; if (!checkAliasedGlobal(getContext(), Diags, Location, IsIFunc, Alias, GV, MangledDeclNames, Range)) { Error = true; continue; } if (getContext().getTargetInfo().getTriple().isOSAIX()) if (const llvm::GlobalVariable *GVar = dyn_cast(GV)) checkAliasForTocData(const_cast(GVar), getCodeGenOpts(), Diags, Location); llvm::Constant *Aliasee = IsIFunc ? cast(Alias)->getResolver() : cast(Alias)->getAliasee(); llvm::GlobalValue *AliaseeGV; if (auto CE = dyn_cast(Aliasee)) AliaseeGV = cast(CE->getOperand(0)); else AliaseeGV = cast(Aliasee); if (const SectionAttr *SA = D->getAttr()) { StringRef AliasSection = SA->getName(); if (AliasSection != AliaseeGV->getSection()) Diags.Report(SA->getLocation(), diag::warn_alias_with_section) << AliasSection << IsIFunc << IsIFunc; } // We have to handle alias to weak aliases in here. LLVM itself disallows // this since the object semantics would not match the IL one. For // compatibility with gcc we implement it by just pointing the alias // to its aliasee's aliasee. We also warn, since the user is probably // expecting the link to be weak. if (auto *GA = dyn_cast(AliaseeGV)) { if (GA->isInterposable()) { Diags.Report(Location, diag::warn_alias_to_weak_alias) << GV->getName() << GA->getName() << IsIFunc; Aliasee = llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast( GA->getAliasee(), Alias->getType()); if (IsIFunc) cast(Alias)->setResolver(Aliasee); else cast(Alias)->setAliasee(Aliasee); } } // ifunc resolvers are usually implemented to run before sanitizer // initialization. Disable instrumentation to prevent the ordering issue. if (IsIFunc) cast(Aliasee)->addFnAttr( llvm::Attribute::DisableSanitizerInstrumentation); } if (!Error) return; for (const GlobalDecl &GD : Aliases) { StringRef MangledName = getMangledName(GD); llvm::GlobalValue *Alias = GetGlobalValue(MangledName); Alias->replaceAllUsesWith(llvm::UndefValue::get(Alias->getType())); Alias->eraseFromParent(); } } void CodeGenModule::clear() { DeferredDeclsToEmit.clear(); EmittedDeferredDecls.clear(); DeferredAnnotations.clear(); if (OpenMPRuntime) OpenMPRuntime->clear(); } void InstrProfStats::reportDiagnostics(DiagnosticsEngine &Diags, StringRef MainFile) { if (!hasDiagnostics()) return; if (VisitedInMainFile > 0 && VisitedInMainFile == MissingInMainFile) { if (MainFile.empty()) MainFile = ""; Diags.Report(diag::warn_profile_data_unprofiled) << MainFile; } else { if (Mismatched > 0) Diags.Report(diag::warn_profile_data_out_of_date) << Visited << Mismatched; if (Missing > 0) Diags.Report(diag::warn_profile_data_missing) << Visited << Missing; } } static std::optional getLLVMVisibility(clang::LangOptions::VisibilityFromDLLStorageClassKinds K) { // Map to LLVM visibility. 
switch (K) { case clang::LangOptions::VisibilityFromDLLStorageClassKinds::Keep: return std::nullopt; case clang::LangOptions::VisibilityFromDLLStorageClassKinds::Default: return llvm::GlobalValue::DefaultVisibility; case clang::LangOptions::VisibilityFromDLLStorageClassKinds::Hidden: return llvm::GlobalValue::HiddenVisibility; case clang::LangOptions::VisibilityFromDLLStorageClassKinds::Protected: return llvm::GlobalValue::ProtectedVisibility; } llvm_unreachable("unknown option value!"); } void setLLVMVisibility(llvm::GlobalValue &GV, std::optional V) { if (!V) return; // Reset DSO locality before setting the visibility. This removes // any effects that visibility options and annotations may have // had on the DSO locality. Setting the visibility will implicitly set // appropriate globals to DSO Local; however, this will be pessimistic // w.r.t. to the normal compiler IRGen. GV.setDSOLocal(false); GV.setVisibility(*V); } static void setVisibilityFromDLLStorageClass(const clang::LangOptions &LO, llvm::Module &M) { if (!LO.VisibilityFromDLLStorageClass) return; std::optional DLLExportVisibility = getLLVMVisibility(LO.getDLLExportVisibility()); std::optional NoDLLStorageClassVisibility = getLLVMVisibility(LO.getNoDLLStorageClassVisibility()); std::optional ExternDeclDLLImportVisibility = getLLVMVisibility(LO.getExternDeclDLLImportVisibility()); std::optional ExternDeclNoDLLStorageClassVisibility = getLLVMVisibility(LO.getExternDeclNoDLLStorageClassVisibility()); for (llvm::GlobalValue &GV : M.global_values()) { if (GV.hasAppendingLinkage() || GV.hasLocalLinkage()) continue; if (GV.isDeclarationForLinker()) setLLVMVisibility(GV, GV.getDLLStorageClass() == llvm::GlobalValue::DLLImportStorageClass ? ExternDeclDLLImportVisibility : ExternDeclNoDLLStorageClassVisibility); else setLLVMVisibility(GV, GV.getDLLStorageClass() == llvm::GlobalValue::DLLExportStorageClass ? DLLExportVisibility : NoDLLStorageClassVisibility); GV.setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass); } } static bool isStackProtectorOn(const LangOptions &LangOpts, const llvm::Triple &Triple, clang::LangOptions::StackProtectorMode Mode) { if (Triple.isAMDGPU() || Triple.isNVPTX()) return false; return LangOpts.getStackProtector() == Mode; } void CodeGenModule::Release() { Module *Primary = getContext().getCurrentNamedModule(); if (CXX20ModuleInits && Primary && !Primary->isHeaderLikeModule()) EmitModuleInitializers(Primary); EmitDeferred(); DeferredDecls.insert(EmittedDeferredDecls.begin(), EmittedDeferredDecls.end()); EmittedDeferredDecls.clear(); EmitVTablesOpportunistically(); applyGlobalValReplacements(); applyReplacements(); emitMultiVersionFunctions(); if (Context.getLangOpts().IncrementalExtensions && GlobalTopLevelStmtBlockInFlight.first) { const TopLevelStmtDecl *TLSD = GlobalTopLevelStmtBlockInFlight.second; GlobalTopLevelStmtBlockInFlight.first->FinishFunction(TLSD->getEndLoc()); GlobalTopLevelStmtBlockInFlight = {nullptr, nullptr}; } // Module implementations are initialized the same way as a regular TU that // imports one or more modules. 
if (CXX20ModuleInits && Primary && Primary->isInterfaceOrPartition()) EmitCXXModuleInitFunc(Primary); else EmitCXXGlobalInitFunc(); EmitCXXGlobalCleanUpFunc(); registerGlobalDtorsWithAtExit(); EmitCXXThreadLocalInitFunc(); if (ObjCRuntime) if (llvm::Function *ObjCInitFunction = ObjCRuntime->ModuleInitFunction()) AddGlobalCtor(ObjCInitFunction); if (Context.getLangOpts().CUDA && CUDARuntime) { if (llvm::Function *CudaCtorFunction = CUDARuntime->finalizeModule()) AddGlobalCtor(CudaCtorFunction); } if (OpenMPRuntime) { OpenMPRuntime->createOffloadEntriesAndInfoMetadata(); OpenMPRuntime->clear(); } if (PGOReader) { getModule().setProfileSummary( PGOReader->getSummary(/* UseCS */ false).getMD(VMContext), llvm::ProfileSummary::PSK_Instr); if (PGOStats.hasDiagnostics()) PGOStats.reportDiagnostics(getDiags(), getCodeGenOpts().MainFileName); } llvm::stable_sort(GlobalCtors, [](const Structor &L, const Structor &R) { return L.LexOrder < R.LexOrder; }); EmitCtorList(GlobalCtors, "llvm.global_ctors"); EmitCtorList(GlobalDtors, "llvm.global_dtors"); EmitGlobalAnnotations(); EmitStaticExternCAliases(); checkAliases(); EmitDeferredUnusedCoverageMappings(); CodeGenPGO(*this).setValueProfilingFlag(getModule()); CodeGenPGO(*this).setProfileVersion(getModule()); if (CoverageMapping) CoverageMapping->emit(); if (CodeGenOpts.SanitizeCfiCrossDso) { CodeGenFunction(*this).EmitCfiCheckFail(); CodeGenFunction(*this).EmitCfiCheckStub(); } if (LangOpts.Sanitize.has(SanitizerKind::KCFI)) finalizeKCFITypes(); emitAtAvailableLinkGuard(); if (Context.getTargetInfo().getTriple().isWasm()) EmitMainVoidAlias(); if (getTriple().isAMDGPU() || (getTriple().isSPIRV() && getTriple().getVendor() == llvm::Triple::AMD)) { // Emit amdhsa_code_object_version module flag, which is code object version // times 100. if (getTarget().getTargetOpts().CodeObjectVersion != llvm::CodeObjectVersionKind::COV_None) { getModule().addModuleFlag(llvm::Module::Error, "amdhsa_code_object_version", getTarget().getTargetOpts().CodeObjectVersion); } // Currently, "-mprintf-kind" option is only supported for HIP if (LangOpts.HIP) { auto *MDStr = llvm::MDString::get( getLLVMContext(), (getTarget().getTargetOpts().AMDGPUPrintfKindVal == TargetOptions::AMDGPUPrintfKind::Hostcall) ? "hostcall" : "buffered"); getModule().addModuleFlag(llvm::Module::Error, "amdgpu_printf_kind", MDStr); } } // Emit a global array containing all external kernels or device variables // used by host functions and mark it as used for CUDA/HIP. This is necessary // to get kernels or device variables in archives linked in even if these // kernels or device variables are only used in host functions. if (!Context.CUDAExternalDeviceDeclODRUsedByHost.empty()) { SmallVector UsedArray; for (auto D : Context.CUDAExternalDeviceDeclODRUsedByHost) { GlobalDecl GD; if (auto *FD = dyn_cast(D)) GD = GlobalDecl(FD, KernelReferenceKind::Kernel); else GD = GlobalDecl(D); UsedArray.push_back(llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast( GetAddrOfGlobal(GD), Int8PtrTy)); } llvm::ArrayType *ATy = llvm::ArrayType::get(Int8PtrTy, UsedArray.size()); auto *GV = new llvm::GlobalVariable( getModule(), ATy, false, llvm::GlobalValue::InternalLinkage, llvm::ConstantArray::get(ATy, UsedArray), "__clang_gpu_used_external"); addCompilerUsedGlobal(GV); } if (LangOpts.HIP && !getLangOpts().OffloadingNewDriver) { // Emit a unique ID so that host and device binaries from the same // compilation unit can be associated. 
auto *GV = new llvm::GlobalVariable( getModule(), Int8Ty, false, llvm::GlobalValue::ExternalLinkage, llvm::Constant::getNullValue(Int8Ty), "__hip_cuid_" + getContext().getCUIDHash()); addCompilerUsedGlobal(GV); } emitLLVMUsed(); if (SanStats) SanStats->finish(); if (CodeGenOpts.Autolink && (Context.getLangOpts().Modules || !LinkerOptionsMetadata.empty())) { EmitModuleLinkOptions(); } // On ELF we pass the dependent library specifiers directly to the linker // without manipulating them. This is in contrast to other platforms where // they are mapped to a specific linker option by the compiler. This // difference is a result of the greater variety of ELF linkers and the fact // that ELF linkers tend to handle libraries in a more complicated fashion // than on other platforms. This forces us to defer handling the dependent // libs to the linker. // // CUDA/HIP device and host libraries are different. Currently there is no // way to differentiate dependent libraries for host or device. Existing // usage of #pragma comment(lib, *) is intended for host libraries on // Windows. Therefore emit llvm.dependent-libraries only for host. if (!ELFDependentLibraries.empty() && !Context.getLangOpts().CUDAIsDevice) { auto *NMD = getModule().getOrInsertNamedMetadata("llvm.dependent-libraries"); for (auto *MD : ELFDependentLibraries) NMD->addOperand(MD); } if (CodeGenOpts.DwarfVersion) { getModule().addModuleFlag(llvm::Module::Max, "Dwarf Version", CodeGenOpts.DwarfVersion); } if (CodeGenOpts.Dwarf64) getModule().addModuleFlag(llvm::Module::Max, "DWARF64", 1); if (Context.getLangOpts().SemanticInterposition) // Require various optimization to respect semantic interposition. getModule().setSemanticInterposition(true); if (CodeGenOpts.EmitCodeView) { // Indicate that we want CodeView in the metadata. getModule().addModuleFlag(llvm::Module::Warning, "CodeView", 1); } if (CodeGenOpts.CodeViewGHash) { getModule().addModuleFlag(llvm::Module::Warning, "CodeViewGHash", 1); } if (CodeGenOpts.ControlFlowGuard) { // Function ID tables and checks for Control Flow Guard (cfguard=2). getModule().addModuleFlag(llvm::Module::Warning, "cfguard", 2); } else if (CodeGenOpts.ControlFlowGuardNoChecks) { // Function ID tables for Control Flow Guard (cfguard=1). getModule().addModuleFlag(llvm::Module::Warning, "cfguard", 1); } if (CodeGenOpts.EHContGuard) { // Function ID tables for EH Continuation Guard. getModule().addModuleFlag(llvm::Module::Warning, "ehcontguard", 1); } if (Context.getLangOpts().Kernel) { // Note if we are compiling with /kernel. getModule().addModuleFlag(llvm::Module::Warning, "ms-kernel", 1); } if (CodeGenOpts.OptimizationLevel > 0 && CodeGenOpts.StrictVTablePointers) { // We don't support LTO with 2 with different StrictVTablePointers // FIXME: we could support it by stripping all the information introduced // by StrictVTablePointers. getModule().addModuleFlag(llvm::Module::Error, "StrictVTablePointers",1); llvm::Metadata *Ops[2] = { llvm::MDString::get(VMContext, "StrictVTablePointers"), llvm::ConstantAsMetadata::get(llvm::ConstantInt::get( llvm::Type::getInt32Ty(VMContext), 1))}; getModule().addModuleFlag(llvm::Module::Require, "StrictVTablePointersRequirement", llvm::MDNode::get(VMContext, Ops)); } if (getModuleDebugInfo()) // We support a single version in the linked module. The LLVM // parser will drop debug info with a different version number // (and warn about it, too). 
getModule().addModuleFlag(llvm::Module::Warning, "Debug Info Version", llvm::DEBUG_METADATA_VERSION); // We need to record the widths of enums and wchar_t, so that we can generate // the correct build attributes in the ARM backend. wchar_size is also used by // TargetLibraryInfo. uint64_t WCharWidth = Context.getTypeSizeInChars(Context.getWideCharType()).getQuantity(); getModule().addModuleFlag(llvm::Module::Error, "wchar_size", WCharWidth); if (getTriple().isOSzOS()) { getModule().addModuleFlag(llvm::Module::Warning, "zos_product_major_version", uint32_t(CLANG_VERSION_MAJOR)); getModule().addModuleFlag(llvm::Module::Warning, "zos_product_minor_version", uint32_t(CLANG_VERSION_MINOR)); getModule().addModuleFlag(llvm::Module::Warning, "zos_product_patchlevel", uint32_t(CLANG_VERSION_PATCHLEVEL)); std::string ProductId = getClangVendor() + "clang"; getModule().addModuleFlag(llvm::Module::Error, "zos_product_id", llvm::MDString::get(VMContext, ProductId)); // Record the language because we need it for the PPA2. StringRef lang_str = languageToString( LangStandard::getLangStandardForKind(LangOpts.LangStd).Language); getModule().addModuleFlag(llvm::Module::Error, "zos_cu_language", llvm::MDString::get(VMContext, lang_str)); time_t TT = PreprocessorOpts.SourceDateEpoch ? *PreprocessorOpts.SourceDateEpoch : std::time(nullptr); getModule().addModuleFlag(llvm::Module::Max, "zos_translation_time", static_cast(TT)); // Multiple modes will be supported here. getModule().addModuleFlag(llvm::Module::Error, "zos_le_char_mode", llvm::MDString::get(VMContext, "ascii")); } llvm::Triple T = Context.getTargetInfo().getTriple(); if (T.isARM() || T.isThumb()) { // The minimum width of an enum in bytes uint64_t EnumWidth = Context.getLangOpts().ShortEnums ? 1 : 4; getModule().addModuleFlag(llvm::Module::Error, "min_enum_size", EnumWidth); } if (T.isRISCV()) { StringRef ABIStr = Target.getABI(); llvm::LLVMContext &Ctx = TheModule.getContext(); getModule().addModuleFlag(llvm::Module::Error, "target-abi", llvm::MDString::get(Ctx, ABIStr)); // Add the canonical ISA string as metadata so the backend can set the ELF // attributes correctly. We use AppendUnique so LTO will keep all of the // unique ISA strings that were linked together. const std::vector &Features = getTarget().getTargetOpts().Features; auto ParseResult = llvm::RISCVISAInfo::parseFeatures(T.isRISCV64() ? 64 : 32, Features); if (!errorToBool(ParseResult.takeError())) getModule().addModuleFlag( llvm::Module::AppendUnique, "riscv-isa", llvm::MDNode::get( Ctx, llvm::MDString::get(Ctx, (*ParseResult)->toString()))); } if (CodeGenOpts.SanitizeCfiCrossDso) { // Indicate that we want cross-DSO control flow integrity checks. getModule().addModuleFlag(llvm::Module::Override, "Cross-DSO CFI", 1); } if (CodeGenOpts.WholeProgramVTables) { // Indicate whether VFE was enabled for this module, so that the // vcall_visibility metadata added under whole program vtables is handled // appropriately in the optimizer. 
getModule().addModuleFlag(llvm::Module::Error, "Virtual Function Elim", CodeGenOpts.VirtualFunctionElimination); } if (LangOpts.Sanitize.has(SanitizerKind::CFIICall)) { getModule().addModuleFlag(llvm::Module::Override, "CFI Canonical Jump Tables", CodeGenOpts.SanitizeCfiCanonicalJumpTables); } if (CodeGenOpts.SanitizeCfiICallNormalizeIntegers) { getModule().addModuleFlag(llvm::Module::Override, "cfi-normalize-integers", 1); } if (LangOpts.Sanitize.has(SanitizerKind::KCFI)) { getModule().addModuleFlag(llvm::Module::Override, "kcfi", 1); // KCFI assumes patchable-function-prefix is the same for all indirectly // called functions. Store the expected offset for code generation. if (CodeGenOpts.PatchableFunctionEntryOffset) getModule().addModuleFlag(llvm::Module::Override, "kcfi-offset", CodeGenOpts.PatchableFunctionEntryOffset); } if (CodeGenOpts.CFProtectionReturn && Target.checkCFProtectionReturnSupported(getDiags())) { // Indicate that we want to instrument return control flow protection. getModule().addModuleFlag(llvm::Module::Min, "cf-protection-return", 1); } if (CodeGenOpts.CFProtectionBranch && Target.checkCFProtectionBranchSupported(getDiags())) { // Indicate that we want to instrument branch control flow protection. getModule().addModuleFlag(llvm::Module::Min, "cf-protection-branch", 1); } if (CodeGenOpts.FunctionReturnThunks) getModule().addModuleFlag(llvm::Module::Override, "function_return_thunk_extern", 1); if (CodeGenOpts.IndirectBranchCSPrefix) getModule().addModuleFlag(llvm::Module::Override, "indirect_branch_cs_prefix", 1); // Add module metadata for return address signing (ignoring // non-leaf/all) and stack tagging. These are actually turned on by function // attributes, but we use module metadata to emit build attributes. This is // needed for LTO, where the function attributes are inside bitcode // serialised into a global variable by the time build attributes are // emitted, so we can't access them. LTO objects could be compiled with // different flags therefore module flags are set to "Min" behavior to achieve // the same end result of the normal build where e.g BTI is off if any object // doesn't support it. 
if (Context.getTargetInfo().hasFeature("ptrauth") && LangOpts.getSignReturnAddressScope() != LangOptions::SignReturnAddressScopeKind::None) getModule().addModuleFlag(llvm::Module::Override, "sign-return-address-buildattr", 1); if (LangOpts.Sanitize.has(SanitizerKind::MemtagStack)) getModule().addModuleFlag(llvm::Module::Override, "tag-stack-memory-buildattr", 1); if (T.isARM() || T.isThumb() || T.isAArch64()) { if (LangOpts.BranchTargetEnforcement) getModule().addModuleFlag(llvm::Module::Min, "branch-target-enforcement", 1); if (LangOpts.BranchProtectionPAuthLR) getModule().addModuleFlag(llvm::Module::Min, "branch-protection-pauth-lr", 1); if (LangOpts.GuardedControlStack) getModule().addModuleFlag(llvm::Module::Min, "guarded-control-stack", 1); if (LangOpts.hasSignReturnAddress()) getModule().addModuleFlag(llvm::Module::Min, "sign-return-address", 1); if (LangOpts.isSignReturnAddressScopeAll()) getModule().addModuleFlag(llvm::Module::Min, "sign-return-address-all", 1); if (!LangOpts.isSignReturnAddressWithAKey()) getModule().addModuleFlag(llvm::Module::Min, "sign-return-address-with-bkey", 1); if (getTriple().isOSLinux()) { assert(getTriple().isOSBinFormatELF()); using namespace llvm::ELF; uint64_t PAuthABIVersion = (LangOpts.PointerAuthIntrinsics << AARCH64_PAUTH_PLATFORM_LLVM_LINUX_VERSION_INTRINSICS) | (LangOpts.PointerAuthCalls << AARCH64_PAUTH_PLATFORM_LLVM_LINUX_VERSION_CALLS) | (LangOpts.PointerAuthReturns << AARCH64_PAUTH_PLATFORM_LLVM_LINUX_VERSION_RETURNS) | (LangOpts.PointerAuthAuthTraps << AARCH64_PAUTH_PLATFORM_LLVM_LINUX_VERSION_AUTHTRAPS) | (LangOpts.PointerAuthVTPtrAddressDiscrimination << AARCH64_PAUTH_PLATFORM_LLVM_LINUX_VERSION_VPTRADDRDISCR) | (LangOpts.PointerAuthVTPtrTypeDiscrimination << AARCH64_PAUTH_PLATFORM_LLVM_LINUX_VERSION_VPTRTYPEDISCR) | (LangOpts.PointerAuthInitFini << AARCH64_PAUTH_PLATFORM_LLVM_LINUX_VERSION_INITFINI); static_assert(AARCH64_PAUTH_PLATFORM_LLVM_LINUX_VERSION_INITFINI == AARCH64_PAUTH_PLATFORM_LLVM_LINUX_VERSION_LAST, "Update when new enum items are defined"); if (PAuthABIVersion != 0) { getModule().addModuleFlag(llvm::Module::Error, "aarch64-elf-pauthabi-platform", AARCH64_PAUTH_PLATFORM_LLVM_LINUX); getModule().addModuleFlag(llvm::Module::Error, "aarch64-elf-pauthabi-version", PAuthABIVersion); } } } if (CodeGenOpts.StackClashProtector) getModule().addModuleFlag( llvm::Module::Override, "probe-stack", llvm::MDString::get(TheModule.getContext(), "inline-asm")); if (CodeGenOpts.StackProbeSize && CodeGenOpts.StackProbeSize != 4096) getModule().addModuleFlag(llvm::Module::Min, "stack-probe-size", CodeGenOpts.StackProbeSize); if (!CodeGenOpts.MemoryProfileOutput.empty()) { llvm::LLVMContext &Ctx = TheModule.getContext(); getModule().addModuleFlag( llvm::Module::Error, "MemProfProfileFilename", llvm::MDString::get(Ctx, CodeGenOpts.MemoryProfileOutput)); } if (LangOpts.CUDAIsDevice && getTriple().isNVPTX()) { // Indicate whether __nvvm_reflect should be configured to flush denormal // floating point values to 0. (This corresponds to its "__CUDA_FTZ" // property.) 
getModule().addModuleFlag(llvm::Module::Override, "nvvm-reflect-ftz", CodeGenOpts.FP32DenormalMode.Output != llvm::DenormalMode::IEEE); } if (LangOpts.EHAsynch) getModule().addModuleFlag(llvm::Module::Warning, "eh-asynch", 1); // Indicate whether this Module was compiled with -fopenmp if (getLangOpts().OpenMP && !getLangOpts().OpenMPSimd) getModule().addModuleFlag(llvm::Module::Max, "openmp", LangOpts.OpenMP); if (getLangOpts().OpenMPIsTargetDevice) getModule().addModuleFlag(llvm::Module::Max, "openmp-device", LangOpts.OpenMP); // Emit OpenCL specific module metadata: OpenCL/SPIR version. if (LangOpts.OpenCL || (LangOpts.CUDAIsDevice && getTriple().isSPIRV())) { EmitOpenCLMetadata(); // Emit SPIR version. if (getTriple().isSPIR()) { // SPIR v2.0 s2.12 - The SPIR version used by the module is stored in the // opencl.spir.version named metadata. // C++ for OpenCL has a distinct mapping for version compatibility with // OpenCL. auto Version = LangOpts.getOpenCLCompatibleVersion(); llvm::Metadata *SPIRVerElts[] = { llvm::ConstantAsMetadata::get(llvm::ConstantInt::get( Int32Ty, Version / 100)), llvm::ConstantAsMetadata::get(llvm::ConstantInt::get( Int32Ty, (Version / 100 > 1) ? 0 : 2))}; llvm::NamedMDNode *SPIRVerMD = TheModule.getOrInsertNamedMetadata("opencl.spir.version"); llvm::LLVMContext &Ctx = TheModule.getContext(); SPIRVerMD->addOperand(llvm::MDNode::get(Ctx, SPIRVerElts)); } } // HLSL related end of code gen work items. if (LangOpts.HLSL) getHLSLRuntime().finishCodeGen(); if (uint32_t PLevel = Context.getLangOpts().PICLevel) { assert(PLevel < 3 && "Invalid PIC Level"); getModule().setPICLevel(static_cast(PLevel)); if (Context.getLangOpts().PIE) getModule().setPIELevel(static_cast(PLevel)); } if (getCodeGenOpts().CodeModel.size() > 0) { unsigned CM = llvm::StringSwitch(getCodeGenOpts().CodeModel) .Case("tiny", llvm::CodeModel::Tiny) .Case("small", llvm::CodeModel::Small) .Case("kernel", llvm::CodeModel::Kernel) .Case("medium", llvm::CodeModel::Medium) .Case("large", llvm::CodeModel::Large) .Default(~0u); if (CM != ~0u) { llvm::CodeModel::Model codeModel = static_cast(CM); getModule().setCodeModel(codeModel); if ((CM == llvm::CodeModel::Medium || CM == llvm::CodeModel::Large) && Context.getTargetInfo().getTriple().getArch() == llvm::Triple::x86_64) { getModule().setLargeDataThreshold(getCodeGenOpts().LargeDataThreshold); } } } if (CodeGenOpts.NoPLT) getModule().setRtLibUseGOT(); if (getTriple().isOSBinFormatELF() && CodeGenOpts.DirectAccessExternalData != getModule().getDirectAccessExternalData()) { getModule().setDirectAccessExternalData( CodeGenOpts.DirectAccessExternalData); } if (CodeGenOpts.UnwindTables) getModule().setUwtable(llvm::UWTableKind(CodeGenOpts.UnwindTables)); switch (CodeGenOpts.getFramePointer()) { case CodeGenOptions::FramePointerKind::None: // 0 ("none") is the default. 
break; case CodeGenOptions::FramePointerKind::Reserved: getModule().setFramePointer(llvm::FramePointerKind::Reserved); break; case CodeGenOptions::FramePointerKind::NonLeaf: getModule().setFramePointer(llvm::FramePointerKind::NonLeaf); break; case CodeGenOptions::FramePointerKind::All: getModule().setFramePointer(llvm::FramePointerKind::All); break; } SimplifyPersonality(); if (getCodeGenOpts().EmitDeclMetadata) EmitDeclMetadata(); if (getCodeGenOpts().CoverageNotesFile.size() || getCodeGenOpts().CoverageDataFile.size()) EmitCoverageFile(); if (CGDebugInfo *DI = getModuleDebugInfo()) DI->finalize(); if (getCodeGenOpts().EmitVersionIdentMetadata) EmitVersionIdentMetadata(); if (!getCodeGenOpts().RecordCommandLine.empty()) EmitCommandLineMetadata(); if (!getCodeGenOpts().StackProtectorGuard.empty()) getModule().setStackProtectorGuard(getCodeGenOpts().StackProtectorGuard); if (!getCodeGenOpts().StackProtectorGuardReg.empty()) getModule().setStackProtectorGuardReg( getCodeGenOpts().StackProtectorGuardReg); if (!getCodeGenOpts().StackProtectorGuardSymbol.empty()) getModule().setStackProtectorGuardSymbol( getCodeGenOpts().StackProtectorGuardSymbol); if (getCodeGenOpts().StackProtectorGuardOffset != INT_MAX) getModule().setStackProtectorGuardOffset( getCodeGenOpts().StackProtectorGuardOffset); if (getCodeGenOpts().StackAlignment) getModule().setOverrideStackAlignment(getCodeGenOpts().StackAlignment); if (getCodeGenOpts().SkipRaxSetup) getModule().addModuleFlag(llvm::Module::Override, "SkipRaxSetup", 1); if (getLangOpts().RegCall4) getModule().addModuleFlag(llvm::Module::Override, "RegCallv4", 1); if (getContext().getTargetInfo().getMaxTLSAlign()) getModule().addModuleFlag(llvm::Module::Error, "MaxTLSAlign", getContext().getTargetInfo().getMaxTLSAlign()); getTargetCodeGenInfo().emitTargetGlobals(*this); getTargetCodeGenInfo().emitTargetMetadata(*this, MangledDeclNames); EmitBackendOptionsMetadata(getCodeGenOpts()); // If there is device offloading code embed it in the host now. EmbedObject(&getModule(), CodeGenOpts, getDiags()); // Set visibility from DLL storage class // We do this at the end of LLVM IR generation; after any operation // that might affect the DLL storage class or the visibility, and // before anything that might act on these. setVisibilityFromDLLStorageClass(LangOpts, getModule()); // Check the tail call symbols are truly undefined. if (getTriple().isPPC() && !MustTailCallUndefinedGlobals.empty()) { for (auto &I : MustTailCallUndefinedGlobals) { if (!I.first->isDefined()) getDiags().Report(I.second, diag::err_ppc_impossible_musttail) << 2; else { StringRef MangledName = getMangledName(GlobalDecl(I.first)); llvm::GlobalValue *Entry = GetGlobalValue(MangledName); if (!Entry || Entry->isWeakForLinker() || Entry->isDeclarationForLinker()) getDiags().Report(I.second, diag::err_ppc_impossible_musttail) << 2; } } } } void CodeGenModule::EmitOpenCLMetadata() { // SPIR v2.0 s2.13 - The OpenCL version used by the module is stored in the // opencl.ocl.version named metadata node. // C++ for OpenCL has a distinct mapping for versions compatible with OpenCL. 
auto CLVersion = LangOpts.getOpenCLCompatibleVersion(); auto EmitVersion = [this](StringRef MDName, int Version) { llvm::Metadata *OCLVerElts[] = { llvm::ConstantAsMetadata::get( llvm::ConstantInt::get(Int32Ty, Version / 100)), llvm::ConstantAsMetadata::get( llvm::ConstantInt::get(Int32Ty, (Version % 100) / 10))}; llvm::NamedMDNode *OCLVerMD = TheModule.getOrInsertNamedMetadata(MDName); llvm::LLVMContext &Ctx = TheModule.getContext(); OCLVerMD->addOperand(llvm::MDNode::get(Ctx, OCLVerElts)); }; EmitVersion("opencl.ocl.version", CLVersion); if (LangOpts.OpenCLCPlusPlus) { // In addition to the OpenCL compatible version, emit the C++ version. EmitVersion("opencl.cxx.version", LangOpts.OpenCLCPlusPlusVersion); } } void CodeGenModule::EmitBackendOptionsMetadata( const CodeGenOptions &CodeGenOpts) { if (getTriple().isRISCV()) { getModule().addModuleFlag(llvm::Module::Min, "SmallDataLimit", CodeGenOpts.SmallDataLimit); } } void CodeGenModule::UpdateCompletedType(const TagDecl *TD) { // Make sure that this type is translated. getTypes().UpdateCompletedType(TD); } void CodeGenModule::RefreshTypeCacheForClass(const CXXRecordDecl *RD) { // Make sure that this type is translated. getTypes().RefreshTypeCacheForClass(RD); } llvm::MDNode *CodeGenModule::getTBAATypeInfo(QualType QTy) { if (!TBAA) return nullptr; return TBAA->getTypeInfo(QTy); } TBAAAccessInfo CodeGenModule::getTBAAAccessInfo(QualType AccessType) { if (!TBAA) return TBAAAccessInfo(); if (getLangOpts().CUDAIsDevice) { // As CUDA builtin surface/texture types are replaced, skip generating TBAA // access info. if (AccessType->isCUDADeviceBuiltinSurfaceType()) { if (getTargetCodeGenInfo().getCUDADeviceBuiltinSurfaceDeviceType() != nullptr) return TBAAAccessInfo(); } else if (AccessType->isCUDADeviceBuiltinTextureType()) { if (getTargetCodeGenInfo().getCUDADeviceBuiltinTextureDeviceType() != nullptr) return TBAAAccessInfo(); } } return TBAA->getAccessInfo(AccessType); } TBAAAccessInfo CodeGenModule::getTBAAVTablePtrAccessInfo(llvm::Type *VTablePtrType) { if (!TBAA) return TBAAAccessInfo(); return TBAA->getVTablePtrAccessInfo(VTablePtrType); } llvm::MDNode *CodeGenModule::getTBAAStructInfo(QualType QTy) { if (!TBAA) return nullptr; return TBAA->getTBAAStructInfo(QTy); } llvm::MDNode *CodeGenModule::getTBAABaseTypeInfo(QualType QTy) { if (!TBAA) return nullptr; return TBAA->getBaseTypeInfo(QTy); } llvm::MDNode *CodeGenModule::getTBAAAccessTagInfo(TBAAAccessInfo Info) { if (!TBAA) return nullptr; return TBAA->getAccessTagInfo(Info); } TBAAAccessInfo CodeGenModule::mergeTBAAInfoForCast(TBAAAccessInfo SourceInfo, TBAAAccessInfo TargetInfo) { if (!TBAA) return TBAAAccessInfo(); return TBAA->mergeTBAAInfoForCast(SourceInfo, TargetInfo); } TBAAAccessInfo CodeGenModule::mergeTBAAInfoForConditionalOperator(TBAAAccessInfo InfoA, TBAAAccessInfo InfoB) { if (!TBAA) return TBAAAccessInfo(); return TBAA->mergeTBAAInfoForConditionalOperator(InfoA, InfoB); } TBAAAccessInfo CodeGenModule::mergeTBAAInfoForMemoryTransfer(TBAAAccessInfo DestInfo, TBAAAccessInfo SrcInfo) { if (!TBAA) return TBAAAccessInfo(); return TBAA->mergeTBAAInfoForConditionalOperator(DestInfo, SrcInfo); } void CodeGenModule::DecorateInstructionWithTBAA(llvm::Instruction *Inst, TBAAAccessInfo TBAAInfo) { if (llvm::MDNode *Tag = getTBAAAccessTagInfo(TBAAInfo)) Inst->setMetadata(llvm::LLVMContext::MD_tbaa, Tag); } void CodeGenModule::DecorateInstructionWithInvariantGroup( llvm::Instruction *I, const CXXRecordDecl *RD) { I->setMetadata(llvm::LLVMContext::MD_invariant_group, 
                 llvm::MDNode::get(getLLVMContext(), {}));
}

void CodeGenModule::Error(SourceLocation loc, StringRef message) {
  unsigned diagID = getDiags().getCustomDiagID(DiagnosticsEngine::Error, "%0");
  getDiags().Report(Context.getFullLoc(loc), diagID) << message;
}

/// ErrorUnsupported - Print out an error that codegen doesn't support the
/// specified stmt yet.
void CodeGenModule::ErrorUnsupported(const Stmt *S, const char *Type) {
  unsigned DiagID = getDiags().getCustomDiagID(DiagnosticsEngine::Error,
                                               "cannot compile this %0 yet");
  std::string Msg = Type;
  getDiags().Report(Context.getFullLoc(S->getBeginLoc()), DiagID)
      << Msg << S->getSourceRange();
}

/// ErrorUnsupported - Print out an error that codegen doesn't support the
/// specified decl yet.
void CodeGenModule::ErrorUnsupported(const Decl *D, const char *Type) {
  unsigned DiagID = getDiags().getCustomDiagID(DiagnosticsEngine::Error,
                                               "cannot compile this %0 yet");
  std::string Msg = Type;
  getDiags().Report(Context.getFullLoc(D->getLocation()), DiagID) << Msg;
}

llvm::ConstantInt *CodeGenModule::getSize(CharUnits size) {
  return llvm::ConstantInt::get(SizeTy, size.getQuantity());
}

void CodeGenModule::setGlobalVisibility(llvm::GlobalValue *GV,
                                        const NamedDecl *D) const {
  // Internal definitions always have default visibility.
  if (GV->hasLocalLinkage()) {
    GV->setVisibility(llvm::GlobalValue::DefaultVisibility);
    return;
  }
  if (!D)
    return;

  // Set visibility for definitions, and for declarations if requested globally
  // or set explicitly.
  LinkageInfo LV = D->getLinkageAndVisibility();

  // OpenMP declare target variables must be visible to the host so they can
  // be registered. We require protected visibility unless the variable has
  // the DT_nohost modifier and does not need to be registered.
  if (Context.getLangOpts().OpenMP &&
      Context.getLangOpts().OpenMPIsTargetDevice && isa<VarDecl>(D) &&
      D->hasAttr<OMPDeclareTargetDeclAttr>() &&
      D->getAttr<OMPDeclareTargetDeclAttr>()->getDevType() !=
          OMPDeclareTargetDeclAttr::DT_NoHost &&
      LV.getVisibility() == HiddenVisibility) {
    GV->setVisibility(llvm::GlobalValue::ProtectedVisibility);
    return;
  }

  if (GV->hasDLLExportStorageClass() || GV->hasDLLImportStorageClass()) {
    // Reject incompatible dllstorage and visibility annotations.
    if (!LV.isVisibilityExplicit())
      return;
    if (GV->hasDLLExportStorageClass()) {
      if (LV.getVisibility() == HiddenVisibility)
        getDiags().Report(D->getLocation(),
                          diag::err_hidden_visibility_dllexport);
    } else if (LV.getVisibility() != DefaultVisibility) {
      getDiags().Report(D->getLocation(),
                        diag::err_non_default_visibility_dllimport);
    }
    return;
  }

  if (LV.isVisibilityExplicit() || getLangOpts().SetVisibilityForExternDecls ||
      !GV->isDeclarationForLinker())
    GV->setVisibility(GetLLVMVisibility(LV.getVisibility()));
}

static bool shouldAssumeDSOLocal(const CodeGenModule &CGM,
                                 llvm::GlobalValue *GV) {
  if (GV->hasLocalLinkage())
    return true;

  if (!GV->hasDefaultVisibility() && !GV->hasExternalWeakLinkage())
    return true;

  // DLLImport explicitly marks the GV as external.
  if (GV->hasDLLImportStorageClass())
    return false;

  const llvm::Triple &TT = CGM.getTriple();
  const auto &CGOpts = CGM.getCodeGenOpts();
  if (TT.isWindowsGNUEnvironment()) {
    // In MinGW, variables without DLLImport can still be automatically
    // imported from a DLL by the linker; don't mark variables that
    // potentially could come from another DLL as DSO local.

    // With EmulatedTLS, TLS variables can be autoimported from other DLLs
    // (and this actually happens in the public interface of libstdc++), so
    // such variables can't be marked as DSO local.
    // (Native TLS variables can't be dllimported at all, though.)
    if (GV->isDeclarationForLinker() && isa<llvm::GlobalVariable>(GV) &&
        (!GV->isThreadLocal() || CGM.getCodeGenOpts().EmulatedTLS) &&
        CGOpts.AutoImport)
      return false;
  }

  // On COFF, don't mark 'extern_weak' symbols as DSO local. If these symbols
  // remain unresolved in the link, they can be resolved to zero, which is
  // outside the current DSO.
  if (TT.isOSBinFormatCOFF() && GV->hasExternalWeakLinkage())
    return false;

  // Every other GV is local on COFF.
  // Make an exception for windows OS in the triple: Some firmware builds use
  // *-win32-macho triples. This (accidentally?) produced windows relocations
  // without GOT tables in older clang versions; Keep this behaviour.
  // FIXME: even thread local variables?
  if (TT.isOSBinFormatCOFF() || (TT.isOSWindows() && TT.isOSBinFormatMachO()))
    return true;

  // Only handle COFF and ELF for now.
  if (!TT.isOSBinFormatELF())
    return false;

  // If this is not an executable, don't assume anything is local.
  llvm::Reloc::Model RM = CGOpts.RelocationModel;
  const auto &LOpts = CGM.getLangOpts();
  if (RM != llvm::Reloc::Static && !LOpts.PIE) {
    // On ELF, if -fno-semantic-interposition is specified and the target
    // supports local aliases, there will be neither CC1
    // -fsemantic-interposition nor -fhalf-no-semantic-interposition. Set
    // dso_local on the function if using a local alias is preferable (can
    // avoid PLT indirection).
    if (!(isa<llvm::Function>(GV) && GV->canBenefitFromLocalAlias()))
      return false;
    return !(CGM.getLangOpts().SemanticInterposition ||
             CGM.getLangOpts().HalfNoSemanticInterposition);
  }

  // A definition cannot be preempted from an executable.
  if (!GV->isDeclarationForLinker())
    return true;

  // Most PIC code sequences that assume that a symbol is local cannot produce
  // a 0 if it turns out the symbol is undefined. While this is ABI and
  // relocation dependent, it seems worth it to handle it here.
  if (RM == llvm::Reloc::PIC_ && GV->hasExternalWeakLinkage())
    return false;

  // PowerPC64 prefers TOC indirection to avoid copy relocations.
  if (TT.isPPC64())
    return false;

  if (CGOpts.DirectAccessExternalData) {
    // If -fdirect-access-external-data (default for -fno-pic), set dso_local
    // for non-thread-local variables. If the symbol is not defined in the
    // executable, a copy relocation will be needed at link time. dso_local is
    // excluded for thread-local variables because they generally don't support
    // copy relocations.
    if (auto *Var = dyn_cast<llvm::GlobalVariable>(GV))
      if (!Var->isThreadLocal())
        return true;

    // -fno-pic sets dso_local on a function declaration to allow direct
    // accesses when taking its address (similar to a data symbol). If the
    // function is not defined in the executable, a canonical PLT entry will be
    // needed at link time. -fno-direct-access-external-data can avoid the
    // canonical PLT entry. We don't generalize this condition to -fpie/-fpic
    // as it could just cause trouble without providing perceptible benefits.
    if (isa<llvm::Function>(GV) && !CGOpts.NoPLT && RM == llvm::Reloc::Static)
      return true;
  }

  // If we can use copy relocations we can assume it is local.

  // Otherwise don't assume it is local.
  return false;
}

void CodeGenModule::setDSOLocal(llvm::GlobalValue *GV) const {
  GV->setDSOLocal(shouldAssumeDSOLocal(*this, GV));
}

void CodeGenModule::setDLLImportDLLExport(llvm::GlobalValue *GV,
                                          GlobalDecl GD) const {
  const auto *D = dyn_cast<NamedDecl>(GD.getDecl());
  // C++ destructors have a few C++ ABI specific special cases.
  if (const auto *Dtor = dyn_cast_or_null<CXXDestructorDecl>(D)) {
    getCXXABI().setCXXDestructorDLLStorage(GV, Dtor, GD.getDtorType());
    return;
  }
  setDLLImportDLLExport(GV, D);
}

void CodeGenModule::setDLLImportDLLExport(llvm::GlobalValue *GV,
                                          const NamedDecl *D) const {
  if (D && D->isExternallyVisible()) {
    if (D->hasAttr<DLLImportAttr>())
      GV->setDLLStorageClass(llvm::GlobalVariable::DLLImportStorageClass);
    else if ((D->hasAttr<DLLExportAttr>() ||
              shouldMapVisibilityToDLLExport(D)) &&
             !GV->isDeclarationForLinker())
      GV->setDLLStorageClass(llvm::GlobalVariable::DLLExportStorageClass);
  }
}

void CodeGenModule::setGVProperties(llvm::GlobalValue *GV,
                                    GlobalDecl GD) const {
  setDLLImportDLLExport(GV, GD);
  setGVPropertiesAux(GV, dyn_cast<NamedDecl>(GD.getDecl()));
}

void CodeGenModule::setGVProperties(llvm::GlobalValue *GV,
                                    const NamedDecl *D) const {
  setDLLImportDLLExport(GV, D);
  setGVPropertiesAux(GV, D);
}

void CodeGenModule::setGVPropertiesAux(llvm::GlobalValue *GV,
                                       const NamedDecl *D) const {
  setGlobalVisibility(GV, D);
  setDSOLocal(GV);
  GV->setPartition(CodeGenOpts.SymbolPartition);
}

static llvm::GlobalVariable::ThreadLocalMode GetLLVMTLSModel(StringRef S) {
  return llvm::StringSwitch<llvm::GlobalVariable::ThreadLocalMode>(S)
      .Case("global-dynamic", llvm::GlobalVariable::GeneralDynamicTLSModel)
      .Case("local-dynamic", llvm::GlobalVariable::LocalDynamicTLSModel)
      .Case("initial-exec", llvm::GlobalVariable::InitialExecTLSModel)
      .Case("local-exec", llvm::GlobalVariable::LocalExecTLSModel);
}

llvm::GlobalVariable::ThreadLocalMode
CodeGenModule::GetDefaultLLVMTLSModel() const {
  switch (CodeGenOpts.getDefaultTLSModel()) {
  case CodeGenOptions::GeneralDynamicTLSModel:
    return llvm::GlobalVariable::GeneralDynamicTLSModel;
  case CodeGenOptions::LocalDynamicTLSModel:
    return llvm::GlobalVariable::LocalDynamicTLSModel;
  case CodeGenOptions::InitialExecTLSModel:
    return llvm::GlobalVariable::InitialExecTLSModel;
  case CodeGenOptions::LocalExecTLSModel:
    return llvm::GlobalVariable::LocalExecTLSModel;
  }
  llvm_unreachable("Invalid TLS model!");
}

void CodeGenModule::setTLSMode(llvm::GlobalValue *GV, const VarDecl &D) const {
  assert(D.getTLSKind() && "setting TLS mode on non-TLS var!");

  llvm::GlobalValue::ThreadLocalMode TLM;
  TLM = GetDefaultLLVMTLSModel();

  // Override the TLS model if it is explicitly specified.
  if (const TLSModelAttr *Attr = D.getAttr<TLSModelAttr>()) {
    TLM = GetLLVMTLSModel(Attr->getModel());
  }

  GV->setThreadLocalMode(TLM);
}

static std::string getCPUSpecificMangling(const CodeGenModule &CGM,
                                          StringRef Name) {
  const TargetInfo &Target = CGM.getTarget();
  return (Twine('.') + Twine(Target.CPUSpecificManglingCharacter(Name))).str();
}

static void AppendCPUSpecificCPUDispatchMangling(const CodeGenModule &CGM,
                                                 const CPUSpecificAttr *Attr,
                                                 unsigned CPUIndex,
                                                 raw_ostream &Out) {
  // cpu_specific gets the current name, dispatch gets the resolver if IFunc is
  // supported.
  if (Attr)
    Out << getCPUSpecificMangling(CGM, Attr->getCPUName(CPUIndex)->getName());
  else if (CGM.getTarget().supportsIFunc())
    Out << ".resolver";
}

// Returns true if GD is a function decl with internal linkage and
// needs a unique suffix after the mangled name.
static bool isUniqueInternalLinkageDecl(GlobalDecl GD, CodeGenModule &CGM) { const Decl *D = GD.getDecl(); return !CGM.getModuleNameHash().empty() && isa(D) && (CGM.getFunctionLinkage(GD) == llvm::GlobalValue::InternalLinkage); } static std::string getMangledNameImpl(CodeGenModule &CGM, GlobalDecl GD, const NamedDecl *ND, bool OmitMultiVersionMangling = false) { SmallString<256> Buffer; llvm::raw_svector_ostream Out(Buffer); MangleContext &MC = CGM.getCXXABI().getMangleContext(); if (!CGM.getModuleNameHash().empty()) MC.needsUniqueInternalLinkageNames(); bool ShouldMangle = MC.shouldMangleDeclName(ND); if (ShouldMangle) MC.mangleName(GD.getWithDecl(ND), Out); else { IdentifierInfo *II = ND->getIdentifier(); assert(II && "Attempt to mangle unnamed decl."); const auto *FD = dyn_cast(ND); if (FD && FD->getType()->castAs()->getCallConv() == CC_X86RegCall) { if (CGM.getLangOpts().RegCall4) Out << "__regcall4__" << II->getName(); else Out << "__regcall3__" << II->getName(); } else if (FD && FD->hasAttr() && GD.getKernelReferenceKind() == KernelReferenceKind::Stub) { Out << "__device_stub__" << II->getName(); } else { Out << II->getName(); } } // Check if the module name hash should be appended for internal linkage // symbols. This should come before multi-version target suffixes are // appended. This is to keep the name and module hash suffix of the // internal linkage function together. The unique suffix should only be // added when name mangling is done to make sure that the final name can // be properly demangled. For example, for C functions without prototypes, // name mangling is not done and the unique suffix should not be appeneded // then. if (ShouldMangle && isUniqueInternalLinkageDecl(GD, CGM)) { assert(CGM.getCodeGenOpts().UniqueInternalLinkageNames && "Hash computed when not explicitly requested"); Out << CGM.getModuleNameHash(); } if (const auto *FD = dyn_cast(ND)) if (FD->isMultiVersion() && !OmitMultiVersionMangling) { switch (FD->getMultiVersionKind()) { case MultiVersionKind::CPUDispatch: case MultiVersionKind::CPUSpecific: AppendCPUSpecificCPUDispatchMangling(CGM, FD->getAttr(), GD.getMultiVersionIndex(), Out); break; case MultiVersionKind::Target: { auto *Attr = FD->getAttr(); assert(Attr && "Expected TargetAttr to be present " "for attribute mangling"); const ABIInfo &Info = CGM.getTargetCodeGenInfo().getABIInfo(); Info.appendAttributeMangling(Attr, Out); break; } case MultiVersionKind::TargetVersion: { auto *Attr = FD->getAttr(); assert(Attr && "Expected TargetVersionAttr to be present " "for attribute mangling"); const ABIInfo &Info = CGM.getTargetCodeGenInfo().getABIInfo(); Info.appendAttributeMangling(Attr, Out); break; } case MultiVersionKind::TargetClones: { auto *Attr = FD->getAttr(); assert(Attr && "Expected TargetClonesAttr to be present " "for attribute mangling"); unsigned Index = GD.getMultiVersionIndex(); const ABIInfo &Info = CGM.getTargetCodeGenInfo().getABIInfo(); Info.appendAttributeMangling(Attr, Index, Out); break; } case MultiVersionKind::None: llvm_unreachable("None multiversion type isn't valid here"); } } // Make unique name for device side static file-scope variable for HIP. 
if (CGM.getContext().shouldExternalize(ND) && CGM.getLangOpts().GPURelocatableDeviceCode && CGM.getLangOpts().CUDAIsDevice) CGM.printPostfixForExternalizedDecl(Out, ND); return std::string(Out.str()); } void CodeGenModule::UpdateMultiVersionNames(GlobalDecl GD, const FunctionDecl *FD, StringRef &CurName) { if (!FD->isMultiVersion()) return; // Get the name of what this would be without the 'target' attribute. This // allows us to lookup the version that was emitted when this wasn't a // multiversion function. std::string NonTargetName = getMangledNameImpl(*this, GD, FD, /*OmitMultiVersionMangling=*/true); GlobalDecl OtherGD; if (lookupRepresentativeDecl(NonTargetName, OtherGD)) { assert(OtherGD.getCanonicalDecl() .getDecl() ->getAsFunction() ->isMultiVersion() && "Other GD should now be a multiversioned function"); // OtherFD is the version of this function that was mangled BEFORE // becoming a MultiVersion function. It potentially needs to be updated. const FunctionDecl *OtherFD = OtherGD.getCanonicalDecl() .getDecl() ->getAsFunction() ->getMostRecentDecl(); std::string OtherName = getMangledNameImpl(*this, OtherGD, OtherFD); // This is so that if the initial version was already the 'default' // version, we don't try to update it. if (OtherName != NonTargetName) { // Remove instead of erase, since others may have stored the StringRef // to this. const auto ExistingRecord = Manglings.find(NonTargetName); if (ExistingRecord != std::end(Manglings)) Manglings.remove(&(*ExistingRecord)); auto Result = Manglings.insert(std::make_pair(OtherName, OtherGD)); StringRef OtherNameRef = MangledDeclNames[OtherGD.getCanonicalDecl()] = Result.first->first(); // If this is the current decl is being created, make sure we update the name. if (GD.getCanonicalDecl() == OtherGD.getCanonicalDecl()) CurName = OtherNameRef; if (llvm::GlobalValue *Entry = GetGlobalValue(NonTargetName)) Entry->setName(OtherName); } } } StringRef CodeGenModule::getMangledName(GlobalDecl GD) { GlobalDecl CanonicalGD = GD.getCanonicalDecl(); // Some ABIs don't have constructor variants. Make sure that base and // complete constructors get mangled the same. if (const auto *CD = dyn_cast(CanonicalGD.getDecl())) { if (!getTarget().getCXXABI().hasConstructorVariants()) { CXXCtorType OrigCtorType = GD.getCtorType(); assert(OrigCtorType == Ctor_Base || OrigCtorType == Ctor_Complete); if (OrigCtorType == Ctor_Base) CanonicalGD = GlobalDecl(CD, Ctor_Complete); } } // In CUDA/HIP device compilation with -fgpu-rdc, the mangled name of a // static device variable depends on whether the variable is referenced by // a host or device host function. Therefore the mangled name cannot be // cached. if (!LangOpts.CUDAIsDevice || !getContext().mayExternalize(GD.getDecl())) { auto FoundName = MangledDeclNames.find(CanonicalGD); if (FoundName != MangledDeclNames.end()) return FoundName->second; } // Keep the first result in the case of a mangling collision. const auto *ND = cast(GD.getDecl()); std::string MangledName = getMangledNameImpl(*this, GD, ND); // Ensure either we have different ABIs between host and device compilations, // says host compilation following MSVC ABI but device compilation follows // Itanium C++ ABI or, if they follow the same ABI, kernel names after // mangling should be the same after name stubbing. The later checking is // very important as the device kernel name being mangled in host-compilation // is used to resolve the device binaries to be executed. Inconsistent naming // result in undefined behavior. 
Even though we cannot check that naming // directly between host- and device-compilations, the host- and // device-mangling in host compilation could help catching certain ones. assert(!isa(ND) || !ND->hasAttr() || getContext().shouldExternalize(ND) || getLangOpts().CUDAIsDevice || (getContext().getAuxTargetInfo() && (getContext().getAuxTargetInfo()->getCXXABI() != getContext().getTargetInfo().getCXXABI())) || getCUDARuntime().getDeviceSideName(ND) == getMangledNameImpl( *this, GD.getWithKernelReferenceKind(KernelReferenceKind::Kernel), ND)); auto Result = Manglings.insert(std::make_pair(MangledName, GD)); return MangledDeclNames[CanonicalGD] = Result.first->first(); } StringRef CodeGenModule::getBlockMangledName(GlobalDecl GD, const BlockDecl *BD) { MangleContext &MangleCtx = getCXXABI().getMangleContext(); const Decl *D = GD.getDecl(); SmallString<256> Buffer; llvm::raw_svector_ostream Out(Buffer); if (!D) MangleCtx.mangleGlobalBlock(BD, dyn_cast_or_null(initializedGlobalDecl.getDecl()), Out); else if (const auto *CD = dyn_cast(D)) MangleCtx.mangleCtorBlock(CD, GD.getCtorType(), BD, Out); else if (const auto *DD = dyn_cast(D)) MangleCtx.mangleDtorBlock(DD, GD.getDtorType(), BD, Out); else MangleCtx.mangleBlock(cast(D), BD, Out); auto Result = Manglings.insert(std::make_pair(Out.str(), BD)); return Result.first->first(); } const GlobalDecl CodeGenModule::getMangledNameDecl(StringRef Name) { auto it = MangledDeclNames.begin(); while (it != MangledDeclNames.end()) { if (it->second == Name) return it->first; it++; } return GlobalDecl(); } llvm::GlobalValue *CodeGenModule::GetGlobalValue(StringRef Name) { return getModule().getNamedValue(Name); } /// AddGlobalCtor - Add a function to the list that will be called before /// main() runs. void CodeGenModule::AddGlobalCtor(llvm::Function *Ctor, int Priority, unsigned LexOrder, llvm::Constant *AssociatedData) { // FIXME: Type coercion of void()* types. GlobalCtors.push_back(Structor(Priority, LexOrder, Ctor, AssociatedData)); } /// AddGlobalDtor - Add a function to the list that will be called /// when the module is unloaded. void CodeGenModule::AddGlobalDtor(llvm::Function *Dtor, int Priority, bool IsDtorAttrFunc) { if (CodeGenOpts.RegisterGlobalDtorsWithAtExit && (!getContext().getTargetInfo().getTriple().isOSAIX() || IsDtorAttrFunc)) { DtorsUsingAtExit[Priority].push_back(Dtor); return; } // FIXME: Type coercion of void()* types. GlobalDtors.push_back(Structor(Priority, ~0U, Dtor, nullptr)); } void CodeGenModule::EmitCtorList(CtorList &Fns, const char *GlobalName) { if (Fns.empty()) return; // Ctor function type is void()*. llvm::FunctionType* CtorFTy = llvm::FunctionType::get(VoidTy, false); llvm::Type *CtorPFTy = llvm::PointerType::get(CtorFTy, TheModule.getDataLayout().getProgramAddressSpace()); // Get the type of a ctor entry, { i32, void ()*, i8* }. llvm::StructType *CtorStructTy = llvm::StructType::get( Int32Ty, CtorPFTy, VoidPtrTy); // Construct the constructor and destructor arrays. ConstantInitBuilder builder(*this); auto ctors = builder.beginArray(CtorStructTy); for (const auto &I : Fns) { auto ctor = ctors.beginStruct(CtorStructTy); ctor.addInt(Int32Ty, I.Priority); ctor.add(I.Initializer); if (I.AssociatedData) ctor.add(I.AssociatedData); else ctor.addNullPointer(VoidPtrTy); ctor.finishAndAddTo(ctors); } auto list = ctors.finishAndCreateGlobal(GlobalName, getPointerAlign(), /*constant*/ false, llvm::GlobalValue::AppendingLinkage); // The LTO linker doesn't seem to like it when we set an alignment // on appending variables. 
Take it off as a workaround. list->setAlignment(std::nullopt); Fns.clear(); } llvm::GlobalValue::LinkageTypes CodeGenModule::getFunctionLinkage(GlobalDecl GD) { const auto *D = cast(GD.getDecl()); GVALinkage Linkage = getContext().GetGVALinkageForFunction(D); if (const auto *Dtor = dyn_cast(D)) return getCXXABI().getCXXDestructorLinkage(Linkage, Dtor, GD.getDtorType()); return getLLVMLinkageForDeclarator(D, Linkage); } llvm::ConstantInt *CodeGenModule::CreateCrossDsoCfiTypeId(llvm::Metadata *MD) { llvm::MDString *MDS = dyn_cast(MD); if (!MDS) return nullptr; return llvm::ConstantInt::get(Int64Ty, llvm::MD5Hash(MDS->getString())); } llvm::ConstantInt *CodeGenModule::CreateKCFITypeId(QualType T) { if (auto *FnType = T->getAs()) T = getContext().getFunctionType( FnType->getReturnType(), FnType->getParamTypes(), FnType->getExtProtoInfo().withExceptionSpec(EST_None)); std::string OutName; llvm::raw_string_ostream Out(OutName); getCXXABI().getMangleContext().mangleCanonicalTypeName( T, Out, getCodeGenOpts().SanitizeCfiICallNormalizeIntegers); if (getCodeGenOpts().SanitizeCfiICallNormalizeIntegers) Out << ".normalized"; return llvm::ConstantInt::get(Int32Ty, static_cast(llvm::xxHash64(OutName))); } void CodeGenModule::SetLLVMFunctionAttributes(GlobalDecl GD, const CGFunctionInfo &Info, llvm::Function *F, bool IsThunk) { unsigned CallingConv; llvm::AttributeList PAL; ConstructAttributeList(F->getName(), Info, GD, PAL, CallingConv, /*AttrOnCallSite=*/false, IsThunk); if (CallingConv == llvm::CallingConv::X86_VectorCall && getTarget().getTriple().isWindowsArm64EC()) { SourceLocation Loc; if (const Decl *D = GD.getDecl()) Loc = D->getLocation(); Error(Loc, "__vectorcall calling convention is not currently supported"); } F->setAttributes(PAL); F->setCallingConv(static_cast(CallingConv)); } static void removeImageAccessQualifier(std::string& TyName) { std::string ReadOnlyQual("__read_only"); std::string::size_type ReadOnlyPos = TyName.find(ReadOnlyQual); if (ReadOnlyPos != std::string::npos) // "+ 1" for the space after access qualifier. TyName.erase(ReadOnlyPos, ReadOnlyQual.size() + 1); else { std::string WriteOnlyQual("__write_only"); std::string::size_type WriteOnlyPos = TyName.find(WriteOnlyQual); if (WriteOnlyPos != std::string::npos) TyName.erase(WriteOnlyPos, WriteOnlyQual.size() + 1); else { std::string ReadWriteQual("__read_write"); std::string::size_type ReadWritePos = TyName.find(ReadWriteQual); if (ReadWritePos != std::string::npos) TyName.erase(ReadWritePos, ReadWriteQual.size() + 1); } } } // Returns the address space id that should be produced to the // kernel_arg_addr_space metadata. This is always fixed to the ids // as specified in the SPIR 2.0 specification in order to differentiate // for example in clGetKernelArgInfo() implementation between the address // spaces with targets without unique mapping to the OpenCL address spaces // (basically all single AS CPUs). static unsigned ArgInfoAddressSpace(LangAS AS) { switch (AS) { case LangAS::opencl_global: return 1; case LangAS::opencl_constant: return 2; case LangAS::opencl_local: return 3; case LangAS::opencl_generic: return 4; // Not in SPIR 2.0 specs. case LangAS::opencl_global_device: return 5; case LangAS::opencl_global_host: return 6; default: return 0; // Assume private. 
} } void CodeGenModule::GenKernelArgMetadata(llvm::Function *Fn, const FunctionDecl *FD, CodeGenFunction *CGF) { assert(((FD && CGF) || (!FD && !CGF)) && "Incorrect use - FD and CGF should either be both null or not!"); // Create MDNodes that represent the kernel arg metadata. // Each MDNode is a list in the form of "key", N number of values which is // the same number of values as their are kernel arguments. const PrintingPolicy &Policy = Context.getPrintingPolicy(); // MDNode for the kernel argument address space qualifiers. SmallVector addressQuals; // MDNode for the kernel argument access qualifiers (images only). SmallVector accessQuals; // MDNode for the kernel argument type names. SmallVector argTypeNames; // MDNode for the kernel argument base type names. SmallVector argBaseTypeNames; // MDNode for the kernel argument type qualifiers. SmallVector argTypeQuals; // MDNode for the kernel argument names. SmallVector argNames; if (FD && CGF) for (unsigned i = 0, e = FD->getNumParams(); i != e; ++i) { const ParmVarDecl *parm = FD->getParamDecl(i); // Get argument name. argNames.push_back(llvm::MDString::get(VMContext, parm->getName())); if (!getLangOpts().OpenCL) continue; QualType ty = parm->getType(); std::string typeQuals; // Get image and pipe access qualifier: if (ty->isImageType() || ty->isPipeType()) { const Decl *PDecl = parm; if (const auto *TD = ty->getAs()) PDecl = TD->getDecl(); const OpenCLAccessAttr *A = PDecl->getAttr(); if (A && A->isWriteOnly()) accessQuals.push_back(llvm::MDString::get(VMContext, "write_only")); else if (A && A->isReadWrite()) accessQuals.push_back(llvm::MDString::get(VMContext, "read_write")); else accessQuals.push_back(llvm::MDString::get(VMContext, "read_only")); } else accessQuals.push_back(llvm::MDString::get(VMContext, "none")); auto getTypeSpelling = [&](QualType Ty) { auto typeName = Ty.getUnqualifiedType().getAsString(Policy); if (Ty.isCanonical()) { StringRef typeNameRef = typeName; // Turn "unsigned type" to "utype" if (typeNameRef.consume_front("unsigned ")) return std::string("u") + typeNameRef.str(); if (typeNameRef.consume_front("signed ")) return typeNameRef.str(); } return typeName; }; if (ty->isPointerType()) { QualType pointeeTy = ty->getPointeeType(); // Get address qualifier. addressQuals.push_back( llvm::ConstantAsMetadata::get(CGF->Builder.getInt32( ArgInfoAddressSpace(pointeeTy.getAddressSpace())))); // Get argument type name. std::string typeName = getTypeSpelling(pointeeTy) + "*"; std::string baseTypeName = getTypeSpelling(pointeeTy.getCanonicalType()) + "*"; argTypeNames.push_back(llvm::MDString::get(VMContext, typeName)); argBaseTypeNames.push_back( llvm::MDString::get(VMContext, baseTypeName)); // Get argument type qualifiers: if (ty.isRestrictQualified()) typeQuals = "restrict"; if (pointeeTy.isConstQualified() || (pointeeTy.getAddressSpace() == LangAS::opencl_constant)) typeQuals += typeQuals.empty() ? "const" : " const"; if (pointeeTy.isVolatileQualified()) typeQuals += typeQuals.empty() ? "volatile" : " volatile"; } else { uint32_t AddrSpc = 0; bool isPipe = ty->isPipeType(); if (ty->isImageType() || isPipe) AddrSpc = ArgInfoAddressSpace(LangAS::opencl_global); addressQuals.push_back( llvm::ConstantAsMetadata::get(CGF->Builder.getInt32(AddrSpc))); // Get argument type name. ty = isPipe ? 
ty->castAs()->getElementType() : ty; std::string typeName = getTypeSpelling(ty); std::string baseTypeName = getTypeSpelling(ty.getCanonicalType()); // Remove access qualifiers on images // (as they are inseparable from type in clang implementation, // but OpenCL spec provides a special query to get access qualifier // via clGetKernelArgInfo with CL_KERNEL_ARG_ACCESS_QUALIFIER): if (ty->isImageType()) { removeImageAccessQualifier(typeName); removeImageAccessQualifier(baseTypeName); } argTypeNames.push_back(llvm::MDString::get(VMContext, typeName)); argBaseTypeNames.push_back( llvm::MDString::get(VMContext, baseTypeName)); if (isPipe) typeQuals = "pipe"; } argTypeQuals.push_back(llvm::MDString::get(VMContext, typeQuals)); } if (getLangOpts().OpenCL) { Fn->setMetadata("kernel_arg_addr_space", llvm::MDNode::get(VMContext, addressQuals)); Fn->setMetadata("kernel_arg_access_qual", llvm::MDNode::get(VMContext, accessQuals)); Fn->setMetadata("kernel_arg_type", llvm::MDNode::get(VMContext, argTypeNames)); Fn->setMetadata("kernel_arg_base_type", llvm::MDNode::get(VMContext, argBaseTypeNames)); Fn->setMetadata("kernel_arg_type_qual", llvm::MDNode::get(VMContext, argTypeQuals)); } if (getCodeGenOpts().EmitOpenCLArgMetadata || getCodeGenOpts().HIPSaveKernelArgName) Fn->setMetadata("kernel_arg_name", llvm::MDNode::get(VMContext, argNames)); } /// Determines whether the language options require us to model /// unwind exceptions. We treat -fexceptions as mandating this /// except under the fragile ObjC ABI with only ObjC exceptions /// enabled. This means, for example, that C with -fexceptions /// enables this. static bool hasUnwindExceptions(const LangOptions &LangOpts) { // If exceptions are completely disabled, obviously this is false. if (!LangOpts.Exceptions) return false; // If C++ exceptions are enabled, this is true. if (LangOpts.CXXExceptions) return true; // If ObjC exceptions are enabled, this depends on the ABI. if (LangOpts.ObjCExceptions) { return LangOpts.ObjCRuntime.hasUnwindExceptions(); } return true; } static bool requiresMemberFunctionPointerTypeMetadata(CodeGenModule &CGM, const CXXMethodDecl *MD) { // Check that the type metadata can ever actually be used by a call. if (!CGM.getCodeGenOpts().LTOUnit || !CGM.HasHiddenLTOVisibility(MD->getParent())) return false; // Only functions whose address can be taken with a member function pointer // need this sort of type metadata. return MD->isImplicitObjectMemberFunction() && !MD->isVirtual() && !isa(MD); } SmallVector CodeGenModule::getMostBaseClasses(const CXXRecordDecl *RD) { llvm::SetVector MostBases; std::function CollectMostBases; CollectMostBases = [&](const CXXRecordDecl *RD) { if (RD->getNumBases() == 0) MostBases.insert(RD); for (const CXXBaseSpecifier &B : RD->bases()) CollectMostBases(B.getType()->getAsCXXRecordDecl()); }; CollectMostBases(RD); return MostBases.takeVector(); } void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D, llvm::Function *F) { llvm::AttrBuilder B(F->getContext()); if ((!D || !D->hasAttr()) && CodeGenOpts.UnwindTables) B.addUWTableAttr(llvm::UWTableKind(CodeGenOpts.UnwindTables)); if (CodeGenOpts.StackClashProtector) B.addAttribute("probe-stack", "inline-asm"); if (CodeGenOpts.StackProbeSize && CodeGenOpts.StackProbeSize != 4096) B.addAttribute("stack-probe-size", std::to_string(CodeGenOpts.StackProbeSize)); if (!hasUnwindExceptions(LangOpts)) B.addAttribute(llvm::Attribute::NoUnwind); if (D && D->hasAttr()) ; // Do nothing. 
else if (D && D->hasAttr() && isStackProtectorOn(LangOpts, getTriple(), LangOptions::SSPOn)) B.addAttribute(llvm::Attribute::StackProtectStrong); else if (isStackProtectorOn(LangOpts, getTriple(), LangOptions::SSPOn)) B.addAttribute(llvm::Attribute::StackProtect); else if (isStackProtectorOn(LangOpts, getTriple(), LangOptions::SSPStrong)) B.addAttribute(llvm::Attribute::StackProtectStrong); else if (isStackProtectorOn(LangOpts, getTriple(), LangOptions::SSPReq)) B.addAttribute(llvm::Attribute::StackProtectReq); if (!D) { // If we don't have a declaration to control inlining, the function isn't // explicitly marked as alwaysinline for semantic reasons, and inlining is // disabled, mark the function as noinline. if (!F->hasFnAttribute(llvm::Attribute::AlwaysInline) && CodeGenOpts.getInlining() == CodeGenOptions::OnlyAlwaysInlining) B.addAttribute(llvm::Attribute::NoInline); F->addFnAttrs(B); return; } // Handle SME attributes that apply to function definitions, // rather than to function prototypes. if (D->hasAttr()) B.addAttribute("aarch64_pstate_sm_body"); if (auto *Attr = D->getAttr()) { if (Attr->isNewZA()) B.addAttribute("aarch64_new_za"); if (Attr->isNewZT0()) B.addAttribute("aarch64_new_zt0"); } // Track whether we need to add the optnone LLVM attribute, // starting with the default for this optimization level. bool ShouldAddOptNone = !CodeGenOpts.DisableO0ImplyOptNone && CodeGenOpts.OptimizationLevel == 0; // We can't add optnone in the following cases, it won't pass the verifier. ShouldAddOptNone &= !D->hasAttr(); ShouldAddOptNone &= !D->hasAttr(); // Add optnone, but do so only if the function isn't always_inline. if ((ShouldAddOptNone || D->hasAttr()) && !F->hasFnAttribute(llvm::Attribute::AlwaysInline)) { B.addAttribute(llvm::Attribute::OptimizeNone); // OptimizeNone implies noinline; we should not be inlining such functions. B.addAttribute(llvm::Attribute::NoInline); // We still need to handle naked functions even though optnone subsumes // much of their semantics. if (D->hasAttr()) B.addAttribute(llvm::Attribute::Naked); // OptimizeNone wins over OptimizeForSize and MinSize. F->removeFnAttr(llvm::Attribute::OptimizeForSize); F->removeFnAttr(llvm::Attribute::MinSize); } else if (D->hasAttr()) { // Naked implies noinline: we should not be inlining such functions. B.addAttribute(llvm::Attribute::Naked); B.addAttribute(llvm::Attribute::NoInline); } else if (D->hasAttr()) { B.addAttribute(llvm::Attribute::NoDuplicate); } else if (D->hasAttr() && !F->hasFnAttribute(llvm::Attribute::AlwaysInline)) { // Add noinline if the function isn't always_inline. B.addAttribute(llvm::Attribute::NoInline); } else if (D->hasAttr() && !F->hasFnAttribute(llvm::Attribute::NoInline)) { // (noinline wins over always_inline, and we can't specify both in IR) B.addAttribute(llvm::Attribute::AlwaysInline); } else if (CodeGenOpts.getInlining() == CodeGenOptions::OnlyAlwaysInlining) { // If we're not inlining, then force everything that isn't always_inline to // carry an explicit noinline attribute. if (!F->hasFnAttribute(llvm::Attribute::AlwaysInline)) B.addAttribute(llvm::Attribute::NoInline); } else { // Otherwise, propagate the inline hint attribute and potentially use its // absence to mark things as noinline. if (auto *FD = dyn_cast(D)) { // Search function and template pattern redeclarations for inline. 
auto CheckForInline = [](const FunctionDecl *FD) { auto CheckRedeclForInline = [](const FunctionDecl *Redecl) { return Redecl->isInlineSpecified(); }; if (any_of(FD->redecls(), CheckRedeclForInline)) return true; const FunctionDecl *Pattern = FD->getTemplateInstantiationPattern(); if (!Pattern) return false; return any_of(Pattern->redecls(), CheckRedeclForInline); }; if (CheckForInline(FD)) { B.addAttribute(llvm::Attribute::InlineHint); } else if (CodeGenOpts.getInlining() == CodeGenOptions::OnlyHintInlining && !FD->isInlined() && !F->hasFnAttribute(llvm::Attribute::AlwaysInline)) { B.addAttribute(llvm::Attribute::NoInline); } } } // Add other optimization related attributes if we are optimizing this // function. if (!D->hasAttr()) { if (D->hasAttr()) { if (!ShouldAddOptNone) B.addAttribute(llvm::Attribute::OptimizeForSize); B.addAttribute(llvm::Attribute::Cold); } if (D->hasAttr()) B.addAttribute(llvm::Attribute::Hot); if (D->hasAttr()) B.addAttribute(llvm::Attribute::MinSize); } F->addFnAttrs(B); unsigned alignment = D->getMaxAlignment() / Context.getCharWidth(); if (alignment) F->setAlignment(llvm::Align(alignment)); if (!D->hasAttr()) if (LangOpts.FunctionAlignment) F->setAlignment(llvm::Align(1ull << LangOpts.FunctionAlignment)); // Some C++ ABIs require 2-byte alignment for member functions, in order to // reserve a bit for differentiating between virtual and non-virtual member // functions. If the current target's C++ ABI requires this and this is a // member function, set its alignment accordingly. if (getTarget().getCXXABI().areMemberFunctionsAligned()) { if (isa(D) && F->getPointerAlignment(getDataLayout()) < 2) F->setAlignment(std::max(llvm::Align(2), F->getAlign().valueOrOne())); } // In the cross-dso CFI mode with canonical jump tables, we want !type // attributes on definitions only. if (CodeGenOpts.SanitizeCfiCrossDso && CodeGenOpts.SanitizeCfiCanonicalJumpTables) { if (auto *FD = dyn_cast(D)) { // Skip available_externally functions. They won't be codegen'ed in the // current module anyway. if (getContext().GetGVALinkageForFunction(FD) != GVA_AvailableExternally) CreateFunctionTypeMetadataForIcall(FD, F); } } // Emit type metadata on member functions for member function pointer checks. // These are only ever necessary on definitions; we're guaranteed that the // definition will be present in the LTO unit as a result of LTO visibility. auto *MD = dyn_cast(D); if (MD && requiresMemberFunctionPointerTypeMetadata(*this, MD)) { for (const CXXRecordDecl *Base : getMostBaseClasses(MD->getParent())) { llvm::Metadata *Id = CreateMetadataIdentifierForType(Context.getMemberPointerType( MD->getType(), Context.getRecordType(Base).getTypePtr())); F->addTypeMetadata(0, Id); } } } void CodeGenModule::SetCommonAttributes(GlobalDecl GD, llvm::GlobalValue *GV) { const Decl *D = GD.getDecl(); if (isa_and_nonnull(D)) setGVProperties(GV, GD); else GV->setVisibility(llvm::GlobalValue::DefaultVisibility); if (D && D->hasAttr()) addUsedOrCompilerUsedGlobal(GV); if (const auto *VD = dyn_cast_if_present(D); VD && ((CodeGenOpts.KeepPersistentStorageVariables && (VD->getStorageDuration() == SD_Static || VD->getStorageDuration() == SD_Thread)) || (CodeGenOpts.KeepStaticConsts && VD->getStorageDuration() == SD_Static && VD->getType().isConstQualified()))) addUsedOrCompilerUsedGlobal(GV); } bool CodeGenModule::GetCPUAndFeaturesAttributes(GlobalDecl GD, llvm::AttrBuilder &Attrs, bool SetTargetFeatures) { // Add target-cpu and target-features attributes to functions. 
  // If we have a decl for the function and it has a target attribute then
  // parse that and add it to the feature set.
  StringRef TargetCPU = getTarget().getTargetOpts().CPU;
  StringRef TuneCPU = getTarget().getTargetOpts().TuneCPU;
  std::vector<std::string> Features;
  const auto *FD = dyn_cast_or_null<FunctionDecl>(GD.getDecl());
  FD = FD ? FD->getMostRecentDecl() : FD;
  const auto *TD = FD ? FD->getAttr<TargetAttr>() : nullptr;
  const auto *TV = FD ? FD->getAttr<TargetVersionAttr>() : nullptr;
  assert((!TD || !TV) && "both target_version and target specified");
  const auto *SD = FD ? FD->getAttr<CPUSpecificAttr>() : nullptr;
  const auto *TC = FD ? FD->getAttr<TargetClonesAttr>() : nullptr;
  bool AddedAttr = false;
  if (TD || TV || SD || TC) {
    llvm::StringMap<bool> FeatureMap;
    getContext().getFunctionFeatureMap(FeatureMap, GD);

    // Produce the canonical string for this set of features.
    for (const llvm::StringMap<bool>::value_type &Entry : FeatureMap)
      Features.push_back((Entry.getValue() ? "+" : "-") + Entry.getKey().str());

    // Now add the target-cpu and target-features to the function.
    // While we populated the feature map above, we still need to
    // get and parse the target attribute so we can get the cpu for
    // the function.
    if (TD) {
      ParsedTargetAttr ParsedAttr =
          Target.parseTargetAttr(TD->getFeaturesStr());
      if (!ParsedAttr.CPU.empty() &&
          getTarget().isValidCPUName(ParsedAttr.CPU)) {
        TargetCPU = ParsedAttr.CPU;
        TuneCPU = ""; // Clear the tune CPU.
      }
      if (!ParsedAttr.Tune.empty() &&
          getTarget().isValidCPUName(ParsedAttr.Tune))
        TuneCPU = ParsedAttr.Tune;
    }

    if (SD) {
      // Apply the given CPU name as the 'tune-cpu' so that the optimizer can
      // favor this processor.
      TuneCPU = SD->getCPUName(GD.getMultiVersionIndex())->getName();
    }
  } else {
    // Otherwise just add the existing target cpu and target features to the
    // function.
    Features = getTarget().getTargetOpts().Features;
  }

  if (!TargetCPU.empty()) {
    Attrs.addAttribute("target-cpu", TargetCPU);
    AddedAttr = true;
  }
  if (!TuneCPU.empty()) {
    Attrs.addAttribute("tune-cpu", TuneCPU);
    AddedAttr = true;
  }
  if (!Features.empty() && SetTargetFeatures) {
    llvm::erase_if(Features, [&](const std::string &F) {
      return getTarget().isReadOnlyFeature(F.substr(1));
    });
    llvm::sort(Features);
    Attrs.addAttribute("target-features", llvm::join(Features, ","));
    AddedAttr = true;
  }

  return AddedAttr;
}

void CodeGenModule::setNonAliasAttributes(GlobalDecl GD,
                                          llvm::GlobalObject *GO) {
  const Decl *D = GD.getDecl();
  SetCommonAttributes(GD, GO);

  if (D) {
    if (auto *GV = dyn_cast<llvm::GlobalVariable>(GO)) {
      if (D->hasAttr<RetainAttr>())
        addUsedGlobal(GV);
      if (auto *SA = D->getAttr<PragmaClangBSSSectionAttr>())
        GV->addAttribute("bss-section", SA->getName());
      if (auto *SA = D->getAttr<PragmaClangDataSectionAttr>())
        GV->addAttribute("data-section", SA->getName());
      if (auto *SA = D->getAttr<PragmaClangRodataSectionAttr>())
        GV->addAttribute("rodata-section", SA->getName());
      if (auto *SA = D->getAttr<PragmaClangRelroSectionAttr>())
        GV->addAttribute("relro-section", SA->getName());
    }
    if (auto *F = dyn_cast<llvm::Function>(GO)) {
      if (D->hasAttr<RetainAttr>())
        addUsedGlobal(F);
      if (auto *SA = D->getAttr<PragmaClangTextSectionAttr>())
        if (!D->getAttr<SectionAttr>())
          F->setSection(SA->getName());

      llvm::AttrBuilder Attrs(F->getContext());
      if (GetCPUAndFeaturesAttributes(GD, Attrs)) {
        // We know that GetCPUAndFeaturesAttributes will always have the
        // newest set, since it has the newest possible FunctionDecl, so the
        // new ones should replace the old.
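        // (Illustrative note, not from the original source: the
        // "target-features" value built above is a sorted, comma-joined list
        // of '+'/'-'-prefixed feature names, e.g. "+avx2,+sse4.2,-avx512f".)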
llvm::AttributeMask RemoveAttrs; RemoveAttrs.addAttribute("target-cpu"); RemoveAttrs.addAttribute("target-features"); RemoveAttrs.addAttribute("tune-cpu"); F->removeFnAttrs(RemoveAttrs); F->addFnAttrs(Attrs); } } if (const auto *CSA = D->getAttr()) GO->setSection(CSA->getName()); else if (const auto *SA = D->getAttr()) GO->setSection(SA->getName()); } getTargetCodeGenInfo().setTargetAttributes(D, GO, *this); } void CodeGenModule::SetInternalFunctionAttributes(GlobalDecl GD, llvm::Function *F, const CGFunctionInfo &FI) { const Decl *D = GD.getDecl(); SetLLVMFunctionAttributes(GD, FI, F, /*IsThunk=*/false); SetLLVMFunctionAttributesForDefinition(D, F); F->setLinkage(llvm::Function::InternalLinkage); setNonAliasAttributes(GD, F); } static void setLinkageForGV(llvm::GlobalValue *GV, const NamedDecl *ND) { // Set linkage and visibility in case we never see a definition. LinkageInfo LV = ND->getLinkageAndVisibility(); // Don't set internal linkage on declarations. // "extern_weak" is overloaded in LLVM; we probably should have // separate linkage types for this. if (isExternallyVisible(LV.getLinkage()) && (ND->hasAttr() || ND->isWeakImported())) GV->setLinkage(llvm::GlobalValue::ExternalWeakLinkage); } void CodeGenModule::CreateFunctionTypeMetadataForIcall(const FunctionDecl *FD, llvm::Function *F) { // Only if we are checking indirect calls. if (!LangOpts.Sanitize.has(SanitizerKind::CFIICall)) return; // Non-static class methods are handled via vtable or member function pointer // checks elsewhere. if (isa(FD) && !cast(FD)->isStatic()) return; llvm::Metadata *MD = CreateMetadataIdentifierForType(FD->getType()); F->addTypeMetadata(0, MD); F->addTypeMetadata(0, CreateMetadataIdentifierGeneralized(FD->getType())); // Emit a hash-based bit set entry for cross-DSO calls. if (CodeGenOpts.SanitizeCfiCrossDso) if (auto CrossDsoTypeId = CreateCrossDsoCfiTypeId(MD)) F->addTypeMetadata(0, llvm::ConstantAsMetadata::get(CrossDsoTypeId)); } void CodeGenModule::setKCFIType(const FunctionDecl *FD, llvm::Function *F) { llvm::LLVMContext &Ctx = F->getContext(); llvm::MDBuilder MDB(Ctx); F->setMetadata(llvm::LLVMContext::MD_kcfi_type, llvm::MDNode::get( Ctx, MDB.createConstant(CreateKCFITypeId(FD->getType())))); } static bool allowKCFIIdentifier(StringRef Name) { // KCFI type identifier constants are only necessary for external assembly // functions, which means it's safe to skip unusual names. Subset of // MCAsmInfo::isAcceptableChar() and MCAsmInfoXCOFF::isAcceptableChar(). return llvm::all_of(Name, [](const char &C) { return llvm::isAlnum(C) || C == '_' || C == '.'; }); } void CodeGenModule::finalizeKCFITypes() { llvm::Module &M = getModule(); for (auto &F : M.functions()) { // Remove KCFI type metadata from non-address-taken local functions. bool AddressTaken = F.hasAddressTaken(); if (!AddressTaken && F.hasLocalLinkage()) F.eraseMetadata(llvm::LLVMContext::MD_kcfi_type); // Generate a constant with the expected KCFI type identifier for all // address-taken function declarations to support annotating indirectly // called assembly functions. 
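    // Illustrative example (not part of the original source): for an
    // address-taken external declaration `foo`, the code below appends
    // module-level inline asm roughly of the form:
    //   .weak __kcfi_typeid_foo
    //   .set  __kcfi_typeid_foo, <integer KCFI type id of foo's prototype>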
if (!AddressTaken || !F.isDeclaration()) continue; const llvm::ConstantInt *Type; if (const llvm::MDNode *MD = F.getMetadata(llvm::LLVMContext::MD_kcfi_type)) Type = llvm::mdconst::extract(MD->getOperand(0)); else continue; StringRef Name = F.getName(); if (!allowKCFIIdentifier(Name)) continue; std::string Asm = (".weak __kcfi_typeid_" + Name + "\n.set __kcfi_typeid_" + Name + ", " + Twine(Type->getZExtValue()) + "\n") .str(); M.appendModuleInlineAsm(Asm); } } void CodeGenModule::SetFunctionAttributes(GlobalDecl GD, llvm::Function *F, bool IsIncompleteFunction, bool IsThunk) { if (llvm::Intrinsic::ID IID = F->getIntrinsicID()) { // If this is an intrinsic function, set the function's attributes // to the intrinsic's attributes. F->setAttributes(llvm::Intrinsic::getAttributes(getLLVMContext(), IID)); return; } const auto *FD = cast(GD.getDecl()); if (!IsIncompleteFunction) SetLLVMFunctionAttributes(GD, getTypes().arrangeGlobalDeclaration(GD), F, IsThunk); // Add the Returned attribute for "this", except for iOS 5 and earlier // where substantial code, including the libstdc++ dylib, was compiled with // GCC and does not actually return "this". if (!IsThunk && getCXXABI().HasThisReturn(GD) && !(getTriple().isiOS() && getTriple().isOSVersionLT(6))) { assert(!F->arg_empty() && F->arg_begin()->getType() ->canLosslesslyBitCastTo(F->getReturnType()) && "unexpected this return"); F->addParamAttr(0, llvm::Attribute::Returned); } // Only a few attributes are set on declarations; these may later be // overridden by a definition. setLinkageForGV(F, FD); setGVProperties(F, FD); // Setup target-specific attributes. if (!IsIncompleteFunction && F->isDeclaration()) getTargetCodeGenInfo().setTargetAttributes(FD, F, *this); if (const auto *CSA = FD->getAttr()) F->setSection(CSA->getName()); else if (const auto *SA = FD->getAttr()) F->setSection(SA->getName()); if (const auto *EA = FD->getAttr()) { if (EA->isError()) F->addFnAttr("dontcall-error", EA->getUserDiagnostic()); else if (EA->isWarning()) F->addFnAttr("dontcall-warn", EA->getUserDiagnostic()); } // If we plan on emitting this inline builtin, we can't treat it as a builtin. if (FD->isInlineBuiltinDeclaration()) { const FunctionDecl *FDBody; bool HasBody = FD->hasBody(FDBody); (void)HasBody; assert(HasBody && "Inline builtin declarations should always have an " "available body!"); if (shouldEmitFunction(FDBody)) F->addFnAttr(llvm::Attribute::NoBuiltin); } if (FD->isReplaceableGlobalAllocationFunction()) { // A replaceable global allocation function does not act like a builtin by // default, only if it is invoked by a new-expression or delete-expression. F->addFnAttr(llvm::Attribute::NoBuiltin); } if (isa(FD) || isa(FD)) F->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); else if (const auto *MD = dyn_cast(FD)) if (MD->isVirtual()) F->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); // Don't emit entries for function declarations in the cross-DSO mode. This // is handled with better precision by the receiving DSO. But if jump tables // are non-canonical then we need type metadata in order to produce the local // jump table. 
  if (!CodeGenOpts.SanitizeCfiCrossDso ||
      !CodeGenOpts.SanitizeCfiCanonicalJumpTables)
    CreateFunctionTypeMetadataForIcall(FD, F);

  if (LangOpts.Sanitize.has(SanitizerKind::KCFI))
    setKCFIType(FD, F);

  if (getLangOpts().OpenMP && FD->hasAttr<OMPDeclareSimdDeclAttr>())
    getOpenMPRuntime().emitDeclareSimdFunction(FD, F);

  if (CodeGenOpts.InlineMaxStackSize != UINT_MAX)
    F->addFnAttr("inline-max-stacksize",
                 llvm::utostr(CodeGenOpts.InlineMaxStackSize));

  if (const auto *CB = FD->getAttr<CallbackAttr>()) {
    // Annotate the callback behavior as metadata:
    //  - The callback callee (as argument number).
    //  - The callback payloads (as argument numbers).
    llvm::LLVMContext &Ctx = F->getContext();
    llvm::MDBuilder MDB(Ctx);

    // The payload indices are all but the first one in the encoding. The first
    // identifies the callback callee.
    int CalleeIdx = *CB->encoding_begin();
    ArrayRef<int> PayloadIndices(CB->encoding_begin() + 1, CB->encoding_end());
    F->addMetadata(llvm::LLVMContext::MD_callback,
                   *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding(
                                               CalleeIdx, PayloadIndices,
                                               /* VarArgsArePassed */ false)}));
  }
}

void CodeGenModule::addUsedGlobal(llvm::GlobalValue *GV) {
  assert((isa<llvm::GlobalIFunc>(GV) || !GV->isDeclaration()) &&
         "Only globals with definition can force usage.");
  LLVMUsed.emplace_back(GV);
}

void CodeGenModule::addCompilerUsedGlobal(llvm::GlobalValue *GV) {
  assert(!GV->isDeclaration() &&
         "Only globals with definition can force usage.");
  LLVMCompilerUsed.emplace_back(GV);
}

void CodeGenModule::addUsedOrCompilerUsedGlobal(llvm::GlobalValue *GV) {
  assert((isa<llvm::GlobalIFunc>(GV) || !GV->isDeclaration()) &&
         "Only globals with definition can force usage.");
  if (getTriple().isOSBinFormatELF())
    LLVMCompilerUsed.emplace_back(GV);
  else
    LLVMUsed.emplace_back(GV);
}

static void emitUsed(CodeGenModule &CGM, StringRef Name,
                     std::vector<llvm::WeakTrackingVH> &List) {
  // Don't create llvm.used if there is no need.
  if (List.empty())
    return;

  // Convert List to what ConstantArray needs.
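  // (Illustrative note, not from the original source: the variable emitted
  // below is an appending-linkage array of pointers placed in the
  // "llvm.metadata" section, e.g. @llvm.used = appending global [N x ptr] [...].)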
SmallVector UsedArray; UsedArray.resize(List.size()); for (unsigned i = 0, e = List.size(); i != e; ++i) { UsedArray[i] = llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast( cast(&*List[i]), CGM.Int8PtrTy); } if (UsedArray.empty()) return; llvm::ArrayType *ATy = llvm::ArrayType::get(CGM.Int8PtrTy, UsedArray.size()); auto *GV = new llvm::GlobalVariable( CGM.getModule(), ATy, false, llvm::GlobalValue::AppendingLinkage, llvm::ConstantArray::get(ATy, UsedArray), Name); GV->setSection("llvm.metadata"); } void CodeGenModule::emitLLVMUsed() { emitUsed(*this, "llvm.used", LLVMUsed); emitUsed(*this, "llvm.compiler.used", LLVMCompilerUsed); } void CodeGenModule::AppendLinkerOptions(StringRef Opts) { auto *MDOpts = llvm::MDString::get(getLLVMContext(), Opts); LinkerOptionsMetadata.push_back(llvm::MDNode::get(getLLVMContext(), MDOpts)); } void CodeGenModule::AddDetectMismatch(StringRef Name, StringRef Value) { llvm::SmallString<32> Opt; getTargetCodeGenInfo().getDetectMismatchOption(Name, Value, Opt); if (Opt.empty()) return; auto *MDOpts = llvm::MDString::get(getLLVMContext(), Opt); LinkerOptionsMetadata.push_back(llvm::MDNode::get(getLLVMContext(), MDOpts)); } void CodeGenModule::AddDependentLib(StringRef Lib) { auto &C = getLLVMContext(); if (getTarget().getTriple().isOSBinFormatELF()) { ELFDependentLibraries.push_back( llvm::MDNode::get(C, llvm::MDString::get(C, Lib))); return; } llvm::SmallString<24> Opt; getTargetCodeGenInfo().getDependentLibraryOption(Lib, Opt); auto *MDOpts = llvm::MDString::get(getLLVMContext(), Opt); LinkerOptionsMetadata.push_back(llvm::MDNode::get(C, MDOpts)); } /// Add link options implied by the given module, including modules /// it depends on, using a postorder walk. static void addLinkOptionsPostorder(CodeGenModule &CGM, Module *Mod, SmallVectorImpl &Metadata, llvm::SmallPtrSet &Visited) { // Import this module's parent. if (Mod->Parent && Visited.insert(Mod->Parent).second) { addLinkOptionsPostorder(CGM, Mod->Parent, Metadata, Visited); } // Import this module's dependencies. for (Module *Import : llvm::reverse(Mod->Imports)) { if (Visited.insert(Import).second) addLinkOptionsPostorder(CGM, Import, Metadata, Visited); } // Add linker options to link against the libraries/frameworks // described by this module. llvm::LLVMContext &Context = CGM.getLLVMContext(); bool IsELF = CGM.getTarget().getTriple().isOSBinFormatELF(); // For modules that use export_as for linking, use that module // name instead. if (Mod->UseExportAsModuleLinkName) return; for (const Module::LinkLibrary &LL : llvm::reverse(Mod->LinkLibraries)) { // Link against a framework. Frameworks are currently Darwin only, so we // don't to ask TargetCodeGenInfo for the spelling of the linker option. if (LL.IsFramework) { llvm::Metadata *Args[2] = {llvm::MDString::get(Context, "-framework"), llvm::MDString::get(Context, LL.Library)}; Metadata.push_back(llvm::MDNode::get(Context, Args)); continue; } // Link against a library. 
if (IsELF) { llvm::Metadata *Args[2] = { llvm::MDString::get(Context, "lib"), llvm::MDString::get(Context, LL.Library), }; Metadata.push_back(llvm::MDNode::get(Context, Args)); } else { llvm::SmallString<24> Opt; CGM.getTargetCodeGenInfo().getDependentLibraryOption(LL.Library, Opt); auto *OptString = llvm::MDString::get(Context, Opt); Metadata.push_back(llvm::MDNode::get(Context, OptString)); } } } void CodeGenModule::EmitModuleInitializers(clang::Module *Primary) { assert(Primary->isNamedModuleUnit() && "We should only emit module initializers for named modules."); // Emit the initializers in the order that sub-modules appear in the // source, first Global Module Fragments, if present. if (auto GMF = Primary->getGlobalModuleFragment()) { for (Decl *D : getContext().getModuleInitializers(GMF)) { if (isa(D)) continue; assert(isa(D) && "GMF initializer decl is not a var?"); EmitTopLevelDecl(D); } } // Second any associated with the module, itself. for (Decl *D : getContext().getModuleInitializers(Primary)) { // Skip import decls, the inits for those are called explicitly. if (isa(D)) continue; EmitTopLevelDecl(D); } // Third any associated with the Privat eMOdule Fragment, if present. if (auto PMF = Primary->getPrivateModuleFragment()) { for (Decl *D : getContext().getModuleInitializers(PMF)) { // Skip import decls, the inits for those are called explicitly. if (isa(D)) continue; assert(isa(D) && "PMF initializer decl is not a var?"); EmitTopLevelDecl(D); } } } void CodeGenModule::EmitModuleLinkOptions() { // Collect the set of all of the modules we want to visit to emit link // options, which is essentially the imported modules and all of their // non-explicit child modules. llvm::SetVector LinkModules; llvm::SmallPtrSet Visited; SmallVector Stack; // Seed the stack with imported modules. for (Module *M : ImportedModules) { // Do not add any link flags when an implementation TU of a module imports // a header of that same module. if (M->getTopLevelModuleName() == getLangOpts().CurrentModule && !getLangOpts().isCompilingModule()) continue; if (Visited.insert(M).second) Stack.push_back(M); } // Find all of the modules to import, making a little effort to prune // non-leaf modules. while (!Stack.empty()) { clang::Module *Mod = Stack.pop_back_val(); bool AnyChildren = false; // Visit the submodules of this module. for (const auto &SM : Mod->submodules()) { // Skip explicit children; they need to be explicitly imported to be // linked against. if (SM->IsExplicit) continue; if (Visited.insert(SM).second) { Stack.push_back(SM); AnyChildren = true; } } // We didn't find any children, so add this module to the list of // modules to link against. if (!AnyChildren) { LinkModules.insert(Mod); } } // Add link options for all of the imported modules in reverse topological // order. We don't do anything to try to order import link flags with respect // to linker options inserted by things like #pragma comment(). SmallVector MetadataArgs; Visited.clear(); for (Module *M : LinkModules) if (Visited.insert(M).second) addLinkOptionsPostorder(*this, M, MetadataArgs, Visited); std::reverse(MetadataArgs.begin(), MetadataArgs.end()); LinkerOptionsMetadata.append(MetadataArgs.begin(), MetadataArgs.end()); // Add the linker options metadata flag. auto *NMD = getModule().getOrInsertNamedMetadata("llvm.linker.options"); for (auto *MD : LinkerOptionsMetadata) NMD->addOperand(MD); } void CodeGenModule::EmitDeferred() { // Emit deferred declare target declarations. 
if (getLangOpts().OpenMP && !getLangOpts().OpenMPSimd) getOpenMPRuntime().emitDeferredTargetDecls(); // Emit code for any potentially referenced deferred decls. Since a // previously unused static decl may become used during the generation of code // for a static function, iterate until no changes are made. if (!DeferredVTables.empty()) { EmitDeferredVTables(); // Emitting a vtable doesn't directly cause more vtables to // become deferred, although it can cause functions to be // emitted that then need those vtables. assert(DeferredVTables.empty()); } // Emit CUDA/HIP static device variables referenced by host code only. // Note we should not clear CUDADeviceVarODRUsedByHost since it is still // needed for further handling. if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice) llvm::append_range(DeferredDeclsToEmit, getContext().CUDADeviceVarODRUsedByHost); // Stop if we're out of both deferred vtables and deferred declarations. if (DeferredDeclsToEmit.empty()) return; // Grab the list of decls to emit. If EmitGlobalDefinition schedules more // work, it will not interfere with this. std::vector CurDeclsToEmit; CurDeclsToEmit.swap(DeferredDeclsToEmit); for (GlobalDecl &D : CurDeclsToEmit) { // We should call GetAddrOfGlobal with IsForDefinition set to true in order // to get GlobalValue with exactly the type we need, not something that // might had been created for another decl with the same mangled name but // different type. llvm::GlobalValue *GV = dyn_cast( GetAddrOfGlobal(D, ForDefinition)); // In case of different address spaces, we may still get a cast, even with // IsForDefinition equal to true. Query mangled names table to get // GlobalValue. if (!GV) GV = GetGlobalValue(getMangledName(D)); // Make sure GetGlobalValue returned non-null. assert(GV); // Check to see if we've already emitted this. This is necessary // for a couple of reasons: first, decls can end up in the // deferred-decls queue multiple times, and second, decls can end // up with definitions in unusual ways (e.g. by an extern inline // function acquiring a strong function redefinition). Just // ignore these cases. if (!GV->isDeclaration()) continue; // If this is OpenMP, check if it is legal to emit this global normally. if (LangOpts.OpenMP && OpenMPRuntime && OpenMPRuntime->emitTargetGlobal(D)) continue; // Otherwise, emit the definition and move on to the next one. EmitGlobalDefinition(D, GV); // If we found out that we need to emit more decls, do that recursively. // This has the advantage that the decls are emitted in a DFS and related // ones are close together, which is convenient for testing. if (!DeferredVTables.empty() || !DeferredDeclsToEmit.empty()) { EmitDeferred(); assert(DeferredVTables.empty() && DeferredDeclsToEmit.empty()); } } } void CodeGenModule::EmitVTablesOpportunistically() { // Try to emit external vtables as available_externally if they have emitted // all inlined virtual functions. It runs after EmitDeferred() and therefore // is not allowed to create new references to things that need to be emitted // lazily. Note that it also uses fact that we eagerly emitting RTTI. 
assert((OpportunisticVTables.empty() || shouldOpportunisticallyEmitVTables()) && "Only emit opportunistic vtables with optimizations"); for (const CXXRecordDecl *RD : OpportunisticVTables) { assert(getVTables().isVTableExternal(RD) && "This queue should only contain external vtables"); if (getCXXABI().canSpeculativelyEmitVTable(RD)) VTables.GenerateClassData(RD); } OpportunisticVTables.clear(); } void CodeGenModule::EmitGlobalAnnotations() { for (const auto& [MangledName, VD] : DeferredAnnotations) { llvm::GlobalValue *GV = GetGlobalValue(MangledName); if (GV) AddGlobalAnnotations(VD, GV); } DeferredAnnotations.clear(); if (Annotations.empty()) return; // Create a new global variable for the ConstantStruct in the Module. llvm::Constant *Array = llvm::ConstantArray::get(llvm::ArrayType::get( Annotations[0]->getType(), Annotations.size()), Annotations); auto *gv = new llvm::GlobalVariable(getModule(), Array->getType(), false, llvm::GlobalValue::AppendingLinkage, Array, "llvm.global.annotations"); gv->setSection(AnnotationSection); } llvm::Constant *CodeGenModule::EmitAnnotationString(StringRef Str) { llvm::Constant *&AStr = AnnotationStrings[Str]; if (AStr) return AStr; // Not found yet, create a new global. llvm::Constant *s = llvm::ConstantDataArray::getString(getLLVMContext(), Str); auto *gv = new llvm::GlobalVariable( getModule(), s->getType(), true, llvm::GlobalValue::PrivateLinkage, s, ".str", nullptr, llvm::GlobalValue::NotThreadLocal, ConstGlobalsPtrTy->getAddressSpace()); gv->setSection(AnnotationSection); gv->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); AStr = gv; return gv; } llvm::Constant *CodeGenModule::EmitAnnotationUnit(SourceLocation Loc) { SourceManager &SM = getContext().getSourceManager(); PresumedLoc PLoc = SM.getPresumedLoc(Loc); if (PLoc.isValid()) return EmitAnnotationString(PLoc.getFilename()); return EmitAnnotationString(SM.getBufferName(Loc)); } llvm::Constant *CodeGenModule::EmitAnnotationLineNo(SourceLocation L) { SourceManager &SM = getContext().getSourceManager(); PresumedLoc PLoc = SM.getPresumedLoc(L); unsigned LineNo = PLoc.isValid() ? PLoc.getLine() : SM.getExpansionLineNumber(L); return llvm::ConstantInt::get(Int32Ty, LineNo); } llvm::Constant *CodeGenModule::EmitAnnotationArgs(const AnnotateAttr *Attr) { ArrayRef Exprs = {Attr->args_begin(), Attr->args_size()}; if (Exprs.empty()) return llvm::ConstantPointerNull::get(ConstGlobalsPtrTy); llvm::FoldingSetNodeID ID; for (Expr *E : Exprs) { ID.Add(cast(E)->getAPValueResult()); } llvm::Constant *&Lookup = AnnotationArgs[ID.ComputeHash()]; if (Lookup) return Lookup; llvm::SmallVector LLVMArgs; LLVMArgs.reserve(Exprs.size()); ConstantEmitter ConstEmiter(*this); llvm::transform(Exprs, std::back_inserter(LLVMArgs), [&](const Expr *E) { const auto *CE = cast(E); return ConstEmiter.emitAbstract(CE->getBeginLoc(), CE->getAPValueResult(), CE->getType()); }); auto *Struct = llvm::ConstantStruct::getAnon(LLVMArgs); auto *GV = new llvm::GlobalVariable(getModule(), Struct->getType(), true, llvm::GlobalValue::PrivateLinkage, Struct, ".args"); GV->setSection(AnnotationSection); GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); Lookup = GV; return GV; } llvm::Constant *CodeGenModule::EmitAnnotateAttr(llvm::GlobalValue *GV, const AnnotateAttr *AA, SourceLocation L) { // Get the globals for file name, annotation, and the line number. 
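  // (Illustrative note, not from the original source: each entry appended to
  // llvm.global.annotations below is an anonymous struct of the form
  // { annotated global, annotation string, translation-unit string,
  //   line number, extra-arguments struct }.)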
llvm::Constant *AnnoGV = EmitAnnotationString(AA->getAnnotation()), *UnitGV = EmitAnnotationUnit(L), *LineNoCst = EmitAnnotationLineNo(L), *Args = EmitAnnotationArgs(AA); llvm::Constant *GVInGlobalsAS = GV; if (GV->getAddressSpace() != getDataLayout().getDefaultGlobalsAddressSpace()) { GVInGlobalsAS = llvm::ConstantExpr::getAddrSpaceCast( GV, llvm::PointerType::get( GV->getContext(), getDataLayout().getDefaultGlobalsAddressSpace())); } // Create the ConstantStruct for the global annotation. llvm::Constant *Fields[] = { GVInGlobalsAS, AnnoGV, UnitGV, LineNoCst, Args, }; return llvm::ConstantStruct::getAnon(Fields); } void CodeGenModule::AddGlobalAnnotations(const ValueDecl *D, llvm::GlobalValue *GV) { assert(D->hasAttr() && "no annotate attribute"); // Get the struct elements for these annotations. for (const auto *I : D->specific_attrs()) Annotations.push_back(EmitAnnotateAttr(GV, I, D->getLocation())); } bool CodeGenModule::isInNoSanitizeList(SanitizerMask Kind, llvm::Function *Fn, SourceLocation Loc) const { const auto &NoSanitizeL = getContext().getNoSanitizeList(); // NoSanitize by function name. if (NoSanitizeL.containsFunction(Kind, Fn->getName())) return true; // NoSanitize by location. Check "mainfile" prefix. auto &SM = Context.getSourceManager(); FileEntryRef MainFile = *SM.getFileEntryRefForID(SM.getMainFileID()); if (NoSanitizeL.containsMainFile(Kind, MainFile.getName())) return true; // Check "src" prefix. if (Loc.isValid()) return NoSanitizeL.containsLocation(Kind, Loc); // If location is unknown, this may be a compiler-generated function. Assume // it's located in the main file. return NoSanitizeL.containsFile(Kind, MainFile.getName()); } bool CodeGenModule::isInNoSanitizeList(SanitizerMask Kind, llvm::GlobalVariable *GV, SourceLocation Loc, QualType Ty, StringRef Category) const { const auto &NoSanitizeL = getContext().getNoSanitizeList(); if (NoSanitizeL.containsGlobal(Kind, GV->getName(), Category)) return true; auto &SM = Context.getSourceManager(); if (NoSanitizeL.containsMainFile( Kind, SM.getFileEntryRefForID(SM.getMainFileID())->getName(), Category)) return true; if (NoSanitizeL.containsLocation(Kind, Loc, Category)) return true; // Check global type. if (!Ty.isNull()) { // Drill down the array types: if global variable of a fixed type is // not sanitized, we also don't instrument arrays of them. while (auto AT = dyn_cast(Ty.getTypePtr())) Ty = AT->getElementType(); Ty = Ty.getCanonicalType().getUnqualifiedType(); // Only record types (classes, structs etc.) are ignored. 
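    // (Illustrative note, not from the original source: this is what allows an
    // ignorelist entry such as "type:MyType" to suppress instrumentation of
    // globals whose record type matches.)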
if (Ty->isRecordType()) { std::string TypeStr = Ty.getAsString(getContext().getPrintingPolicy()); if (NoSanitizeL.containsType(Kind, TypeStr, Category)) return true; } } return false; } bool CodeGenModule::imbueXRayAttrs(llvm::Function *Fn, SourceLocation Loc, StringRef Category) const { const auto &XRayFilter = getContext().getXRayFilter(); using ImbueAttr = XRayFunctionFilter::ImbueAttribute; auto Attr = ImbueAttr::NONE; if (Loc.isValid()) Attr = XRayFilter.shouldImbueLocation(Loc, Category); if (Attr == ImbueAttr::NONE) Attr = XRayFilter.shouldImbueFunction(Fn->getName()); switch (Attr) { case ImbueAttr::NONE: return false; case ImbueAttr::ALWAYS: Fn->addFnAttr("function-instrument", "xray-always"); break; case ImbueAttr::ALWAYS_ARG1: Fn->addFnAttr("function-instrument", "xray-always"); Fn->addFnAttr("xray-log-args", "1"); break; case ImbueAttr::NEVER: Fn->addFnAttr("function-instrument", "xray-never"); break; } return true; } ProfileList::ExclusionType CodeGenModule::isFunctionBlockedByProfileList(llvm::Function *Fn, SourceLocation Loc) const { const auto &ProfileList = getContext().getProfileList(); // If the profile list is empty, then instrument everything. if (ProfileList.isEmpty()) return ProfileList::Allow; CodeGenOptions::ProfileInstrKind Kind = getCodeGenOpts().getProfileInstr(); // First, check the function name. if (auto V = ProfileList.isFunctionExcluded(Fn->getName(), Kind)) return *V; // Next, check the source location. if (Loc.isValid()) if (auto V = ProfileList.isLocationExcluded(Loc, Kind)) return *V; // If location is unknown, this may be a compiler-generated function. Assume // it's located in the main file. auto &SM = Context.getSourceManager(); if (auto MainFile = SM.getFileEntryRefForID(SM.getMainFileID())) if (auto V = ProfileList.isFileExcluded(MainFile->getName(), Kind)) return *V; return ProfileList.getDefault(Kind); } ProfileList::ExclusionType CodeGenModule::isFunctionBlockedFromProfileInstr(llvm::Function *Fn, SourceLocation Loc) const { auto V = isFunctionBlockedByProfileList(Fn, Loc); if (V != ProfileList::Allow) return V; auto NumGroups = getCodeGenOpts().ProfileTotalFunctionGroups; if (NumGroups > 1) { auto Group = llvm::crc32(arrayRefFromStringRef(Fn->getName())) % NumGroups; if (Group != getCodeGenOpts().ProfileSelectedFunctionGroup) return ProfileList::Skip; } return ProfileList::Allow; } bool CodeGenModule::MustBeEmitted(const ValueDecl *Global) { // Never defer when EmitAllDecls is specified. if (LangOpts.EmitAllDecls) return true; const auto *VD = dyn_cast(Global); if (VD && ((CodeGenOpts.KeepPersistentStorageVariables && (VD->getStorageDuration() == SD_Static || VD->getStorageDuration() == SD_Thread)) || (CodeGenOpts.KeepStaticConsts && VD->getStorageDuration() == SD_Static && VD->getType().isConstQualified()))) return true; return getContext().DeclMustBeEmitted(Global); } bool CodeGenModule::MayBeEmittedEagerly(const ValueDecl *Global) { // In OpenMP 5.0 variables and function may be marked as // device_type(host/nohost) and we should not emit them eagerly unless we sure // that they must be emitted on the host/device. To be sure we need to have // seen a declare target with an explicit mentioning of the function, we know // we have if the level of the declare target attribute is -1. Note that we // check somewhere else if we should emit this at all. 
if (LangOpts.OpenMP >= 50 && !LangOpts.OpenMPSimd) { std::optional ActiveAttr = OMPDeclareTargetDeclAttr::getActiveAttr(Global); if (!ActiveAttr || (*ActiveAttr)->getLevel() != (unsigned)-1) return false; } if (const auto *FD = dyn_cast(Global)) { if (FD->getTemplateSpecializationKind() == TSK_ImplicitInstantiation) // Implicit template instantiations may change linkage if they are later // explicitly instantiated, so they should not be emitted eagerly. return false; // Defer until all versions have been semantically checked. if (FD->hasAttr() && !FD->isMultiVersion()) return false; } if (const auto *VD = dyn_cast(Global)) { if (Context.getInlineVariableDefinitionKind(VD) == ASTContext::InlineVariableDefinitionKind::WeakUnknown) // A definition of an inline constexpr static data member may change // linkage later if it's redeclared outside the class. return false; if (CXX20ModuleInits && VD->getOwningModule() && !VD->getOwningModule()->isModuleMapModule()) { // For CXX20, module-owned initializers need to be deferred, since it is // not known at this point if they will be run for the current module or // as part of the initializer for an imported one. return false; } } // If OpenMP is enabled and threadprivates must be generated like TLS, delay // codegen for global variables, because they may be marked as threadprivate. if (LangOpts.OpenMP && LangOpts.OpenMPUseTLS && getContext().getTargetInfo().isTLSSupported() && isa(Global) && !Global->getType().isConstantStorage(getContext(), false, false) && !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(Global)) return false; return true; } ConstantAddress CodeGenModule::GetAddrOfMSGuidDecl(const MSGuidDecl *GD) { StringRef Name = getMangledName(GD); // The UUID descriptor should be pointer aligned. CharUnits Alignment = CharUnits::fromQuantity(PointerAlignInBytes); // Look for an existing global. if (llvm::GlobalVariable *GV = getModule().getNamedGlobal(Name)) return ConstantAddress(GV, GV->getValueType(), Alignment); ConstantEmitter Emitter(*this); llvm::Constant *Init; APValue &V = GD->getAsAPValue(); if (!V.isAbsent()) { // If possible, emit the APValue version of the initializer. In particular, // this gets the type of the constant right. Init = Emitter.emitForInitializer( GD->getAsAPValue(), GD->getType().getAddressSpace(), GD->getType()); } else { // As a fallback, directly construct the constant. // FIXME: This may get padding wrong under esoteric struct layout rules. // MSVC appears to create a complete type 'struct __s_GUID' that it // presumably uses to represent these constants. 
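    // (Illustrative note, not from the original source: the fallback below
    // materializes the GUID as an anonymous { i32, i16, i16, [8 x i8] }
    // constant built from MSGuidDecl::Parts.)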
MSGuidDecl::Parts Parts = GD->getParts(); llvm::Constant *Fields[4] = { llvm::ConstantInt::get(Int32Ty, Parts.Part1), llvm::ConstantInt::get(Int16Ty, Parts.Part2), llvm::ConstantInt::get(Int16Ty, Parts.Part3), llvm::ConstantDataArray::getRaw( StringRef(reinterpret_cast(Parts.Part4And5), 8), 8, Int8Ty)}; Init = llvm::ConstantStruct::getAnon(Fields); } auto *GV = new llvm::GlobalVariable( getModule(), Init->getType(), /*isConstant=*/true, llvm::GlobalValue::LinkOnceODRLinkage, Init, Name); if (supportsCOMDAT()) GV->setComdat(TheModule.getOrInsertComdat(GV->getName())); setDSOLocal(GV); if (!V.isAbsent()) { Emitter.finalize(GV); return ConstantAddress(GV, GV->getValueType(), Alignment); } llvm::Type *Ty = getTypes().ConvertTypeForMem(GD->getType()); return ConstantAddress(GV, Ty, Alignment); } ConstantAddress CodeGenModule::GetAddrOfUnnamedGlobalConstantDecl( const UnnamedGlobalConstantDecl *GCD) { CharUnits Alignment = getContext().getTypeAlignInChars(GCD->getType()); llvm::GlobalVariable **Entry = nullptr; Entry = &UnnamedGlobalConstantDeclMap[GCD]; if (*Entry) return ConstantAddress(*Entry, (*Entry)->getValueType(), Alignment); ConstantEmitter Emitter(*this); llvm::Constant *Init; const APValue &V = GCD->getValue(); assert(!V.isAbsent()); Init = Emitter.emitForInitializer(V, GCD->getType().getAddressSpace(), GCD->getType()); auto *GV = new llvm::GlobalVariable(getModule(), Init->getType(), /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, Init, ".constant"); GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); GV->setAlignment(Alignment.getAsAlign()); Emitter.finalize(GV); *Entry = GV; return ConstantAddress(GV, GV->getValueType(), Alignment); } ConstantAddress CodeGenModule::GetAddrOfTemplateParamObject( const TemplateParamObjectDecl *TPO) { StringRef Name = getMangledName(TPO); CharUnits Alignment = getNaturalTypeAlignment(TPO->getType()); if (llvm::GlobalVariable *GV = getModule().getNamedGlobal(Name)) return ConstantAddress(GV, GV->getValueType(), Alignment); ConstantEmitter Emitter(*this); llvm::Constant *Init = Emitter.emitForInitializer( TPO->getValue(), TPO->getType().getAddressSpace(), TPO->getType()); if (!Init) { ErrorUnsupported(TPO, "template parameter object"); return ConstantAddress::invalid(); } llvm::GlobalValue::LinkageTypes Linkage = isExternallyVisible(TPO->getLinkageAndVisibility().getLinkage()) ? llvm::GlobalValue::LinkOnceODRLinkage : llvm::GlobalValue::InternalLinkage; auto *GV = new llvm::GlobalVariable(getModule(), Init->getType(), /*isConstant=*/true, Linkage, Init, Name); setGVProperties(GV, TPO); if (supportsCOMDAT()) GV->setComdat(TheModule.getOrInsertComdat(GV->getName())); Emitter.finalize(GV); return ConstantAddress(GV, GV->getValueType(), Alignment); } ConstantAddress CodeGenModule::GetWeakRefReference(const ValueDecl *VD) { const AliasAttr *AA = VD->getAttr(); assert(AA && "No alias?"); CharUnits Alignment = getContext().getDeclAlign(VD); llvm::Type *DeclTy = getTypes().ConvertTypeForMem(VD->getType()); // See if there is already something with the target's name in the module. 
llvm::GlobalValue *Entry = GetGlobalValue(AA->getAliasee()); if (Entry) return ConstantAddress(Entry, DeclTy, Alignment); llvm::Constant *Aliasee; if (isa(DeclTy)) Aliasee = GetOrCreateLLVMFunction(AA->getAliasee(), DeclTy, GlobalDecl(cast(VD)), /*ForVTable=*/false); else Aliasee = GetOrCreateLLVMGlobal(AA->getAliasee(), DeclTy, LangAS::Default, nullptr); auto *F = cast(Aliasee); F->setLinkage(llvm::Function::ExternalWeakLinkage); WeakRefReferences.insert(F); return ConstantAddress(Aliasee, DeclTy, Alignment); } template static bool hasImplicitAttr(const ValueDecl *D) { if (!D) return false; if (auto *A = D->getAttr()) return A->isImplicit(); return D->isImplicit(); } bool CodeGenModule::shouldEmitCUDAGlobalVar(const VarDecl *Global) const { assert(LangOpts.CUDA && "Should not be called by non-CUDA languages"); // We need to emit host-side 'shadows' for all global // device-side variables because the CUDA runtime needs their // size and host-side address in order to provide access to // their device-side incarnations. return !LangOpts.CUDAIsDevice || Global->hasAttr() || Global->hasAttr() || Global->hasAttr() || Global->getType()->isCUDADeviceBuiltinSurfaceType() || Global->getType()->isCUDADeviceBuiltinTextureType(); } void CodeGenModule::EmitGlobal(GlobalDecl GD) { const auto *Global = cast(GD.getDecl()); // Weak references don't produce any output by themselves. if (Global->hasAttr()) return; // If this is an alias definition (which otherwise looks like a declaration) // emit it now. if (Global->hasAttr()) return EmitAliasDefinition(GD); // IFunc like an alias whose value is resolved at runtime by calling resolver. if (Global->hasAttr()) return emitIFuncDefinition(GD); // If this is a cpu_dispatch multiversion function, emit the resolver. if (Global->hasAttr()) return emitCPUDispatchDefinition(GD); // If this is CUDA, be selective about which declarations we emit. // Non-constexpr non-lambda implicit host device functions are not emitted // unless they are used on device side. if (LangOpts.CUDA) { assert((isa(Global) || isa(Global)) && "Expected Variable or Function"); if (const auto *VD = dyn_cast(Global)) { if (!shouldEmitCUDAGlobalVar(VD)) return; } else if (LangOpts.CUDAIsDevice) { const auto *FD = dyn_cast(Global); if ((!Global->hasAttr() || (LangOpts.OffloadImplicitHostDeviceTemplates && hasImplicitAttr(FD) && hasImplicitAttr(FD) && !FD->isConstexpr() && !isLambdaCallOperator(FD) && !getContext().CUDAImplicitHostDeviceFunUsedByDevice.count(FD))) && !Global->hasAttr() && !(LangOpts.HIPStdPar && isa(Global) && !Global->hasAttr())) return; // Device-only functions are the only things we skip. } else if (!Global->hasAttr() && Global->hasAttr()) return; } if (LangOpts.OpenMP) { // If this is OpenMP, check if it is legal to emit this global normally. if (OpenMPRuntime && OpenMPRuntime->emitTargetGlobal(GD)) return; if (auto *DRD = dyn_cast(Global)) { if (MustBeEmitted(Global)) EmitOMPDeclareReduction(DRD); return; } if (auto *DMD = dyn_cast(Global)) { if (MustBeEmitted(Global)) EmitOMPDeclareMapper(DMD); return; } } // Ignore declarations, they will be emitted on their first use. if (const auto *FD = dyn_cast(Global)) { // Update deferred annotations with the latest declaration if the function // function was already used or defined. if (FD->hasAttr()) { StringRef MangledName = getMangledName(GD); if (GetGlobalValue(MangledName)) DeferredAnnotations[MangledName] = FD; } // Forward declarations are emitted lazily on first use. 
if (!FD->doesThisDeclarationHaveABody()) { if (!FD->doesDeclarationForceExternallyVisibleDefinition() && (!FD->isMultiVersion() || !getTarget().getTriple().isAArch64())) return; StringRef MangledName = getMangledName(GD); // Compute the function info and LLVM type. const CGFunctionInfo &FI = getTypes().arrangeGlobalDeclaration(GD); llvm::Type *Ty = getTypes().GetFunctionType(FI); GetOrCreateLLVMFunction(MangledName, Ty, GD, /*ForVTable=*/false, /*DontDefer=*/false); return; } } else { const auto *VD = cast(Global); assert(VD->isFileVarDecl() && "Cannot emit local var decl as global."); if (VD->isThisDeclarationADefinition() != VarDecl::Definition && !Context.isMSStaticDataMemberInlineDefinition(VD)) { if (LangOpts.OpenMP) { // Emit declaration of the must-be-emitted declare target variable. if (std::optional Res = OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) { // If this variable has external storage and doesn't require special // link handling we defer to its canonical definition. if (VD->hasExternalStorage() && Res != OMPDeclareTargetDeclAttr::MT_Link) return; bool UnifiedMemoryEnabled = getOpenMPRuntime().hasRequiresUnifiedSharedMemory(); if ((*Res == OMPDeclareTargetDeclAttr::MT_To || *Res == OMPDeclareTargetDeclAttr::MT_Enter) && !UnifiedMemoryEnabled) { (void)GetAddrOfGlobalVar(VD); } else { assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) || ((*Res == OMPDeclareTargetDeclAttr::MT_To || *Res == OMPDeclareTargetDeclAttr::MT_Enter) && UnifiedMemoryEnabled)) && "Link clause or to clause with unified memory expected."); (void)getOpenMPRuntime().getAddrOfDeclareTargetVar(VD); } return; } } // If this declaration may have caused an inline variable definition to // change linkage, make sure that it's emitted. if (Context.getInlineVariableDefinitionKind(VD) == ASTContext::InlineVariableDefinitionKind::Strong) GetAddrOfGlobalVar(VD); return; } } // Defer code generation to first use when possible, e.g. if this is an inline // function. If the global must always be emitted, do it eagerly if possible // to benefit from cache locality. if (MustBeEmitted(Global) && MayBeEmittedEagerly(Global)) { // Emit the definition if it can't be deferred. EmitGlobalDefinition(GD); addEmittedDeferredDecl(GD); return; } // If we're deferring emission of a C++ variable with an // initializer, remember the order in which it appeared in the file. if (getLangOpts().CPlusPlus && isa(Global) && cast(Global)->hasInit()) { DelayedCXXInitPosition[Global] = CXXGlobalInits.size(); CXXGlobalInits.push_back(nullptr); } StringRef MangledName = getMangledName(GD); if (GetGlobalValue(MangledName) != nullptr) { // The value has already been used and should therefore be emitted. addDeferredDeclToEmit(GD); } else if (MustBeEmitted(Global)) { // The value must be emitted, but cannot be emitted eagerly. assert(!MayBeEmittedEagerly(Global)); addDeferredDeclToEmit(GD); } else { // Otherwise, remember that we saw a deferred decl with this name. The // first use of the mangled name will cause it to move into // DeferredDeclsToEmit. DeferredDecls[MangledName] = GD; } } // Check if T is a class type with a destructor that's not dllimport. 
static bool HasNonDllImportDtor(QualType T) { if (const auto *RT = T->getBaseElementTypeUnsafe()->getAs()) if (CXXRecordDecl *RD = dyn_cast(RT->getDecl())) if (RD->getDestructor() && !RD->getDestructor()->hasAttr()) return true; return false; } namespace { struct FunctionIsDirectlyRecursive : public ConstStmtVisitor { const StringRef Name; const Builtin::Context &BI; FunctionIsDirectlyRecursive(StringRef N, const Builtin::Context &C) : Name(N), BI(C) {} bool VisitCallExpr(const CallExpr *E) { const FunctionDecl *FD = E->getDirectCallee(); if (!FD) return false; AsmLabelAttr *Attr = FD->getAttr(); if (Attr && Name == Attr->getLabel()) return true; unsigned BuiltinID = FD->getBuiltinID(); if (!BuiltinID || !BI.isLibFunction(BuiltinID)) return false; StringRef BuiltinName = BI.getName(BuiltinID); if (BuiltinName.starts_with("__builtin_") && Name == BuiltinName.slice(strlen("__builtin_"), StringRef::npos)) { return true; } return false; } bool VisitStmt(const Stmt *S) { for (const Stmt *Child : S->children()) if (Child && this->Visit(Child)) return true; return false; } }; // Make sure we're not referencing non-imported vars or functions. struct DLLImportFunctionVisitor : public RecursiveASTVisitor { bool SafeToInline = true; bool shouldVisitImplicitCode() const { return true; } bool VisitVarDecl(VarDecl *VD) { if (VD->getTLSKind()) { // A thread-local variable cannot be imported. SafeToInline = false; return SafeToInline; } // A variable definition might imply a destructor call. if (VD->isThisDeclarationADefinition()) SafeToInline = !HasNonDllImportDtor(VD->getType()); return SafeToInline; } bool VisitCXXBindTemporaryExpr(CXXBindTemporaryExpr *E) { if (const auto *D = E->getTemporary()->getDestructor()) SafeToInline = D->hasAttr(); return SafeToInline; } bool VisitDeclRefExpr(DeclRefExpr *E) { ValueDecl *VD = E->getDecl(); if (isa(VD)) SafeToInline = VD->hasAttr(); else if (VarDecl *V = dyn_cast(VD)) SafeToInline = !V->hasGlobalStorage() || V->hasAttr(); return SafeToInline; } bool VisitCXXConstructExpr(CXXConstructExpr *E) { SafeToInline = E->getConstructor()->hasAttr(); return SafeToInline; } bool VisitCXXMemberCallExpr(CXXMemberCallExpr *E) { CXXMethodDecl *M = E->getMethodDecl(); if (!M) { // Call through a pointer to member function. This is safe to inline. SafeToInline = true; } else { SafeToInline = M->hasAttr(); } return SafeToInline; } bool VisitCXXDeleteExpr(CXXDeleteExpr *E) { SafeToInline = E->getOperatorDelete()->hasAttr(); return SafeToInline; } bool VisitCXXNewExpr(CXXNewExpr *E) { SafeToInline = E->getOperatorNew()->hasAttr(); return SafeToInline; } }; } // isTriviallyRecursive - Check if this function calls another // decl that, because of the asm attribute or the other decl being a builtin, // ends up pointing to itself. bool CodeGenModule::isTriviallyRecursive(const FunctionDecl *FD) { StringRef Name; if (getCXXABI().getMangleContext().shouldMangleDeclName(FD)) { // asm labels are a special kind of mangling we have to support. AsmLabelAttr *Attr = FD->getAttr(); if (!Attr) return false; Name = Attr->getLabel(); } else { Name = FD->getName(); } FunctionIsDirectlyRecursive Walker(Name, Context.BuiltinInfo); const Stmt *Body = FD->getBody(); return Body ? 
Walker.Visit(Body) : false; } bool CodeGenModule::shouldEmitFunction(GlobalDecl GD) { if (getFunctionLinkage(GD) != llvm::Function::AvailableExternallyLinkage) return true; const auto *F = cast(GD.getDecl()); if (CodeGenOpts.OptimizationLevel == 0 && !F->hasAttr()) return false; // We don't import function bodies from other named module units since that // behavior may break ABI compatibility of the current unit. if (const Module *M = F->getOwningModule(); M && M->getTopLevelModule()->isNamedModule() && getContext().getCurrentNamedModule() != M->getTopLevelModule()) { // There are practices to mark template member function as always-inline // and mark the template as extern explicit instantiation but not give // the definition for member function. So we have to emit the function // from explicitly instantiation with always-inline. // // See https://github.com/llvm/llvm-project/issues/86893 for details. // // TODO: Maybe it is better to give it a warning if we call a non-inline // function from other module units which is marked as always-inline. if (!F->isTemplateInstantiation() || !F->hasAttr()) { return false; } } if (F->hasAttr()) return false; if (F->hasAttr() && !F->hasAttr()) { // Check whether it would be safe to inline this dllimport function. DLLImportFunctionVisitor Visitor; Visitor.TraverseFunctionDecl(const_cast(F)); if (!Visitor.SafeToInline) return false; if (const CXXDestructorDecl *Dtor = dyn_cast(F)) { // Implicit destructor invocations aren't captured in the AST, so the // check above can't see them. Check for them manually here. for (const Decl *Member : Dtor->getParent()->decls()) if (isa(Member)) if (HasNonDllImportDtor(cast(Member)->getType())) return false; for (const CXXBaseSpecifier &B : Dtor->getParent()->bases()) if (HasNonDllImportDtor(B.getType())) return false; } } // Inline builtins declaration must be emitted. They often are fortified // functions. if (F->isInlineBuiltinDeclaration()) return true; // PR9614. Avoid cases where the source code is lying to us. An available // externally function should have an equivalent function somewhere else, // but a function that calls itself through asm label/`__builtin_` trickery is // clearly not equivalent to the real implementation. // This happens in glibc's btowc and in some configure checks. return !isTriviallyRecursive(F); } bool CodeGenModule::shouldOpportunisticallyEmitVTables() { return CodeGenOpts.OptimizationLevel > 0; } void CodeGenModule::EmitMultiVersionFunctionDefinition(GlobalDecl GD, llvm::GlobalValue *GV) { const auto *FD = cast(GD.getDecl()); if (FD->isCPUSpecificMultiVersion()) { auto *Spec = FD->getAttr(); for (unsigned I = 0; I < Spec->cpus_size(); ++I) EmitGlobalFunctionDefinition(GD.getWithMultiVersionIndex(I), nullptr); } else if (auto *TC = FD->getAttr()) { for (unsigned I = 0; I < TC->featuresStrs_size(); ++I) // AArch64 favors the default target version over the clone if any. if ((!TC->isDefaultVersion(I) || !getTarget().getTriple().isAArch64()) && TC->isFirstOfVersion(I)) EmitGlobalFunctionDefinition(GD.getWithMultiVersionIndex(I), nullptr); // Ensure that the resolver function is also emitted. GetOrCreateMultiVersionResolver(GD); } else EmitGlobalFunctionDefinition(GD, GV); // Defer the resolver emission until we can reason whether the TU // contains a default target version implementation. 
if (FD->isTargetVersionMultiVersion()) AddDeferredMultiVersionResolverToEmit(GD); } void CodeGenModule::EmitGlobalDefinition(GlobalDecl GD, llvm::GlobalValue *GV) { const auto *D = cast(GD.getDecl()); PrettyStackTraceDecl CrashInfo(const_cast(D), D->getLocation(), Context.getSourceManager(), "Generating code for declaration"); if (const auto *FD = dyn_cast(D)) { // At -O0, don't generate IR for functions with available_externally // linkage. if (!shouldEmitFunction(GD)) return; llvm::TimeTraceScope TimeScope("CodeGen Function", [&]() { std::string Name; llvm::raw_string_ostream OS(Name); FD->getNameForDiagnostic(OS, getContext().getPrintingPolicy(), /*Qualified=*/true); return Name; }); if (const auto *Method = dyn_cast(D)) { // Make sure to emit the definition(s) before we emit the thunks. // This is necessary for the generation of certain thunks. if (isa(Method) || isa(Method)) ABI->emitCXXStructor(GD); else if (FD->isMultiVersion()) EmitMultiVersionFunctionDefinition(GD, GV); else EmitGlobalFunctionDefinition(GD, GV); if (Method->isVirtual()) getVTables().EmitThunks(GD); return; } if (FD->isMultiVersion()) return EmitMultiVersionFunctionDefinition(GD, GV); return EmitGlobalFunctionDefinition(GD, GV); } if (const auto *VD = dyn_cast(D)) return EmitGlobalVarDefinition(VD, !VD->hasDefinition()); llvm_unreachable("Invalid argument to EmitGlobalDefinition()"); } static void ReplaceUsesOfNonProtoTypeWithRealFunction(llvm::GlobalValue *Old, llvm::Function *NewFn); static unsigned TargetMVPriority(const TargetInfo &TI, const CodeGenFunction::MultiVersionResolverOption &RO) { unsigned Priority = 0; unsigned NumFeatures = 0; for (StringRef Feat : RO.Conditions.Features) { Priority = std::max(Priority, TI.multiVersionSortPriority(Feat)); NumFeatures++; } if (!RO.Conditions.Architecture.empty()) Priority = std::max( Priority, TI.multiVersionSortPriority(RO.Conditions.Architecture)); Priority += TI.multiVersionFeatureCost() * NumFeatures; return Priority; } // Multiversion functions should be at most 'WeakODRLinkage' so that a different // TU can forward declare the function without causing problems. Particularly // in the cases of CPUDispatch, this causes issues. This also makes sure we // work with internal linkage functions, so that the same function name can be // used with internal linkage in multiple TUs. llvm::GlobalValue::LinkageTypes getMultiversionLinkage(CodeGenModule &CGM, GlobalDecl GD) { const FunctionDecl *FD = cast(GD.getDecl()); if (FD->getFormalLinkage() == Linkage::Internal) return llvm::GlobalValue::InternalLinkage; return llvm::GlobalValue::WeakODRLinkage; } void CodeGenModule::emitMultiVersionFunctions() { std::vector MVFuncsToEmit; MultiVersionFuncs.swap(MVFuncsToEmit); for (GlobalDecl GD : MVFuncsToEmit) { const auto *FD = cast(GD.getDecl()); assert(FD && "Expected a FunctionDecl"); auto createFunction = [&](const FunctionDecl *Decl, unsigned MVIdx = 0) { GlobalDecl CurGD{Decl->isDefined() ? 
Decl->getDefinition() : Decl, MVIdx}; StringRef MangledName = getMangledName(CurGD); llvm::Constant *Func = GetGlobalValue(MangledName); if (!Func) { if (Decl->isDefined()) { EmitGlobalFunctionDefinition(CurGD, nullptr); Func = GetGlobalValue(MangledName); } else { const CGFunctionInfo &FI = getTypes().arrangeGlobalDeclaration(CurGD); llvm::FunctionType *Ty = getTypes().GetFunctionType(FI); Func = GetAddrOfFunction(CurGD, Ty, /*ForVTable=*/false, /*DontDefer=*/false, ForDefinition); } assert(Func && "This should have just been created"); } return cast(Func); }; // For AArch64, a resolver is only emitted if a function marked with // target_version("default")) or target_clones() is present and defined // in this TU. For other architectures it is always emitted. bool ShouldEmitResolver = !getTarget().getTriple().isAArch64(); SmallVector Options; getContext().forEachMultiversionedFunctionVersion( FD, [&](const FunctionDecl *CurFD) { llvm::SmallVector Feats; bool IsDefined = CurFD->doesThisDeclarationHaveABody(); if (const auto *TA = CurFD->getAttr()) { TA->getAddedFeatures(Feats); llvm::Function *Func = createFunction(CurFD); Options.emplace_back(Func, TA->getArchitecture(), Feats); } else if (const auto *TVA = CurFD->getAttr()) { if (TVA->isDefaultVersion() && IsDefined) ShouldEmitResolver = true; TVA->getFeatures(Feats); llvm::Function *Func = createFunction(CurFD); Options.emplace_back(Func, /*Architecture*/ "", Feats); } else if (const auto *TC = CurFD->getAttr()) { if (IsDefined) ShouldEmitResolver = true; for (unsigned I = 0; I < TC->featuresStrs_size(); ++I) { if (!TC->isFirstOfVersion(I)) continue; llvm::Function *Func = createFunction(CurFD, I); StringRef Architecture; Feats.clear(); if (getTarget().getTriple().isAArch64()) TC->getFeatures(Feats, I); else { StringRef Version = TC->getFeatureStr(I); if (Version.starts_with("arch=")) Architecture = Version.drop_front(sizeof("arch=") - 1); else if (Version != "default") Feats.push_back(Version); } Options.emplace_back(Func, Architecture, Feats); } } else llvm_unreachable("unexpected MultiVersionKind"); }); if (!ShouldEmitResolver) continue; llvm::Constant *ResolverConstant = GetOrCreateMultiVersionResolver(GD); if (auto *IFunc = dyn_cast(ResolverConstant)) { ResolverConstant = IFunc->getResolver(); if (FD->isTargetClonesMultiVersion() && !getTarget().getTriple().isAArch64()) { std::string MangledName = getMangledNameImpl( *this, GD, FD, /*OmitMultiVersionMangling=*/true); if (!GetGlobalValue(MangledName + ".ifunc")) { const CGFunctionInfo &FI = getTypes().arrangeGlobalDeclaration(GD); llvm::FunctionType *DeclTy = getTypes().GetFunctionType(FI); // In prior versions of Clang, the mangling for ifuncs incorrectly // included an .ifunc suffix. This alias is generated for backward // compatibility. It is deprecated, and may be removed in the future. 
auto *Alias = llvm::GlobalAlias::create( DeclTy, 0, getMultiversionLinkage(*this, GD), MangledName + ".ifunc", IFunc, &getModule()); SetCommonAttributes(FD, Alias); } } } llvm::Function *ResolverFunc = cast(ResolverConstant); ResolverFunc->setLinkage(getMultiversionLinkage(*this, GD)); if (!ResolverFunc->hasLocalLinkage() && supportsCOMDAT()) ResolverFunc->setComdat( getModule().getOrInsertComdat(ResolverFunc->getName())); const TargetInfo &TI = getTarget(); llvm::stable_sort( Options, [&TI](const CodeGenFunction::MultiVersionResolverOption &LHS, const CodeGenFunction::MultiVersionResolverOption &RHS) { return TargetMVPriority(TI, LHS) > TargetMVPriority(TI, RHS); }); CodeGenFunction CGF(*this); CGF.EmitMultiVersionResolver(ResolverFunc, Options); } // Ensure that any additions to the deferred decls list caused by emitting a // variant are emitted. This can happen when the variant itself is inline and // calls a function without linkage. if (!MVFuncsToEmit.empty()) EmitDeferred(); // Ensure that any additions to the multiversion funcs list from either the // deferred decls or the multiversion functions themselves are emitted. if (!MultiVersionFuncs.empty()) emitMultiVersionFunctions(); } static void replaceDeclarationWith(llvm::GlobalValue *Old, llvm::Constant *New) { assert(cast(Old)->isDeclaration() && "Not a declaration"); New->takeName(Old); Old->replaceAllUsesWith(New); Old->eraseFromParent(); } void CodeGenModule::emitCPUDispatchDefinition(GlobalDecl GD) { const auto *FD = cast(GD.getDecl()); assert(FD && "Not a FunctionDecl?"); assert(FD->isCPUDispatchMultiVersion() && "Not a multiversion function?"); const auto *DD = FD->getAttr(); assert(DD && "Not a cpu_dispatch Function?"); const CGFunctionInfo &FI = getTypes().arrangeGlobalDeclaration(GD); llvm::FunctionType *DeclTy = getTypes().GetFunctionType(FI); StringRef ResolverName = getMangledName(GD); UpdateMultiVersionNames(GD, FD, ResolverName); llvm::Type *ResolverType; GlobalDecl ResolverGD; if (getTarget().supportsIFunc()) { ResolverType = llvm::FunctionType::get( llvm::PointerType::get(DeclTy, getTypes().getTargetAddressSpace(FD->getType())), false); } else { ResolverType = DeclTy; ResolverGD = GD; } auto *ResolverFunc = cast(GetOrCreateLLVMFunction( ResolverName, ResolverType, ResolverGD, /*ForVTable=*/false)); ResolverFunc->setLinkage(getMultiversionLinkage(*this, GD)); if (supportsCOMDAT()) ResolverFunc->setComdat( getModule().getOrInsertComdat(ResolverFunc->getName())); SmallVector Options; const TargetInfo &Target = getTarget(); unsigned Index = 0; for (const IdentifierInfo *II : DD->cpus()) { // Get the name of the target function so we can look it up/create it. 
std::string MangledName = getMangledNameImpl(*this, GD, FD, true) + getCPUSpecificMangling(*this, II->getName()); llvm::Constant *Func = GetGlobalValue(MangledName); if (!Func) { GlobalDecl ExistingDecl = Manglings.lookup(MangledName); if (ExistingDecl.getDecl() && ExistingDecl.getDecl()->getAsFunction()->isDefined()) { EmitGlobalFunctionDefinition(ExistingDecl, nullptr); Func = GetGlobalValue(MangledName); } else { if (!ExistingDecl.getDecl()) ExistingDecl = GD.getWithMultiVersionIndex(Index); Func = GetOrCreateLLVMFunction( MangledName, DeclTy, ExistingDecl, /*ForVTable=*/false, /*DontDefer=*/true, /*IsThunk=*/false, llvm::AttributeList(), ForDefinition); } } llvm::SmallVector Features; Target.getCPUSpecificCPUDispatchFeatures(II->getName(), Features); llvm::transform(Features, Features.begin(), [](StringRef Str) { return Str.substr(1); }); llvm::erase_if(Features, [&Target](StringRef Feat) { return !Target.validateCpuSupports(Feat); }); Options.emplace_back(cast(Func), StringRef{}, Features); ++Index; } llvm::stable_sort( Options, [](const CodeGenFunction::MultiVersionResolverOption &LHS, const CodeGenFunction::MultiVersionResolverOption &RHS) { return llvm::X86::getCpuSupportsMask(LHS.Conditions.Features) > llvm::X86::getCpuSupportsMask(RHS.Conditions.Features); }); // If the list contains multiple 'default' versions, such as when it contains // 'pentium' and 'generic', don't emit the call to the generic one (since we // always run on at least a 'pentium'). We do this by deleting the 'least // advanced' (read, lowest mangling letter). while (Options.size() > 1 && llvm::all_of(llvm::X86::getCpuSupportsMask( (Options.end() - 2)->Conditions.Features), [](auto X) { return X == 0; })) { StringRef LHSName = (Options.end() - 2)->Function->getName(); StringRef RHSName = (Options.end() - 1)->Function->getName(); if (LHSName.compare(RHSName) < 0) Options.erase(Options.end() - 2); else Options.erase(Options.end() - 1); } CodeGenFunction CGF(*this); CGF.EmitMultiVersionResolver(ResolverFunc, Options); if (getTarget().supportsIFunc()) { llvm::GlobalValue::LinkageTypes Linkage = getMultiversionLinkage(*this, GD); auto *IFunc = cast(GetOrCreateMultiVersionResolver(GD)); // Fix up function declarations that were created for cpu_specific before // cpu_dispatch was known if (!isa(IFunc)) { auto *GI = llvm::GlobalIFunc::create(DeclTy, 0, Linkage, "", ResolverFunc, &getModule()); replaceDeclarationWith(IFunc, GI); IFunc = GI; } std::string AliasName = getMangledNameImpl( *this, GD, FD, /*OmitMultiVersionMangling=*/true); llvm::Constant *AliasFunc = GetGlobalValue(AliasName); if (!AliasFunc) { auto *GA = llvm::GlobalAlias::create(DeclTy, 0, Linkage, AliasName, IFunc, &getModule()); SetCommonAttributes(GD, GA); } } } /// Adds a declaration to the list of multi version functions if not present. void CodeGenModule::AddDeferredMultiVersionResolverToEmit(GlobalDecl GD) { const auto *FD = cast(GD.getDecl()); assert(FD && "Not a FunctionDecl?"); if (FD->isTargetVersionMultiVersion() || FD->isTargetClonesMultiVersion()) { std::string MangledName = getMangledNameImpl(*this, GD, FD, /*OmitMultiVersionMangling=*/true); if (!DeferredResolversToEmit.insert(MangledName).second) return; } MultiVersionFuncs.push_back(GD); } /// If a dispatcher for the specified mangled name is not in the module, create /// and return it. The dispatcher is either an llvm Function with the specified /// type, or a global ifunc. 
llvm::Constant *CodeGenModule::GetOrCreateMultiVersionResolver(GlobalDecl GD) {
  const auto *FD = cast<FunctionDecl>(GD.getDecl());
  assert(FD && "Not a FunctionDecl?");

  std::string MangledName =
      getMangledNameImpl(*this, GD, FD, /*OmitMultiVersionMangling=*/true);

  // Holds the name of the resolver, in ifunc mode this is the ifunc (which has
  // a separate resolver).
  std::string ResolverName = MangledName;
  if (getTarget().supportsIFunc()) {
    switch (FD->getMultiVersionKind()) {
    case MultiVersionKind::None:
      llvm_unreachable("unexpected MultiVersionKind::None for resolver");
    case MultiVersionKind::Target:
    case MultiVersionKind::CPUSpecific:
    case MultiVersionKind::CPUDispatch:
      ResolverName += ".ifunc";
      break;
    case MultiVersionKind::TargetClones:
    case MultiVersionKind::TargetVersion:
      break;
    }
  } else if (FD->isTargetMultiVersion()) {
    ResolverName += ".resolver";
  }

  // If the resolver has already been created, just return it. This lookup may
  // yield a function declaration instead of a resolver on AArch64. That is
  // because we didn't know whether a resolver will be generated when we first
  // encountered a use of the symbol named after this resolver. Therefore,
  // targets which support ifuncs should not return here unless we actually
  // found an ifunc.
  llvm::GlobalValue *ResolverGV = GetGlobalValue(ResolverName);
  if (ResolverGV &&
      (isa<llvm::GlobalIFunc>(ResolverGV) || !getTarget().supportsIFunc()))
    return ResolverGV;

  const CGFunctionInfo &FI = getTypes().arrangeGlobalDeclaration(GD);
  llvm::FunctionType *DeclTy = getTypes().GetFunctionType(FI);

  // The resolver needs to be created. For target and target_clones, defer
  // creation until the end of the TU.
  if (FD->isTargetMultiVersion() || FD->isTargetClonesMultiVersion())
    AddDeferredMultiVersionResolverToEmit(GD);

  // For cpu_specific, don't create an ifunc yet because we don't know if the
  // cpu_dispatch will be emitted in this translation unit.
  if (getTarget().supportsIFunc() && !FD->isCPUSpecificMultiVersion()) {
    llvm::Type *ResolverType = llvm::FunctionType::get(
        llvm::PointerType::get(DeclTy,
                               getTypes().getTargetAddressSpace(FD->getType())),
        false);
    llvm::Constant *Resolver = GetOrCreateLLVMFunction(
        MangledName + ".resolver", ResolverType, GlobalDecl{},
        /*ForVTable=*/false);
    llvm::GlobalIFunc *GIF =
        llvm::GlobalIFunc::create(DeclTy, 0, getMultiversionLinkage(*this, GD),
                                  "", Resolver, &getModule());
    GIF->setName(ResolverName);
    SetCommonAttributes(FD, GIF);
    if (ResolverGV)
      replaceDeclarationWith(ResolverGV, GIF);
    return GIF;
  }

  llvm::Constant *Resolver = GetOrCreateLLVMFunction(
      ResolverName, DeclTy, GlobalDecl{}, /*ForVTable=*/false);
  assert(isa<llvm::GlobalValue>(Resolver) &&
         "Resolver should be created for the first time");
  SetCommonAttributes(FD, cast<llvm::GlobalValue>(Resolver));
  if (ResolverGV)
    replaceDeclarationWith(ResolverGV, Resolver);
  return Resolver;
}

bool CodeGenModule::shouldDropDLLAttribute(const Decl *D,
                                           const llvm::GlobalValue *GV) const {
  auto SC = GV->getDLLStorageClass();
  if (SC == llvm::GlobalValue::DefaultStorageClass)
    return false;
  const Decl *MRD = D->getMostRecentDecl();
  return (((SC == llvm::GlobalValue::DLLImportStorageClass &&
            !MRD->hasAttr<DLLImportAttr>()) ||
           (SC == llvm::GlobalValue::DLLExportStorageClass &&
            !MRD->hasAttr<DLLExportAttr>())) &&
          !shouldMapVisibilityToDLLExport(cast<NamedDecl>(MRD)));
}

/// GetOrCreateLLVMFunction - If the specified mangled name is not in the
/// module, create and return an llvm Function with the specified type. If
/// there is something in the module with the specified name, return it
/// potentially bitcasted to the right type.
/// /// If D is non-null, it specifies a decl that correspond to this. This is used /// to set the attributes on the function when it is first created. llvm::Constant *CodeGenModule::GetOrCreateLLVMFunction( StringRef MangledName, llvm::Type *Ty, GlobalDecl GD, bool ForVTable, bool DontDefer, bool IsThunk, llvm::AttributeList ExtraAttrs, ForDefinition_t IsForDefinition) { const Decl *D = GD.getDecl(); std::string NameWithoutMultiVersionMangling; // Any attempts to use a MultiVersion function should result in retrieving // the iFunc instead. Name Mangling will handle the rest of the changes. if (const FunctionDecl *FD = cast_or_null(D)) { // For the device mark the function as one that should be emitted. if (getLangOpts().OpenMPIsTargetDevice && OpenMPRuntime && !OpenMPRuntime->markAsGlobalTarget(GD) && FD->isDefined() && !DontDefer && !IsForDefinition) { if (const FunctionDecl *FDDef = FD->getDefinition()) { GlobalDecl GDDef; if (const auto *CD = dyn_cast(FDDef)) GDDef = GlobalDecl(CD, GD.getCtorType()); else if (const auto *DD = dyn_cast(FDDef)) GDDef = GlobalDecl(DD, GD.getDtorType()); else GDDef = GlobalDecl(FDDef); EmitGlobal(GDDef); } } if (FD->isMultiVersion()) { UpdateMultiVersionNames(GD, FD, MangledName); if (!IsForDefinition) { // On AArch64 we do not immediatelly emit an ifunc resolver when a // function is used. Instead we defer the emission until we see a // default definition. In the meantime we just reference the symbol // without FMV mangling (it may or may not be replaced later). if (getTarget().getTriple().isAArch64()) { AddDeferredMultiVersionResolverToEmit(GD); NameWithoutMultiVersionMangling = getMangledNameImpl( *this, GD, FD, /*OmitMultiVersionMangling=*/true); } else return GetOrCreateMultiVersionResolver(GD); } } } if (!NameWithoutMultiVersionMangling.empty()) MangledName = NameWithoutMultiVersionMangling; // Lookup the entry, lazily creating it if necessary. llvm::GlobalValue *Entry = GetGlobalValue(MangledName); if (Entry) { if (WeakRefReferences.erase(Entry)) { const FunctionDecl *FD = cast_or_null(D); if (FD && !FD->hasAttr()) Entry->setLinkage(llvm::Function::ExternalLinkage); } // Handle dropped DLL attributes. if (D && shouldDropDLLAttribute(D, Entry)) { Entry->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass); setDSOLocal(Entry); } // If there are two attempts to define the same mangled name, issue an // error. if (IsForDefinition && !Entry->isDeclaration()) { GlobalDecl OtherGD; // Check that GD is not yet in DiagnosedConflictingDefinitions is required // to make sure that we issue an error only once. if (lookupRepresentativeDecl(MangledName, OtherGD) && (GD.getCanonicalDecl().getDecl() != OtherGD.getCanonicalDecl().getDecl()) && DiagnosedConflictingDefinitions.insert(GD).second) { getDiags().Report(D->getLocation(), diag::err_duplicate_mangled_name) << MangledName; getDiags().Report(OtherGD.getDecl()->getLocation(), diag::note_previous_definition); } } if ((isa(Entry) || isa(Entry)) && (Entry->getValueType() == Ty)) { return Entry; } // Make sure the result is of the correct type. // (If function is requested for a definition, we always need to create a new // function, not just return a bitcast.) if (!IsForDefinition) return Entry; } // This function doesn't have a complete type (for example, the return // type is an incomplete struct). Use a fake type instead, and make // sure not to try to set attributes. 
bool IsIncompleteFunction = false; llvm::FunctionType *FTy; if (isa(Ty)) { FTy = cast(Ty); } else { FTy = llvm::FunctionType::get(VoidTy, false); IsIncompleteFunction = true; } llvm::Function *F = llvm::Function::Create(FTy, llvm::Function::ExternalLinkage, Entry ? StringRef() : MangledName, &getModule()); // Store the declaration associated with this function so it is potentially // updated by further declarations or definitions and emitted at the end. if (D && D->hasAttr()) DeferredAnnotations[MangledName] = cast(D); // If we already created a function with the same mangled name (but different // type) before, take its name and add it to the list of functions to be // replaced with F at the end of CodeGen. // // This happens if there is a prototype for a function (e.g. "int f()") and // then a definition of a different type (e.g. "int f(int x)"). if (Entry) { F->takeName(Entry); // This might be an implementation of a function without a prototype, in // which case, try to do special replacement of calls which match the new // prototype. The really key thing here is that we also potentially drop // arguments from the call site so as to make a direct call, which makes the // inliner happier and suppresses a number of optimizer warnings (!) about // dropping arguments. if (!Entry->use_empty()) { ReplaceUsesOfNonProtoTypeWithRealFunction(Entry, F); Entry->removeDeadConstantUsers(); } addGlobalValReplacement(Entry, F); } assert(F->getName() == MangledName && "name was uniqued!"); if (D) SetFunctionAttributes(GD, F, IsIncompleteFunction, IsThunk); if (ExtraAttrs.hasFnAttrs()) { llvm::AttrBuilder B(F->getContext(), ExtraAttrs.getFnAttrs()); F->addFnAttrs(B); } if (!DontDefer) { // All MSVC dtors other than the base dtor are linkonce_odr and delegate to // each other bottoming out with the base dtor. Therefore we emit non-base // dtors on usage, even if there is no dtor definition in the TU. if (isa_and_nonnull(D) && getCXXABI().useThunkForDtorVariant(cast(D), GD.getDtorType())) addDeferredDeclToEmit(GD); // This is the first use or definition of a mangled name. If there is a // deferred decl with this name, remember that we need to emit it at the end // of the file. auto DDI = DeferredDecls.find(MangledName); if (DDI != DeferredDecls.end()) { // Move the potentially referenced deferred decl to the // DeferredDeclsToEmit list, and remove it from DeferredDecls (since we // don't need it anymore). addDeferredDeclToEmit(DDI->second); DeferredDecls.erase(DDI); // Otherwise, there are cases we have to worry about where we're // using a declaration for which we must emit a definition but where // we might not find a top-level definition: // - member functions defined inline in their classes // - friend functions defined inline in some class // - special member functions with implicit definitions // If we ever change our AST traversal to walk into class methods, // this will be unnecessary. // // We also don't emit a definition for a function if it's going to be an // entry in a vtable, unless it's already marked as used. } else if (getLangOpts().CPlusPlus && D) { // Look for a declaration that's lexically in a record. for (const auto *FD = cast(D)->getMostRecentDecl(); FD; FD = FD->getPreviousDecl()) { if (isa(FD->getLexicalDeclContext())) { if (FD->doesThisDeclarationHaveABody()) { addDeferredDeclToEmit(GD.getWithDecl(FD)); break; } } } } } // Make sure the result is of the requested type. 
if (!IsIncompleteFunction) { assert(F->getFunctionType() == Ty); return F; } return F; } /// GetAddrOfFunction - Return the address of the given function. If Ty is /// non-null, then this function will use the specified type if it has to /// create it (this occurs when we see a definition of the function). llvm::Constant * CodeGenModule::GetAddrOfFunction(GlobalDecl GD, llvm::Type *Ty, bool ForVTable, bool DontDefer, ForDefinition_t IsForDefinition) { // If there was no specific requested type, just convert it now. if (!Ty) { const auto *FD = cast(GD.getDecl()); Ty = getTypes().ConvertType(FD->getType()); } // Devirtualized destructor calls may come through here instead of via // getAddrOfCXXStructor. Make sure we use the MS ABI base destructor instead // of the complete destructor when necessary. if (const auto *DD = dyn_cast(GD.getDecl())) { if (getTarget().getCXXABI().isMicrosoft() && GD.getDtorType() == Dtor_Complete && DD->getParent()->getNumVBases() == 0) GD = GlobalDecl(DD, Dtor_Base); } StringRef MangledName = getMangledName(GD); auto *F = GetOrCreateLLVMFunction(MangledName, Ty, GD, ForVTable, DontDefer, /*IsThunk=*/false, llvm::AttributeList(), IsForDefinition); // Returns kernel handle for HIP kernel stub function. if (LangOpts.CUDA && !LangOpts.CUDAIsDevice && cast(GD.getDecl())->hasAttr()) { auto *Handle = getCUDARuntime().getKernelHandle( cast(F->stripPointerCasts()), GD); if (IsForDefinition) return F; return Handle; } return F; } llvm::Constant *CodeGenModule::GetFunctionStart(const ValueDecl *Decl) { llvm::GlobalValue *F = cast(GetAddrOfFunction(Decl)->stripPointerCasts()); return llvm::NoCFIValue::get(F); } static const FunctionDecl * GetRuntimeFunctionDecl(ASTContext &C, StringRef Name) { TranslationUnitDecl *TUDecl = C.getTranslationUnitDecl(); DeclContext *DC = TranslationUnitDecl::castToDeclContext(TUDecl); IdentifierInfo &CII = C.Idents.get(Name); for (const auto *Result : DC->lookup(&CII)) if (const auto *FD = dyn_cast(Result)) return FD; if (!C.getLangOpts().CPlusPlus) return nullptr; // Demangle the premangled name from getTerminateFn() IdentifierInfo &CXXII = (Name == "_ZSt9terminatev" || Name == "?terminate@@YAXXZ") ? C.Idents.get("terminate") : C.Idents.get(Name); for (const auto &N : {"__cxxabiv1", "std"}) { IdentifierInfo &NS = C.Idents.get(N); for (const auto *Result : DC->lookup(&NS)) { const NamespaceDecl *ND = dyn_cast(Result); if (auto *LSD = dyn_cast(Result)) for (const auto *Result : LSD->lookup(&NS)) if ((ND = dyn_cast(Result))) break; if (ND) for (const auto *Result : ND->lookup(&CXXII)) if (const auto *FD = dyn_cast(Result)) return FD; } } return nullptr; } /// CreateRuntimeFunction - Create a new runtime function with the specified /// type and name. llvm::FunctionCallee CodeGenModule::CreateRuntimeFunction(llvm::FunctionType *FTy, StringRef Name, llvm::AttributeList ExtraAttrs, bool Local, bool AssumeConvergent) { if (AssumeConvergent) { ExtraAttrs = ExtraAttrs.addFnAttribute(VMContext, llvm::Attribute::Convergent); } llvm::Constant *C = GetOrCreateLLVMFunction(Name, FTy, GlobalDecl(), /*ForVTable=*/false, /*DontDefer=*/false, /*IsThunk=*/false, ExtraAttrs); if (auto *F = dyn_cast(C)) { if (F->empty()) { F->setCallingConv(getRuntimeCC()); // In Windows Itanium environments, try to mark runtime functions // dllimport. For Mingw and MSVC, don't. We don't really know if the user // will link their standard library statically or dynamically. Marking // functions imported when they are not imported can cause linker errors // and warnings. 
if (!Local && getTriple().isWindowsItaniumEnvironment() && !getCodeGenOpts().LTOVisibilityPublicStd) { const FunctionDecl *FD = GetRuntimeFunctionDecl(Context, Name); if (!FD || FD->hasAttr()) { F->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass); F->setLinkage(llvm::GlobalValue::ExternalLinkage); } } setDSOLocal(F); // FIXME: We should use CodeGenModule::SetLLVMFunctionAttributes() instead // of trying to approximate the attributes using the LLVM function // signature. This requires revising the API of CreateRuntimeFunction(). markRegisterParameterAttributes(F); } } return {FTy, C}; } /// GetOrCreateLLVMGlobal - If the specified mangled name is not in the module, /// create and return an llvm GlobalVariable with the specified type and address /// space. If there is something in the module with the specified name, return /// it potentially bitcasted to the right type. /// /// If D is non-null, it specifies a decl that correspond to this. This is used /// to set the attributes on the global when it is first created. /// /// If IsForDefinition is true, it is guaranteed that an actual global with /// type Ty will be returned, not conversion of a variable with the same /// mangled name but some other type. llvm::Constant * CodeGenModule::GetOrCreateLLVMGlobal(StringRef MangledName, llvm::Type *Ty, LangAS AddrSpace, const VarDecl *D, ForDefinition_t IsForDefinition) { // Lookup the entry, lazily creating it if necessary. llvm::GlobalValue *Entry = GetGlobalValue(MangledName); unsigned TargetAS = getContext().getTargetAddressSpace(AddrSpace); if (Entry) { if (WeakRefReferences.erase(Entry)) { if (D && !D->hasAttr()) Entry->setLinkage(llvm::Function::ExternalLinkage); } // Handle dropped DLL attributes. if (D && shouldDropDLLAttribute(D, Entry)) Entry->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass); if (LangOpts.OpenMP && !LangOpts.OpenMPSimd && D) getOpenMPRuntime().registerTargetGlobalVariable(D, Entry); if (Entry->getValueType() == Ty && Entry->getAddressSpace() == TargetAS) return Entry; // If there are two attempts to define the same mangled name, issue an // error. if (IsForDefinition && !Entry->isDeclaration()) { GlobalDecl OtherGD; const VarDecl *OtherD; // Check that D is not yet in DiagnosedConflictingDefinitions is required // to make sure that we issue an error only once. if (D && lookupRepresentativeDecl(MangledName, OtherGD) && (D->getCanonicalDecl() != OtherGD.getCanonicalDecl().getDecl()) && (OtherD = dyn_cast(OtherGD.getDecl())) && OtherD->hasInit() && DiagnosedConflictingDefinitions.insert(D).second) { getDiags().Report(D->getLocation(), diag::err_duplicate_mangled_name) << MangledName; getDiags().Report(OtherGD.getDecl()->getLocation(), diag::note_previous_definition); } } // Make sure the result is of the correct type. if (Entry->getType()->getAddressSpace() != TargetAS) return llvm::ConstantExpr::getAddrSpaceCast( Entry, llvm::PointerType::get(Ty->getContext(), TargetAS)); // (If global is requested for a definition, we always need to create a new // global, not just return a bitcast.) if (!IsForDefinition) return Entry; } auto DAddrSpace = GetGlobalVarAddressSpace(D); auto *GV = new llvm::GlobalVariable( getModule(), Ty, false, llvm::GlobalValue::ExternalLinkage, nullptr, MangledName, nullptr, llvm::GlobalVariable::NotThreadLocal, getContext().getTargetAddressSpace(DAddrSpace)); // If we already created a global with the same mangled name (but different // type) before, take its name and remove it from its parent. 
if (Entry) { GV->takeName(Entry); if (!Entry->use_empty()) { Entry->replaceAllUsesWith(GV); } Entry->eraseFromParent(); } // This is the first use or definition of a mangled name. If there is a // deferred decl with this name, remember that we need to emit it at the end // of the file. auto DDI = DeferredDecls.find(MangledName); if (DDI != DeferredDecls.end()) { // Move the potentially referenced deferred decl to the DeferredDeclsToEmit // list, and remove it from DeferredDecls (since we don't need it anymore). addDeferredDeclToEmit(DDI->second); DeferredDecls.erase(DDI); } // Handle things which are present even on external declarations. if (D) { if (LangOpts.OpenMP && !LangOpts.OpenMPSimd) getOpenMPRuntime().registerTargetGlobalVariable(D, GV); // FIXME: This code is overly simple and should be merged with other global // handling. GV->setConstant(D->getType().isConstantStorage(getContext(), false, false)); GV->setAlignment(getContext().getDeclAlign(D).getAsAlign()); setLinkageForGV(GV, D); if (D->getTLSKind()) { if (D->getTLSKind() == VarDecl::TLS_Dynamic) CXXThreadLocals.push_back(D); setTLSMode(GV, *D); } setGVProperties(GV, D); // If required by the ABI, treat declarations of static data members with // inline initializers as definitions. if (getContext().isMSStaticDataMemberInlineDefinition(D)) { EmitGlobalVarDefinition(D); } // Emit section information for extern variables. if (D->hasExternalStorage()) { if (const SectionAttr *SA = D->getAttr()) GV->setSection(SA->getName()); } // Handle XCore specific ABI requirements. if (getTriple().getArch() == llvm::Triple::xcore && D->getLanguageLinkage() == CLanguageLinkage && D->getType().isConstant(Context) && isExternallyVisible(D->getLinkageAndVisibility().getLinkage())) GV->setSection(".cp.rodata"); // Handle code model attribute if (const auto *CMA = D->getAttr()) GV->setCodeModel(CMA->getModel()); // Check if we a have a const declaration with an initializer, we may be // able to emit it as available_externally to expose it's value to the // optimizer. if (Context.getLangOpts().CPlusPlus && GV->hasExternalLinkage() && D->getType().isConstQualified() && !GV->hasInitializer() && !D->hasDefinition() && D->hasInit() && !D->hasAttr()) { const auto *Record = Context.getBaseElementType(D->getType())->getAsCXXRecordDecl(); bool HasMutableFields = Record && Record->hasMutableFields(); if (!HasMutableFields) { const VarDecl *InitDecl; const Expr *InitExpr = D->getAnyInitializer(InitDecl); if (InitExpr) { ConstantEmitter emitter(*this); llvm::Constant *Init = emitter.tryEmitForInitializer(*InitDecl); if (Init) { auto *InitType = Init->getType(); if (GV->getValueType() != InitType) { // The type of the initializer does not match the definition. // This happens when an initializer has a different type from // the type of the global (because of padding at the end of a // structure for instance). GV->setName(StringRef()); // Make a new global with the correct type, this is now guaranteed // to work. auto *NewGV = cast( GetAddrOfGlobalVar(D, InitType, IsForDefinition) ->stripPointerCasts()); // Erase the old global, since it is no longer used. 
GV->eraseFromParent(); GV = NewGV; } else { GV->setInitializer(Init); GV->setConstant(true); GV->setLinkage(llvm::GlobalValue::AvailableExternallyLinkage); } emitter.finalize(GV); } } } } } if (D && D->isThisDeclarationADefinition(Context) == VarDecl::DeclarationOnly) { getTargetCodeGenInfo().setTargetAttributes(D, GV, *this); // External HIP managed variables needed to be recorded for transformation // in both device and host compilations. if (getLangOpts().CUDA && D && D->hasAttr() && D->hasExternalStorage()) getCUDARuntime().handleVarRegistration(D, *GV); } if (D) SanitizerMD->reportGlobal(GV, *D); LangAS ExpectedAS = D ? D->getType().getAddressSpace() : (LangOpts.OpenCL ? LangAS::opencl_global : LangAS::Default); assert(getContext().getTargetAddressSpace(ExpectedAS) == TargetAS); if (DAddrSpace != ExpectedAS) { return getTargetCodeGenInfo().performAddrSpaceCast( *this, GV, DAddrSpace, ExpectedAS, llvm::PointerType::get(getLLVMContext(), TargetAS)); } return GV; } llvm::Constant * CodeGenModule::GetAddrOfGlobal(GlobalDecl GD, ForDefinition_t IsForDefinition) { const Decl *D = GD.getDecl(); if (isa(D) || isa(D)) return getAddrOfCXXStructor(GD, /*FnInfo=*/nullptr, /*FnType=*/nullptr, /*DontDefer=*/false, IsForDefinition); if (isa(D)) { auto FInfo = &getTypes().arrangeCXXMethodDeclaration(cast(D)); auto Ty = getTypes().GetFunctionType(*FInfo); return GetAddrOfFunction(GD, Ty, /*ForVTable=*/false, /*DontDefer=*/false, IsForDefinition); } if (isa(D)) { const CGFunctionInfo &FI = getTypes().arrangeGlobalDeclaration(GD); llvm::FunctionType *Ty = getTypes().GetFunctionType(FI); return GetAddrOfFunction(GD, Ty, /*ForVTable=*/false, /*DontDefer=*/false, IsForDefinition); } return GetAddrOfGlobalVar(cast(D), /*Ty=*/nullptr, IsForDefinition); } llvm::GlobalVariable *CodeGenModule::CreateOrReplaceCXXRuntimeVariable( StringRef Name, llvm::Type *Ty, llvm::GlobalValue::LinkageTypes Linkage, llvm::Align Alignment) { llvm::GlobalVariable *GV = getModule().getNamedGlobal(Name); llvm::GlobalVariable *OldGV = nullptr; if (GV) { // Check if the variable has the right type. if (GV->getValueType() == Ty) return GV; // Because C++ name mangling, the only way we can end up with an already // existing global with the same name is if it has been declared extern "C". assert(GV->isDeclaration() && "Declaration has wrong type!"); OldGV = GV; } // Create a new variable. GV = new llvm::GlobalVariable(getModule(), Ty, /*isConstant=*/true, Linkage, nullptr, Name); if (OldGV) { // Replace occurrences of the old variable if needed. GV->takeName(OldGV); if (!OldGV->use_empty()) { OldGV->replaceAllUsesWith(GV); } OldGV->eraseFromParent(); } if (supportsCOMDAT() && GV->isWeakForLinker() && !GV->hasAvailableExternallyLinkage()) GV->setComdat(TheModule.getOrInsertComdat(GV->getName())); GV->setAlignment(Alignment); return GV; } /// GetAddrOfGlobalVar - Return the llvm::Constant for the address of the /// given global variable. If Ty is non-null and if the global doesn't exist, /// then it will be created with the specified type instead of whatever the /// normal requested type would be. If IsForDefinition is true, it is guaranteed /// that an actual global with type Ty will be returned, not conversion of a /// variable with the same mangled name but some other type. 
llvm::Constant *CodeGenModule::GetAddrOfGlobalVar(const VarDecl *D, llvm::Type *Ty, ForDefinition_t IsForDefinition) { assert(D->hasGlobalStorage() && "Not a global variable"); QualType ASTTy = D->getType(); if (!Ty) Ty = getTypes().ConvertTypeForMem(ASTTy); StringRef MangledName = getMangledName(D); return GetOrCreateLLVMGlobal(MangledName, Ty, ASTTy.getAddressSpace(), D, IsForDefinition); } /// CreateRuntimeVariable - Create a new runtime global variable with the /// specified type and name. llvm::Constant * CodeGenModule::CreateRuntimeVariable(llvm::Type *Ty, StringRef Name) { LangAS AddrSpace = getContext().getLangOpts().OpenCL ? LangAS::opencl_global : LangAS::Default; auto *Ret = GetOrCreateLLVMGlobal(Name, Ty, AddrSpace, nullptr); setDSOLocal(cast(Ret->stripPointerCasts())); return Ret; } void CodeGenModule::EmitTentativeDefinition(const VarDecl *D) { assert(!D->getInit() && "Cannot emit definite definitions here!"); StringRef MangledName = getMangledName(D); llvm::GlobalValue *GV = GetGlobalValue(MangledName); // We already have a definition, not declaration, with the same mangled name. // Emitting of declaration is not required (and actually overwrites emitted // definition). if (GV && !GV->isDeclaration()) return; // If we have not seen a reference to this variable yet, place it into the // deferred declarations table to be emitted if needed later. if (!MustBeEmitted(D) && !GV) { DeferredDecls[MangledName] = D; return; } // The tentative definition is the only definition. EmitGlobalVarDefinition(D); } void CodeGenModule::EmitExternalDeclaration(const DeclaratorDecl *D) { if (auto const *V = dyn_cast(D)) EmitExternalVarDeclaration(V); if (auto const *FD = dyn_cast(D)) EmitExternalFunctionDeclaration(FD); } CharUnits CodeGenModule::GetTargetTypeStoreSize(llvm::Type *Ty) const { return Context.toCharUnitsFromBits( getDataLayout().getTypeStoreSizeInBits(Ty)); } LangAS CodeGenModule::GetGlobalVarAddressSpace(const VarDecl *D) { if (LangOpts.OpenCL) { LangAS AS = D ? D->getType().getAddressSpace() : LangAS::opencl_global; assert(AS == LangAS::opencl_global || AS == LangAS::opencl_global_device || AS == LangAS::opencl_global_host || AS == LangAS::opencl_constant || AS == LangAS::opencl_local || AS >= LangAS::FirstTargetAddressSpace); return AS; } if (LangOpts.SYCLIsDevice && (!D || D->getType().getAddressSpace() == LangAS::Default)) return LangAS::sycl_global; if (LangOpts.CUDA && LangOpts.CUDAIsDevice) { if (D) { if (D->hasAttr()) return LangAS::cuda_constant; if (D->hasAttr()) return LangAS::cuda_shared; if (D->hasAttr()) return LangAS::cuda_device; if (D->getType().isConstQualified()) return LangAS::cuda_constant; } return LangAS::cuda_device; } if (LangOpts.OpenMP) { LangAS AS; if (OpenMPRuntime->hasAllocateAttributeForGlobalVar(D, AS)) return AS; } return getTargetCodeGenInfo().getGlobalVarAddressSpace(*this, D); } LangAS CodeGenModule::GetGlobalConstantAddressSpace() const { // OpenCL v1.2 s6.5.3: a string literal is in the constant address space. if (LangOpts.OpenCL) return LangAS::opencl_constant; if (LangOpts.SYCLIsDevice) return LangAS::sycl_global; if (LangOpts.HIP && LangOpts.CUDAIsDevice && getTriple().isSPIRV()) // For HIPSPV map literals to cuda_device (maps to CrossWorkGroup in SPIR-V) // instead of default AS (maps to Generic in SPIR-V). Otherwise, we end up // with OpVariable instructions with Generic storage class which is not // allowed (SPIR-V V1.6 s3.42.8). 
Also, mapping literals to SPIR-V // UniformConstant storage class is not viable as pointers to it may not be // casted to Generic pointers which are used to model HIP's "flat" pointers. return LangAS::cuda_device; if (auto AS = getTarget().getConstantAddressSpace()) return *AS; return LangAS::Default; } // In address space agnostic languages, string literals are in default address // space in AST. However, certain targets (e.g. amdgcn) request them to be // emitted in constant address space in LLVM IR. To be consistent with other // parts of AST, string literal global variables in constant address space // need to be casted to default address space before being put into address // map and referenced by other part of CodeGen. // In OpenCL, string literals are in constant address space in AST, therefore // they should not be casted to default address space. static llvm::Constant * castStringLiteralToDefaultAddressSpace(CodeGenModule &CGM, llvm::GlobalVariable *GV) { llvm::Constant *Cast = GV; if (!CGM.getLangOpts().OpenCL) { auto AS = CGM.GetGlobalConstantAddressSpace(); if (AS != LangAS::Default) Cast = CGM.getTargetCodeGenInfo().performAddrSpaceCast( CGM, GV, AS, LangAS::Default, llvm::PointerType::get( CGM.getLLVMContext(), CGM.getContext().getTargetAddressSpace(LangAS::Default))); } return Cast; } template void CodeGenModule::MaybeHandleStaticInExternC(const SomeDecl *D, llvm::GlobalValue *GV) { if (!getLangOpts().CPlusPlus) return; // Must have 'used' attribute, or else inline assembly can't rely on // the name existing. if (!D->template hasAttr()) return; // Must have internal linkage and an ordinary name. if (!D->getIdentifier() || D->getFormalLinkage() != Linkage::Internal) return; // Must be in an extern "C" context. Entities declared directly within // a record are not extern "C" even if the record is in such a context. const SomeDecl *First = D->getFirstDecl(); if (First->getDeclContext()->isRecord() || !First->isInExternCContext()) return; // OK, this is an internal linkage entity inside an extern "C" linkage // specification. Make a note of that so we can give it the "expected" // mangled name if nothing else is using that name. std::pair R = StaticExternCValues.insert(std::make_pair(D->getIdentifier(), GV)); // If we have multiple internal linkage entities with the same name // in extern "C" regions, none of them gets that name. if (!R.second) R.first->second = nullptr; } static bool shouldBeInCOMDAT(CodeGenModule &CGM, const Decl &D) { if (!CGM.supportsCOMDAT()) return false; if (D.hasAttr()) return true; GVALinkage Linkage; if (auto *VD = dyn_cast(&D)) Linkage = CGM.getContext().GetGVALinkageForVariable(VD); else Linkage = CGM.getContext().GetGVALinkageForFunction(cast(&D)); switch (Linkage) { case GVA_Internal: case GVA_AvailableExternally: case GVA_StrongExternal: return false; case GVA_DiscardableODR: case GVA_StrongODR: return true; } llvm_unreachable("No such linkage"); } bool CodeGenModule::supportsCOMDAT() const { return getTriple().supportsCOMDAT(); } void CodeGenModule::maybeSetTrivialComdat(const Decl &D, llvm::GlobalObject &GO) { if (!shouldBeInCOMDAT(*this, D)) return; GO.setComdat(TheModule.getOrInsertComdat(GO.getName())); } const ABIInfo &CodeGenModule::getABIInfo() { return getTargetCodeGenInfo().getABIInfo(); } /// Pass IsTentative as true if you want to create a tentative definition. 
void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D, bool IsTentative) { // OpenCL global variables of sampler type are translated to function calls, // therefore no need to be translated. QualType ASTTy = D->getType(); if (getLangOpts().OpenCL && ASTTy->isSamplerT()) return; // If this is OpenMP device, check if it is legal to emit this global // normally. if (LangOpts.OpenMPIsTargetDevice && OpenMPRuntime && OpenMPRuntime->emitTargetGlobalVariable(D)) return; llvm::TrackingVH Init; bool NeedsGlobalCtor = false; // Whether the definition of the variable is available externally. // If yes, we shouldn't emit the GloablCtor and GlobalDtor for the variable // since this is the job for its original source. bool IsDefinitionAvailableExternally = getContext().GetGVALinkageForVariable(D) == GVA_AvailableExternally; bool NeedsGlobalDtor = !IsDefinitionAvailableExternally && D->needsDestruction(getContext()) == QualType::DK_cxx_destructor; // It is helpless to emit the definition for an available_externally variable // which can't be marked as const. // We don't need to check if it needs global ctor or dtor. See the above // comment for ideas. if (IsDefinitionAvailableExternally && (!D->hasConstantInitialization() || // TODO: Update this when we have interface to check constexpr // destructor. D->needsDestruction(getContext()) || !D->getType().isConstantStorage(getContext(), true, true))) return; const VarDecl *InitDecl; const Expr *InitExpr = D->getAnyInitializer(InitDecl); std::optional emitter; // CUDA E.2.4.1 "__shared__ variables cannot have an initialization // as part of their declaration." Sema has already checked for // error cases, so we just need to set Init to UndefValue. bool IsCUDASharedVar = getLangOpts().CUDAIsDevice && D->hasAttr(); // Shadows of initialized device-side global variables are also left // undefined. // Managed Variables should be initialized on both host side and device side. bool IsCUDAShadowVar = !getLangOpts().CUDAIsDevice && !D->hasAttr() && (D->hasAttr() || D->hasAttr() || D->hasAttr()); bool IsCUDADeviceShadowVar = getLangOpts().CUDAIsDevice && !D->hasAttr() && (D->getType()->isCUDADeviceBuiltinSurfaceType() || D->getType()->isCUDADeviceBuiltinTextureType()); if (getLangOpts().CUDA && (IsCUDASharedVar || IsCUDAShadowVar || IsCUDADeviceShadowVar)) Init = llvm::UndefValue::get(getTypes().ConvertTypeForMem(ASTTy)); else if (D->hasAttr()) Init = llvm::UndefValue::get(getTypes().ConvertTypeForMem(ASTTy)); else if (!InitExpr) { // This is a tentative definition; tentative definitions are // implicitly initialized with { 0 }. // // Note that tentative definitions are only emitted at the end of // a translation unit, so they should never have incomplete // type. In addition, EmitTentativeDefinition makes sure that we // never attempt to emit a tentative definition if a real one // exists. A use may still exists, however, so we still may need // to do a RAUW. 
assert(!ASTTy->isIncompleteType() && "Unexpected incomplete type"); Init = EmitNullConstant(D->getType()); } else { initializedGlobalDecl = GlobalDecl(D); emitter.emplace(*this); llvm::Constant *Initializer = emitter->tryEmitForInitializer(*InitDecl); if (!Initializer) { QualType T = InitExpr->getType(); if (D->getType()->isReferenceType()) T = D->getType(); if (getLangOpts().CPlusPlus) { if (InitDecl->hasFlexibleArrayInit(getContext())) ErrorUnsupported(D, "flexible array initializer"); Init = EmitNullConstant(T); if (!IsDefinitionAvailableExternally) NeedsGlobalCtor = true; } else { ErrorUnsupported(D, "static initializer"); Init = llvm::UndefValue::get(getTypes().ConvertType(T)); } } else { Init = Initializer; // We don't need an initializer, so remove the entry for the delayed // initializer position (just in case this entry was delayed) if we // also don't need to register a destructor. if (getLangOpts().CPlusPlus && !NeedsGlobalDtor) DelayedCXXInitPosition.erase(D); #ifndef NDEBUG CharUnits VarSize = getContext().getTypeSizeInChars(ASTTy) + InitDecl->getFlexibleArrayInitChars(getContext()); CharUnits CstSize = CharUnits::fromQuantity( getDataLayout().getTypeAllocSize(Init->getType())); assert(VarSize == CstSize && "Emitted constant has unexpected size"); #endif } } llvm::Type* InitType = Init->getType(); llvm::Constant *Entry = GetAddrOfGlobalVar(D, InitType, ForDefinition_t(!IsTentative)); // Strip off pointer casts if we got them. Entry = Entry->stripPointerCasts(); // Entry is now either a Function or GlobalVariable. auto *GV = dyn_cast(Entry); // We have a definition after a declaration with the wrong type. // We must make a new GlobalVariable* and update everything that used OldGV // (a declaration or tentative definition) with the new GlobalVariable* // (which will be a definition). // // This happens if there is a prototype for a global (e.g. // "extern int x[];") and then a definition of a different type (e.g. // "int x[10];"). This also happens when an initializer has a different type // from the type of the global (this happens with unions). if (!GV || GV->getValueType() != InitType || GV->getType()->getAddressSpace() != getContext().getTargetAddressSpace(GetGlobalVarAddressSpace(D))) { // Move the old entry aside so that we'll create a new one. Entry->setName(StringRef()); // Make a new global with the correct type, this is now guaranteed to work. GV = cast( GetAddrOfGlobalVar(D, InitType, ForDefinition_t(!IsTentative)) ->stripPointerCasts()); // Replace all uses of the old global with the new global llvm::Constant *NewPtrForOldDecl = llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(GV, Entry->getType()); Entry->replaceAllUsesWith(NewPtrForOldDecl); // Erase the old global, since it is no longer used. cast(Entry)->eraseFromParent(); } MaybeHandleStaticInExternC(D, GV); if (D->hasAttr()) AddGlobalAnnotations(D, GV); // Set the llvm linkage type as appropriate. llvm::GlobalValue::LinkageTypes Linkage = getLLVMLinkageVarDefinition(D); // CUDA B.2.1 "The __device__ qualifier declares a variable that resides on // the device. [...]" // CUDA B.2.2 "The __constant__ qualifier, optionally used together with // __device__, declares a variable that: [...] // Is accessible from all the threads within the grid and from the host // through the runtime library (cudaGetSymbolAddress() / cudaGetSymbolSize() // / cudaMemcpyToSymbol() / cudaMemcpyFromSymbol())." 
if (LangOpts.CUDA) { if (LangOpts.CUDAIsDevice) { if (Linkage != llvm::GlobalValue::InternalLinkage && (D->hasAttr() || D->hasAttr() || D->getType()->isCUDADeviceBuiltinSurfaceType() || D->getType()->isCUDADeviceBuiltinTextureType())) GV->setExternallyInitialized(true); } else { getCUDARuntime().internalizeDeviceSideVar(D, Linkage); } getCUDARuntime().handleVarRegistration(D, *GV); } GV->setInitializer(Init); if (emitter) emitter->finalize(GV); // If it is safe to mark the global 'constant', do so now. GV->setConstant(!NeedsGlobalCtor && !NeedsGlobalDtor && D->getType().isConstantStorage(getContext(), true, true)); // If it is in a read-only section, mark it 'constant'. if (const SectionAttr *SA = D->getAttr()) { const ASTContext::SectionInfo &SI = Context.SectionInfos[SA->getName()]; if ((SI.SectionFlags & ASTContext::PSF_Write) == 0) GV->setConstant(true); } CharUnits AlignVal = getContext().getDeclAlign(D); // Check for alignment specifed in an 'omp allocate' directive. if (std::optional AlignValFromAllocate = getOMPAllocateAlignment(D)) AlignVal = *AlignValFromAllocate; GV->setAlignment(AlignVal.getAsAlign()); // On Darwin, unlike other Itanium C++ ABI platforms, the thread-wrapper // function is only defined alongside the variable, not also alongside // callers. Normally, all accesses to a thread_local go through the // thread-wrapper in order to ensure initialization has occurred, underlying // variable will never be used other than the thread-wrapper, so it can be // converted to internal linkage. // // However, if the variable has the 'constinit' attribute, it _can_ be // referenced directly, without calling the thread-wrapper, so the linkage // must not be changed. // // Additionally, if the variable isn't plain external linkage, e.g. if it's // weak or linkonce, the de-duplication semantics are important to preserve, // so we don't change the linkage. if (D->getTLSKind() == VarDecl::TLS_Dynamic && Linkage == llvm::GlobalValue::ExternalLinkage && Context.getTargetInfo().getTriple().isOSDarwin() && !D->hasAttr()) Linkage = llvm::GlobalValue::InternalLinkage; GV->setLinkage(Linkage); if (D->hasAttr()) GV->setDLLStorageClass(llvm::GlobalVariable::DLLImportStorageClass); else if (D->hasAttr()) GV->setDLLStorageClass(llvm::GlobalVariable::DLLExportStorageClass); else GV->setDLLStorageClass(llvm::GlobalVariable::DefaultStorageClass); if (Linkage == llvm::GlobalVariable::CommonLinkage) { // common vars aren't constant even if declared const. GV->setConstant(false); // Tentative definition of global variables may be initialized with // non-zero null pointers. In this case they should have weak linkage // since common linkage must have zero initializer and must not have // explicit section therefore cannot have non-zero initial value. if (!GV->getInitializer()->isNullValue()) GV->setLinkage(llvm::GlobalVariable::WeakAnyLinkage); } setNonAliasAttributes(D, GV); if (D->getTLSKind() && !GV->isThreadLocal()) { if (D->getTLSKind() == VarDecl::TLS_Dynamic) CXXThreadLocals.push_back(D); setTLSMode(GV, *D); } maybeSetTrivialComdat(*D, *GV); // Emit the initializer function if necessary. if (NeedsGlobalCtor || NeedsGlobalDtor) EmitCXXGlobalVarDeclInitFunc(D, GV, NeedsGlobalCtor); SanitizerMD->reportGlobal(GV, *D, NeedsGlobalCtor); // Emit global variable debug information. 
if (CGDebugInfo *DI = getModuleDebugInfo()) if (getCodeGenOpts().hasReducedDebugInfo()) DI->EmitGlobalVariable(GV, D); } void CodeGenModule::EmitExternalVarDeclaration(const VarDecl *D) { if (CGDebugInfo *DI = getModuleDebugInfo()) if (getCodeGenOpts().hasReducedDebugInfo()) { QualType ASTTy = D->getType(); llvm::Type *Ty = getTypes().ConvertTypeForMem(D->getType()); llvm::Constant *GV = GetOrCreateLLVMGlobal(D->getName(), Ty, ASTTy.getAddressSpace(), D); DI->EmitExternalVariable( cast(GV->stripPointerCasts()), D); } } void CodeGenModule::EmitExternalFunctionDeclaration(const FunctionDecl *FD) { if (CGDebugInfo *DI = getModuleDebugInfo()) if (getCodeGenOpts().hasReducedDebugInfo()) { auto *Ty = getTypes().ConvertType(FD->getType()); StringRef MangledName = getMangledName(FD); auto *Fn = dyn_cast( GetOrCreateLLVMFunction(MangledName, Ty, FD, /* ForVTable */ false)); if (!Fn->getSubprogram()) DI->EmitFunctionDecl(FD, FD->getLocation(), FD->getType(), Fn); } } static bool isVarDeclStrongDefinition(const ASTContext &Context, CodeGenModule &CGM, const VarDecl *D, bool NoCommon) { // Don't give variables common linkage if -fno-common was specified unless it // was overridden by a NoCommon attribute. if ((NoCommon || D->hasAttr()) && !D->hasAttr()) return true; // C11 6.9.2/2: // A declaration of an identifier for an object that has file scope without // an initializer, and without a storage-class specifier or with the // storage-class specifier static, constitutes a tentative definition. if (D->getInit() || D->hasExternalStorage()) return true; // A variable cannot be both common and exist in a section. if (D->hasAttr()) return true; // A variable cannot be both common and exist in a section. // We don't try to determine which is the right section in the front-end. // If no specialized section name is applicable, it will resort to default. if (D->hasAttr() || D->hasAttr() || D->hasAttr() || D->hasAttr()) return true; // Thread local vars aren't considered common linkage. if (D->getTLSKind()) return true; // Tentative definitions marked with WeakImportAttr are true definitions. if (D->hasAttr()) return true; // A variable cannot be both common and exist in a comdat. if (shouldBeInCOMDAT(CGM, *D)) return true; // Declarations with a required alignment do not have common linkage in MSVC // mode. if (Context.getTargetInfo().getCXXABI().isMicrosoft()) { if (D->hasAttr()) return true; QualType VarType = D->getType(); if (Context.isAlignmentRequired(VarType)) return true; if (const auto *RT = VarType->getAs()) { const RecordDecl *RD = RT->getDecl(); for (const FieldDecl *FD : RD->fields()) { if (FD->isBitField()) continue; if (FD->hasAttr()) return true; if (Context.isAlignmentRequired(FD->getType())) return true; } } } // Microsoft's link.exe doesn't support alignments greater than 32 bytes for // common symbols, so symbols with greater alignment requirements cannot be // common. // Other COFF linkers (ld.bfd and LLD) support arbitrary power-of-two // alignments for common symbols via the aligncomm directive, so this // restriction only applies to MSVC environments. 
if (Context.getTargetInfo().getTriple().isKnownWindowsMSVCEnvironment() && Context.getTypeAlignIfKnown(D->getType()) > Context.toBits(CharUnits::fromQuantity(32))) return true; return false; } llvm::GlobalValue::LinkageTypes CodeGenModule::getLLVMLinkageForDeclarator(const DeclaratorDecl *D, GVALinkage Linkage) { if (Linkage == GVA_Internal) return llvm::Function::InternalLinkage; if (D->hasAttr()) return llvm::GlobalVariable::WeakAnyLinkage; if (const auto *FD = D->getAsFunction()) if (FD->isMultiVersion() && Linkage == GVA_AvailableExternally) return llvm::GlobalVariable::LinkOnceAnyLinkage; // We are guaranteed to have a strong definition somewhere else, // so we can use available_externally linkage. if (Linkage == GVA_AvailableExternally) return llvm::GlobalValue::AvailableExternallyLinkage; // Note that Apple's kernel linker doesn't support symbol // coalescing, so we need to avoid linkonce and weak linkages there. // Normally, this means we just map to internal, but for explicit // instantiations we'll map to external. // In C++, the compiler has to emit a definition in every translation unit // that references the function. We should use linkonce_odr because // a) if all references in this translation unit are optimized away, we // don't need to codegen it. b) if the function persists, it needs to be // merged with other definitions. c) C++ has the ODR, so we know the // definition is dependable. if (Linkage == GVA_DiscardableODR) return !Context.getLangOpts().AppleKext ? llvm::Function::LinkOnceODRLinkage : llvm::Function::InternalLinkage; // An explicit instantiation of a template has weak linkage, since // explicit instantiations can occur in multiple translation units // and must all be equivalent. However, we are not allowed to // throw away these explicit instantiations. // // CUDA/HIP: For -fno-gpu-rdc case, device code is limited to one TU, // so say that CUDA templates are either external (for kernels) or internal. // This lets llvm perform aggressive inter-procedural optimizations. For // -fgpu-rdc case, device function calls across multiple TU's are allowed, // therefore we need to follow the normal linkage paradigm. if (Linkage == GVA_StrongODR) { if (getLangOpts().AppleKext) return llvm::Function::ExternalLinkage; if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice && !getLangOpts().GPURelocatableDeviceCode) return D->hasAttr() ? llvm::Function::ExternalLinkage : llvm::Function::InternalLinkage; return llvm::Function::WeakODRLinkage; } // C++ doesn't have tentative definitions and thus cannot have common // linkage. if (!getLangOpts().CPlusPlus && isa(D) && !isVarDeclStrongDefinition(Context, *this, cast(D), CodeGenOpts.NoCommon)) return llvm::GlobalVariable::CommonLinkage; // selectany symbols are externally visible, so use weak instead of // linkonce. MSVC optimizes away references to const selectany globals, so // all definitions should be the same and ODR linkage should be used. // http://msdn.microsoft.com/en-us/library/5tkz6s71.aspx if (D->hasAttr()) return llvm::GlobalVariable::WeakODRLinkage; // Otherwise, we have strong external linkage. assert(Linkage == GVA_StrongExternal); return llvm::GlobalVariable::ExternalLinkage; } llvm::GlobalValue::LinkageTypes CodeGenModule::getLLVMLinkageVarDefinition(const VarDecl *VD) { GVALinkage Linkage = getContext().GetGVALinkageForVariable(VD); return getLLVMLinkageForDeclarator(VD, Linkage); } /// Replace the uses of a function that was declared with a non-proto type. 
/// We want to silently drop extra arguments from call sites
static void replaceUsesOfNonProtoConstant(llvm::Constant *old,
                                          llvm::Function *newFn) {
  // Fast path.
  if (old->use_empty())
    return;

  llvm::Type *newRetTy = newFn->getReturnType();
  SmallVector<llvm::Value *, 4> newArgs;

  SmallVector<llvm::CallBase *> callSitesToBeRemovedFromParent;

  for (llvm::Value::use_iterator ui = old->use_begin(), ue = old->use_end();
       ui != ue; ui++) {
    llvm::User *user = ui->getUser();

    // Recognize and replace uses of bitcasts. Most calls to
    // unprototyped functions will use bitcasts.
    if (auto *bitcast = dyn_cast<llvm::ConstantExpr>(user)) {
      if (bitcast->getOpcode() == llvm::Instruction::BitCast)
        replaceUsesOfNonProtoConstant(bitcast, newFn);
      continue;
    }

    // Recognize calls to the function.
    llvm::CallBase *callSite = dyn_cast<llvm::CallBase>(user);
    if (!callSite)
      continue;
    if (!callSite->isCallee(&*ui))
      continue;

    // If the return types don't match exactly, then we can't
    // transform this call unless it's dead.
    if (callSite->getType() != newRetTy && !callSite->use_empty())
      continue;

    // Get the call site's attribute list.
    SmallVector<llvm::AttributeSet, 8> newArgAttrs;
    llvm::AttributeList oldAttrs = callSite->getAttributes();

    // If the function was passed too few arguments, don't transform.
    unsigned newNumArgs = newFn->arg_size();
    if (callSite->arg_size() < newNumArgs)
      continue;

    // If extra arguments were passed, we silently drop them.
    // If any of the types mismatch, we don't transform.
    unsigned argNo = 0;
    bool dontTransform = false;
    for (llvm::Argument &A : newFn->args()) {
      if (callSite->getArgOperand(argNo)->getType() != A.getType()) {
        dontTransform = true;
        break;
      }

      // Add any parameter attributes.
      newArgAttrs.push_back(oldAttrs.getParamAttrs(argNo));
      argNo++;
    }
    if (dontTransform)
      continue;

    // Okay, we can transform this. Create the new call instruction and copy
    // over the required information.
    newArgs.append(callSite->arg_begin(), callSite->arg_begin() + argNo);

    // Copy over any operand bundles.
    SmallVector<llvm::OperandBundleDef, 1> newBundles;
    callSite->getOperandBundlesAsDefs(newBundles);

    llvm::CallBase *newCall;
    if (isa<llvm::CallInst>(callSite)) {
      newCall = llvm::CallInst::Create(newFn, newArgs, newBundles, "",
                                       callSite);
    } else {
      auto *oldInvoke = cast<llvm::InvokeInst>(callSite);
      newCall = llvm::InvokeInst::Create(
          newFn, oldInvoke->getNormalDest(), oldInvoke->getUnwindDest(),
          newArgs, newBundles, "", callSite);
    }
    newArgs.clear(); // for the next iteration

    if (!newCall->getType()->isVoidTy())
      newCall->takeName(callSite);
    newCall->setAttributes(
        llvm::AttributeList::get(newFn->getContext(), oldAttrs.getFnAttrs(),
                                 oldAttrs.getRetAttrs(), newArgAttrs));
    newCall->setCallingConv(callSite->getCallingConv());

    // Finally, remove the old call, replacing any uses with the new one.
    if (!callSite->use_empty())
      callSite->replaceAllUsesWith(newCall);

    // Copy debug location attached to CI.
    if (callSite->getDebugLoc())
      newCall->setDebugLoc(callSite->getDebugLoc());

    callSitesToBeRemovedFromParent.push_back(callSite);
  }

  for (auto *callSite : callSitesToBeRemovedFromParent) {
    callSite->eraseFromParent();
  }
}

/// ReplaceUsesOfNonProtoTypeWithRealFunction - This function is called when we
/// implement a function with no prototype, e.g. "int foo() {}". If there are
/// existing call uses of the old function in the module, this adjusts them to
/// call the new function directly.
///
/// This is not just a cleanup: the always_inline pass requires direct calls to
/// functions to be able to inline them. If there is a bitcast in the way, it
/// won't inline them. Instcombine normally deletes these calls, but it isn't
/// run at -O0.
static void ReplaceUsesOfNonProtoTypeWithRealFunction(llvm::GlobalValue *Old, llvm::Function *NewFn) { // If we're redefining a global as a function, don't transform it. if (!isa(Old)) return; replaceUsesOfNonProtoConstant(Old, NewFn); } void CodeGenModule::HandleCXXStaticMemberVarInstantiation(VarDecl *VD) { auto DK = VD->isThisDeclarationADefinition(); if ((DK == VarDecl::Definition && VD->hasAttr()) || (LangOpts.CUDA && !shouldEmitCUDAGlobalVar(VD))) return; TemplateSpecializationKind TSK = VD->getTemplateSpecializationKind(); // If we have a definition, this might be a deferred decl. If the // instantiation is explicit, make sure we emit it at the end. if (VD->getDefinition() && TSK == TSK_ExplicitInstantiationDefinition) GetAddrOfGlobalVar(VD); EmitTopLevelDecl(VD); } void CodeGenModule::EmitGlobalFunctionDefinition(GlobalDecl GD, llvm::GlobalValue *GV) { const auto *D = cast(GD.getDecl()); // Compute the function info and LLVM type. const CGFunctionInfo &FI = getTypes().arrangeGlobalDeclaration(GD); llvm::FunctionType *Ty = getTypes().GetFunctionType(FI); // Get or create the prototype for the function. if (!GV || (GV->getValueType() != Ty)) GV = cast(GetAddrOfFunction(GD, Ty, /*ForVTable=*/false, /*DontDefer=*/true, ForDefinition)); // Already emitted. if (!GV->isDeclaration()) return; // We need to set linkage and visibility on the function before // generating code for it because various parts of IR generation // want to propagate this information down (e.g. to local static // declarations). auto *Fn = cast(GV); setFunctionLinkage(GD, Fn); // FIXME: this is redundant with part of setFunctionDefinitionAttributes setGVProperties(Fn, GD); MaybeHandleStaticInExternC(D, Fn); maybeSetTrivialComdat(*D, *Fn); CodeGenFunction(*this).GenerateCode(GD, Fn, FI); setNonAliasAttributes(GD, Fn); SetLLVMFunctionAttributesForDefinition(D, Fn); if (const ConstructorAttr *CA = D->getAttr()) AddGlobalCtor(Fn, CA->getPriority()); if (const DestructorAttr *DA = D->getAttr()) AddGlobalDtor(Fn, DA->getPriority(), true); if (getLangOpts().OpenMP && D->hasAttr()) getOpenMPRuntime().emitDeclareTargetFunction(D, GV); } void CodeGenModule::EmitAliasDefinition(GlobalDecl GD) { const auto *D = cast(GD.getDecl()); const AliasAttr *AA = D->getAttr(); assert(AA && "Not an alias?"); StringRef MangledName = getMangledName(GD); if (AA->getAliasee() == MangledName) { Diags.Report(AA->getLocation(), diag::err_cyclic_alias) << 0; return; } // If there is a definition in the module, then it wins over the alias. // This is dubious, but allow it to be safe. Just ignore the alias. llvm::GlobalValue *Entry = GetGlobalValue(MangledName); if (Entry && !Entry->isDeclaration()) return; Aliases.push_back(GD); llvm::Type *DeclTy = getTypes().ConvertTypeForMem(D->getType()); // Create a reference to the named value. This ensures that it is emitted // if a deferred decl. llvm::Constant *Aliasee; llvm::GlobalValue::LinkageTypes LT; if (isa(DeclTy)) { Aliasee = GetOrCreateLLVMFunction(AA->getAliasee(), DeclTy, GD, /*ForVTable=*/false); LT = getFunctionLinkage(GD); } else { Aliasee = GetOrCreateLLVMGlobal(AA->getAliasee(), DeclTy, LangAS::Default, /*D=*/nullptr); if (const auto *VD = dyn_cast(GD.getDecl())) LT = getLLVMLinkageVarDefinition(VD); else LT = getFunctionLinkage(GD); } // Create the new alias itself, but don't set a name yet. 
unsigned AS = Aliasee->getType()->getPointerAddressSpace(); auto *GA = llvm::GlobalAlias::create(DeclTy, AS, LT, "", Aliasee, &getModule()); if (Entry) { if (GA->getAliasee() == Entry) { Diags.Report(AA->getLocation(), diag::err_cyclic_alias) << 0; return; } assert(Entry->isDeclaration()); // If there is a declaration in the module, then we had an extern followed // by the alias, as in: // extern int test6(); // ... // int test6() __attribute__((alias("test7"))); // // Remove it and replace uses of it with the alias. GA->takeName(Entry); Entry->replaceAllUsesWith(GA); Entry->eraseFromParent(); } else { GA->setName(MangledName); } // Set attributes which are particular to an alias; this is a // specialization of the attributes which may be set on a global // variable/function. if (D->hasAttr() || D->hasAttr() || D->isWeakImported()) { GA->setLinkage(llvm::Function::WeakAnyLinkage); } if (const auto *VD = dyn_cast(D)) if (VD->getTLSKind()) setTLSMode(GA, *VD); SetCommonAttributes(GD, GA); // Emit global alias debug information. if (isa(D)) if (CGDebugInfo *DI = getModuleDebugInfo()) DI->EmitGlobalAlias(cast(GA->getAliasee()->stripPointerCasts()), GD); } void CodeGenModule::emitIFuncDefinition(GlobalDecl GD) { const auto *D = cast(GD.getDecl()); const IFuncAttr *IFA = D->getAttr(); assert(IFA && "Not an ifunc?"); StringRef MangledName = getMangledName(GD); if (IFA->getResolver() == MangledName) { Diags.Report(IFA->getLocation(), diag::err_cyclic_alias) << 1; return; } // Report an error if some definition overrides ifunc. llvm::GlobalValue *Entry = GetGlobalValue(MangledName); if (Entry && !Entry->isDeclaration()) { GlobalDecl OtherGD; if (lookupRepresentativeDecl(MangledName, OtherGD) && DiagnosedConflictingDefinitions.insert(GD).second) { Diags.Report(D->getLocation(), diag::err_duplicate_mangled_name) << MangledName; Diags.Report(OtherGD.getDecl()->getLocation(), diag::note_previous_definition); } return; } Aliases.push_back(GD); // The resolver might not be visited yet. Specify a dummy non-function type to // indicate IsIncompleteFunction. Either the type is ignored (if the resolver // was emitted) or the whole function will be replaced (if the resolver has // not been emitted). llvm::Constant *Resolver = GetOrCreateLLVMFunction(IFA->getResolver(), VoidTy, {}, /*ForVTable=*/false); llvm::Type *DeclTy = getTypes().ConvertTypeForMem(D->getType()); llvm::GlobalIFunc *GIF = llvm::GlobalIFunc::create(DeclTy, 0, llvm::Function::ExternalLinkage, "", Resolver, &getModule()); if (Entry) { if (GIF->getResolver() == Entry) { Diags.Report(IFA->getLocation(), diag::err_cyclic_alias) << 1; return; } assert(Entry->isDeclaration()); // If there is a declaration in the module, then we had an extern followed // by the ifunc, as in: // extern int test(); // ... // int test() __attribute__((ifunc("resolver"))); // // Remove it and replace uses of it with the ifunc. GIF->takeName(Entry); Entry->replaceAllUsesWith(GIF); Entry->eraseFromParent(); } else GIF->setName(MangledName); SetCommonAttributes(GD, GIF); } llvm::Function *CodeGenModule::getIntrinsic(unsigned IID, ArrayRef Tys) { return llvm::Intrinsic::getDeclaration(&getModule(), (llvm::Intrinsic::ID)IID, Tys); } static llvm::StringMapEntry & GetConstantCFStringEntry(llvm::StringMap &Map, const StringLiteral *Literal, bool TargetIsLSB, bool &IsUTF16, unsigned &StringLength) { StringRef String = Literal->getString(); unsigned NumBytes = String.size(); // Check for simple case. 
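[Illustrative aside, not part of the patch: a hedged sketch of the source-level attributes that EmitAliasDefinition and emitIFuncDefinition lower; identifiers are invented and exact resolver signature conventions vary by target.]

  extern "C" {
  int dispatch_generic(void) { return 0; }
  int dispatch_fast(void)    { return 1; }

  // The resolver runs once at load time and returns the implementation to bind.
  static int (*pick_dispatch(void))(void) { return dispatch_fast; }
  int dispatch(void) __attribute__((ifunc("pick_dispatch")));

  // An alias is simply another symbol name for an existing definition.
  int dispatch_old(void) __attribute__((alias("dispatch_generic")));
  }

Both attributes name their target by symbol, which is why the code above resolves the aliasee or resolver through GetOrCreateLLVMFunction and reports err_cyclic_alias when the name refers back to itself.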
if (!Literal->containsNonAsciiOrNull()) { StringLength = NumBytes; return *Map.insert(std::make_pair(String, nullptr)).first; } // Otherwise, convert the UTF8 literals into a string of shorts. IsUTF16 = true; SmallVector ToBuf(NumBytes + 1); // +1 for ending nulls. const llvm::UTF8 *FromPtr = (const llvm::UTF8 *)String.data(); llvm::UTF16 *ToPtr = &ToBuf[0]; (void)llvm::ConvertUTF8toUTF16(&FromPtr, FromPtr + NumBytes, &ToPtr, ToPtr + NumBytes, llvm::strictConversion); // ConvertUTF8toUTF16 returns the length in ToPtr. StringLength = ToPtr - &ToBuf[0]; // Add an explicit null. *ToPtr = 0; return *Map.insert(std::make_pair( StringRef(reinterpret_cast(ToBuf.data()), (StringLength + 1) * 2), nullptr)).first; } ConstantAddress CodeGenModule::GetAddrOfConstantCFString(const StringLiteral *Literal) { unsigned StringLength = 0; bool isUTF16 = false; llvm::StringMapEntry &Entry = GetConstantCFStringEntry(CFConstantStringMap, Literal, getDataLayout().isLittleEndian(), isUTF16, StringLength); if (auto *C = Entry.second) return ConstantAddress( C, C->getValueType(), CharUnits::fromQuantity(C->getAlignment())); const ASTContext &Context = getContext(); const llvm::Triple &Triple = getTriple(); const auto CFRuntime = getLangOpts().CFRuntime; const bool IsSwiftABI = static_cast(CFRuntime) >= static_cast(LangOptions::CoreFoundationABI::Swift); const bool IsSwift4_1 = CFRuntime == LangOptions::CoreFoundationABI::Swift4_1; // If we don't already have it, get __CFConstantStringClassReference. if (!CFConstantStringClassRef) { const char *CFConstantStringClassName = "__CFConstantStringClassReference"; llvm::Type *Ty = getTypes().ConvertType(getContext().IntTy); Ty = llvm::ArrayType::get(Ty, 0); switch (CFRuntime) { default: break; case LangOptions::CoreFoundationABI::Swift: [[fallthrough]]; case LangOptions::CoreFoundationABI::Swift5_0: CFConstantStringClassName = Triple.isOSDarwin() ? "$s15SwiftFoundation19_NSCFConstantStringCN" : "$s10Foundation19_NSCFConstantStringCN"; Ty = IntPtrTy; break; case LangOptions::CoreFoundationABI::Swift4_2: CFConstantStringClassName = Triple.isOSDarwin() ? "$S15SwiftFoundation19_NSCFConstantStringCN" : "$S10Foundation19_NSCFConstantStringCN"; Ty = IntPtrTy; break; case LangOptions::CoreFoundationABI::Swift4_1: CFConstantStringClassName = Triple.isOSDarwin() ? "__T015SwiftFoundation19_NSCFConstantStringCN" : "__T010Foundation19_NSCFConstantStringCN"; Ty = IntPtrTy; break; } llvm::Constant *C = CreateRuntimeVariable(Ty, CFConstantStringClassName); if (Triple.isOSBinFormatELF() || Triple.isOSBinFormatCOFF()) { llvm::GlobalValue *GV = nullptr; if ((GV = dyn_cast(C))) { IdentifierInfo &II = Context.Idents.get(GV->getName()); TranslationUnitDecl *TUDecl = Context.getTranslationUnitDecl(); DeclContext *DC = TranslationUnitDecl::castToDeclContext(TUDecl); const VarDecl *VD = nullptr; for (const auto *Result : DC->lookup(&II)) if ((VD = dyn_cast(Result))) break; if (Triple.isOSBinFormatELF()) { if (!VD) GV->setLinkage(llvm::GlobalValue::ExternalLinkage); } else { GV->setLinkage(llvm::GlobalValue::ExternalLinkage); if (!VD || !VD->hasAttr()) GV->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass); else GV->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass); } setDSOLocal(GV); } } // Decay array -> ptr CFConstantStringClassRef = IsSwiftABI ? 
llvm::ConstantExpr::getPtrToInt(C, Ty) : C; } QualType CFTy = Context.getCFConstantStringType(); auto *STy = cast(getTypes().ConvertType(CFTy)); ConstantInitBuilder Builder(*this); auto Fields = Builder.beginStruct(STy); // Class pointer. Fields.add(cast(CFConstantStringClassRef)); // Flags. if (IsSwiftABI) { Fields.addInt(IntPtrTy, IsSwift4_1 ? 0x05 : 0x01); Fields.addInt(Int64Ty, isUTF16 ? 0x07d0 : 0x07c8); } else { Fields.addInt(IntTy, isUTF16 ? 0x07d0 : 0x07C8); } // String pointer. llvm::Constant *C = nullptr; if (isUTF16) { auto Arr = llvm::ArrayRef( reinterpret_cast(const_cast(Entry.first().data())), Entry.first().size() / 2); C = llvm::ConstantDataArray::get(VMContext, Arr); } else { C = llvm::ConstantDataArray::getString(VMContext, Entry.first()); } // Note: -fwritable-strings doesn't make the backing store strings of // CFStrings writable. auto *GV = new llvm::GlobalVariable(getModule(), C->getType(), /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, C, ".str"); GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); // Don't enforce the target's minimum global alignment, since the only use // of the string is via this class initializer. CharUnits Align = isUTF16 ? Context.getTypeAlignInChars(Context.ShortTy) : Context.getTypeAlignInChars(Context.CharTy); GV->setAlignment(Align.getAsAlign()); // FIXME: We set the section explicitly to avoid a bug in ld64 224.1. // Without it LLVM can merge the string with a non unnamed_addr one during // LTO. Doing that changes the section it ends in, which surprises ld64. if (Triple.isOSBinFormatMachO()) GV->setSection(isUTF16 ? "__TEXT,__ustring" : "__TEXT,__cstring,cstring_literals"); // Make sure the literal ends up in .rodata to allow for safe ICF and for // the static linker to adjust permissions to read-only later on. else if (Triple.isOSBinFormatELF()) GV->setSection(".rodata"); // String. Fields.add(GV); // String length. llvm::IntegerType *LengthTy = llvm::IntegerType::get(getModule().getContext(), Context.getTargetInfo().getLongWidth()); if (IsSwiftABI) { if (CFRuntime == LangOptions::CoreFoundationABI::Swift4_1 || CFRuntime == LangOptions::CoreFoundationABI::Swift4_2) LengthTy = Int32Ty; else LengthTy = IntPtrTy; } Fields.addInt(LengthTy, StringLength); // Swift ABI requires 8-byte alignment to ensure that the _Atomic(uint64_t) is // properly aligned on 32-bit platforms. CharUnits Alignment = IsSwiftABI ? Context.toCharUnitsFromBits(64) : getPointerAlign(); // The struct. 
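[Illustrative aside, not part of the patch: roughly what the constant assembled above looks like for the classic, non-Swift CoreFoundation ABI, written as a C-style struct sketch; the field names are invented, the flag values are the ones used in the code.]

  struct __ConstantCFString {     // one per unique CFSTR / @"..." literal
    const void *isa;              // &__CFConstantStringClassReference
    int         flags;            // 0x7C8 for 1-byte storage, 0x7D0 for UTF-16
    const void *payload;          // the private ".str" global holding the bytes
    long        length;           // length in characters (UTF-16 code units when wide)
  };

The Swift CoreFoundation ABIs adjust this layout (pointer-sized class reference stored as an integer, an additional flags word, and a 32-bit length for Swift 4.1/4.2), following the branches above.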
GV = Fields.finishAndCreateGlobal("_unnamed_cfstring_", Alignment, /*isConstant=*/false, llvm::GlobalVariable::PrivateLinkage); GV->addAttribute("objc_arc_inert"); switch (Triple.getObjectFormat()) { case llvm::Triple::UnknownObjectFormat: llvm_unreachable("unknown file format"); case llvm::Triple::DXContainer: case llvm::Triple::GOFF: case llvm::Triple::SPIRV: case llvm::Triple::XCOFF: llvm_unreachable("unimplemented"); case llvm::Triple::COFF: case llvm::Triple::ELF: case llvm::Triple::Wasm: GV->setSection("cfstring"); break; case llvm::Triple::MachO: GV->setSection("__DATA,__cfstring"); break; } Entry.second = GV; return ConstantAddress(GV, GV->getValueType(), Alignment); } bool CodeGenModule::getExpressionLocationsEnabled() const { return !CodeGenOpts.EmitCodeView || CodeGenOpts.DebugColumnInfo; } QualType CodeGenModule::getObjCFastEnumerationStateType() { if (ObjCFastEnumerationStateType.isNull()) { RecordDecl *D = Context.buildImplicitRecord("__objcFastEnumerationState"); D->startDefinition(); QualType FieldTypes[] = { Context.UnsignedLongTy, Context.getPointerType(Context.getObjCIdType()), Context.getPointerType(Context.UnsignedLongTy), Context.getConstantArrayType(Context.UnsignedLongTy, llvm::APInt(32, 5), nullptr, ArraySizeModifier::Normal, 0)}; for (size_t i = 0; i < 4; ++i) { FieldDecl *Field = FieldDecl::Create(Context, D, SourceLocation(), SourceLocation(), nullptr, FieldTypes[i], /*TInfo=*/nullptr, /*BitWidth=*/nullptr, /*Mutable=*/false, ICIS_NoInit); Field->setAccess(AS_public); D->addDecl(Field); } D->completeDefinition(); ObjCFastEnumerationStateType = Context.getTagDeclType(D); } return ObjCFastEnumerationStateType; } llvm::Constant * CodeGenModule::GetConstantArrayFromStringLiteral(const StringLiteral *E) { assert(!E->getType()->isPointerType() && "Strings are always arrays"); // Don't emit it as the address of the string, emit the string data itself // as an inline array. if (E->getCharByteWidth() == 1) { SmallString<64> Str(E->getString()); // Resize the string to the right size, which is indicated by its type. const ConstantArrayType *CAT = Context.getAsConstantArrayType(E->getType()); assert(CAT && "String literal not of constant array type!"); Str.resize(CAT->getZExtSize()); return llvm::ConstantDataArray::getString(VMContext, Str, false); } auto *AType = cast(getTypes().ConvertType(E->getType())); llvm::Type *ElemTy = AType->getElementType(); unsigned NumElements = AType->getNumElements(); // Wide strings have either 2-byte or 4-byte elements. 
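[Illustrative aside, not part of the patch: the implicit record built by getObjCFastEnumerationStateType above corresponds to this C-level layout, matching the runtime's NSFastEnumerationState; the field names here are only illustrative.]

  struct __objcFastEnumerationState {
    unsigned long  state;
    void         **itemsPtr;        // 'id *' in Objective-C
    unsigned long *mutationsPtr;
    unsigned long  extra[5];
  };

It backs Objective-C fast enumeration ("for (id x in collection)"), which is why CodeGen needs the type even though no header declares it in the translation unit.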
if (ElemTy->getPrimitiveSizeInBits() == 16) { SmallVector Elements; Elements.reserve(NumElements); for(unsigned i = 0, e = E->getLength(); i != e; ++i) Elements.push_back(E->getCodeUnit(i)); Elements.resize(NumElements); return llvm::ConstantDataArray::get(VMContext, Elements); } assert(ElemTy->getPrimitiveSizeInBits() == 32); SmallVector Elements; Elements.reserve(NumElements); for(unsigned i = 0, e = E->getLength(); i != e; ++i) Elements.push_back(E->getCodeUnit(i)); Elements.resize(NumElements); return llvm::ConstantDataArray::get(VMContext, Elements); } static llvm::GlobalVariable * GenerateStringLiteral(llvm::Constant *C, llvm::GlobalValue::LinkageTypes LT, CodeGenModule &CGM, StringRef GlobalName, CharUnits Alignment) { unsigned AddrSpace = CGM.getContext().getTargetAddressSpace( CGM.GetGlobalConstantAddressSpace()); llvm::Module &M = CGM.getModule(); // Create a global variable for this string auto *GV = new llvm::GlobalVariable( M, C->getType(), !CGM.getLangOpts().WritableStrings, LT, C, GlobalName, nullptr, llvm::GlobalVariable::NotThreadLocal, AddrSpace); GV->setAlignment(Alignment.getAsAlign()); GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); if (GV->isWeakForLinker()) { assert(CGM.supportsCOMDAT() && "Only COFF uses weak string literals"); GV->setComdat(M.getOrInsertComdat(GV->getName())); } CGM.setDSOLocal(GV); return GV; } /// GetAddrOfConstantStringFromLiteral - Return a pointer to a /// constant array for the given string literal. ConstantAddress CodeGenModule::GetAddrOfConstantStringFromLiteral(const StringLiteral *S, StringRef Name) { CharUnits Alignment = getContext().getAlignOfGlobalVarInChars(S->getType(), /*VD=*/nullptr); llvm::Constant *C = GetConstantArrayFromStringLiteral(S); llvm::GlobalVariable **Entry = nullptr; if (!LangOpts.WritableStrings) { Entry = &ConstantStringMap[C]; if (auto GV = *Entry) { if (uint64_t(Alignment.getQuantity()) > GV->getAlignment()) GV->setAlignment(Alignment.getAsAlign()); return ConstantAddress(castStringLiteralToDefaultAddressSpace(*this, GV), GV->getValueType(), Alignment); } } SmallString<256> MangledNameBuffer; StringRef GlobalVariableName; llvm::GlobalValue::LinkageTypes LT; // Mangle the string literal if that's how the ABI merges duplicate strings. // Don't do it if they are writable, since we don't want writes in one TU to // affect strings in another. if (getCXXABI().getMangleContext().shouldMangleStringLiteral(S) && !LangOpts.WritableStrings) { llvm::raw_svector_ostream Out(MangledNameBuffer); getCXXABI().getMangleContext().mangleStringLiteral(S, Out); LT = llvm::GlobalValue::LinkOnceODRLinkage; GlobalVariableName = MangledNameBuffer; } else { LT = llvm::GlobalValue::PrivateLinkage; GlobalVariableName = Name; } auto GV = GenerateStringLiteral(C, LT, *this, GlobalVariableName, Alignment); CGDebugInfo *DI = getModuleDebugInfo(); if (DI && getCodeGenOpts().hasReducedDebugInfo()) DI->AddStringLiteralDebugInfo(GV, S); if (Entry) *Entry = GV; SanitizerMD->reportGlobal(GV, S->getStrTokenLoc(0), ""); return ConstantAddress(castStringLiteralToDefaultAddressSpace(*this, GV), GV->getValueType(), Alignment); } /// GetAddrOfConstantStringFromObjCEncode - Return a pointer to a constant /// array for the given ObjCEncodeExpr node. 
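[Illustrative aside, not part of the patch: what the mangling decision above means at the source level; the behaviour notes are taken from the surrounding code and comments.]

  // Two uses of one literal in a TU share a single unnamed_addr global
  // (found through ConstantStringMap) unless -fwritable-strings is on:
  const char *a() { return "hello"; }   // typically a private ".str" global
  const char *b() { return "hello"; }   // reuses the same global

Under an ABI whose mangler reports shouldMangleStringLiteral (for example the Microsoft C++ ABI), the literal instead gets a mangled name with linkonce_odr linkage so identical literals from different TUs can be folded; with -fwritable-strings every use keeps its own writable copy and no sharing happens at all.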
ConstantAddress CodeGenModule::GetAddrOfConstantStringFromObjCEncode(const ObjCEncodeExpr *E) { std::string Str; getContext().getObjCEncodingForType(E->getEncodedType(), Str); return GetAddrOfConstantCString(Str); } /// GetAddrOfConstantCString - Returns a pointer to a character array containing /// the literal and a terminating '\0' character. /// The result has pointer to array type. ConstantAddress CodeGenModule::GetAddrOfConstantCString( const std::string &Str, const char *GlobalName) { StringRef StrWithNull(Str.c_str(), Str.size() + 1); CharUnits Alignment = getContext().getAlignOfGlobalVarInChars( getContext().CharTy, /*VD=*/nullptr); llvm::Constant *C = llvm::ConstantDataArray::getString(getLLVMContext(), StrWithNull, false); // Don't share any string literals if strings aren't constant. llvm::GlobalVariable **Entry = nullptr; if (!LangOpts.WritableStrings) { Entry = &ConstantStringMap[C]; if (auto GV = *Entry) { if (uint64_t(Alignment.getQuantity()) > GV->getAlignment()) GV->setAlignment(Alignment.getAsAlign()); return ConstantAddress(castStringLiteralToDefaultAddressSpace(*this, GV), GV->getValueType(), Alignment); } } // Get the default prefix if a name wasn't specified. if (!GlobalName) GlobalName = ".str"; // Create a global variable for this. auto GV = GenerateStringLiteral(C, llvm::GlobalValue::PrivateLinkage, *this, GlobalName, Alignment); if (Entry) *Entry = GV; return ConstantAddress(castStringLiteralToDefaultAddressSpace(*this, GV), GV->getValueType(), Alignment); } ConstantAddress CodeGenModule::GetAddrOfGlobalTemporary( const MaterializeTemporaryExpr *E, const Expr *Init) { assert((E->getStorageDuration() == SD_Static || E->getStorageDuration() == SD_Thread) && "not a global temporary"); const auto *VD = cast(E->getExtendingDecl()); // If we're not materializing a subobject of the temporary, keep the // cv-qualifiers from the type of the MaterializeTemporaryExpr. QualType MaterializedType = Init->getType(); if (Init == E->getSubExpr()) MaterializedType = E->getType(); CharUnits Align = getContext().getTypeAlignInChars(MaterializedType); auto InsertResult = MaterializedGlobalTemporaryMap.insert({E, nullptr}); if (!InsertResult.second) { // We've seen this before: either we already created it or we're in the // process of doing so. if (!InsertResult.first->second) { // We recursively re-entered this function, probably during emission of // the initializer. Create a placeholder. We'll clean this up in the // outer call, at the end of this function. llvm::Type *Type = getTypes().ConvertTypeForMem(MaterializedType); InsertResult.first->second = new llvm::GlobalVariable( getModule(), Type, false, llvm::GlobalVariable::InternalLinkage, nullptr); } return ConstantAddress(InsertResult.first->second, llvm::cast( InsertResult.first->second->stripPointerCasts()) ->getValueType(), Align); } // FIXME: If an externally-visible declaration extends multiple temporaries, // we need to give each temporary the same name in every translation unit (and // we also need to make the temporaries externally-visible). SmallString<256> Name; llvm::raw_svector_ostream Out(Name); getCXXABI().getMangleContext().mangleReferenceTemporary( VD, E->getManglingNumber(), Out); APValue *Value = nullptr; if (E->getStorageDuration() == SD_Static && VD->evaluateValue()) { // If the initializer of the extending declaration is a constant // initializer, we should have a cached constant initializer for this // temporary. 
Note that this might have a different value from the value // computed by evaluating the initializer if the surrounding constant // expression modifies the temporary. Value = E->getOrCreateValue(false); } // Try evaluating it now, it might have a constant initializer. Expr::EvalResult EvalResult; if (!Value && Init->EvaluateAsRValue(EvalResult, getContext()) && !EvalResult.hasSideEffects()) Value = &EvalResult.Val; LangAS AddrSpace = GetGlobalVarAddressSpace(VD); std::optional emitter; llvm::Constant *InitialValue = nullptr; bool Constant = false; llvm::Type *Type; if (Value) { // The temporary has a constant initializer, use it. emitter.emplace(*this); InitialValue = emitter->emitForInitializer(*Value, AddrSpace, MaterializedType); Constant = MaterializedType.isConstantStorage(getContext(), /*ExcludeCtor*/ Value, /*ExcludeDtor*/ false); Type = InitialValue->getType(); } else { // No initializer, the initialization will be provided when we // initialize the declaration which performed lifetime extension. Type = getTypes().ConvertTypeForMem(MaterializedType); } // Create a global variable for this lifetime-extended temporary. llvm::GlobalValue::LinkageTypes Linkage = getLLVMLinkageVarDefinition(VD); if (Linkage == llvm::GlobalVariable::ExternalLinkage) { const VarDecl *InitVD; if (VD->isStaticDataMember() && VD->getAnyInitializer(InitVD) && isa(InitVD->getLexicalDeclContext())) { // Temporaries defined inside a class get linkonce_odr linkage because the // class can be defined in multiple translation units. Linkage = llvm::GlobalVariable::LinkOnceODRLinkage; } else { // There is no need for this temporary to have external linkage if the // VarDecl has external linkage. Linkage = llvm::GlobalVariable::InternalLinkage; } } auto TargetAS = getContext().getTargetAddressSpace(AddrSpace); auto *GV = new llvm::GlobalVariable( getModule(), Type, Constant, Linkage, InitialValue, Name.c_str(), /*InsertBefore=*/nullptr, llvm::GlobalVariable::NotThreadLocal, TargetAS); if (emitter) emitter->finalize(GV); // Don't assign dllimport or dllexport to local linkage globals. if (!llvm::GlobalValue::isLocalLinkage(Linkage)) { setGVProperties(GV, VD); if (GV->getDLLStorageClass() == llvm::GlobalVariable::DLLExportStorageClass) // The reference temporary should never be dllexport. GV->setDLLStorageClass(llvm::GlobalVariable::DefaultStorageClass); } GV->setAlignment(Align.getAsAlign()); if (supportsCOMDAT() && GV->isWeakForLinker()) GV->setComdat(TheModule.getOrInsertComdat(GV->getName())); if (VD->getTLSKind()) setTLSMode(GV, *VD); llvm::Constant *CV = GV; if (AddrSpace != LangAS::Default) CV = getTargetCodeGenInfo().performAddrSpaceCast( *this, GV, AddrSpace, LangAS::Default, llvm::PointerType::get( getLLVMContext(), getContext().getTargetAddressSpace(LangAS::Default))); // Update the map with the new temporary. If we created a placeholder above, // replace it with the new global now. llvm::Constant *&Entry = MaterializedGlobalTemporaryMap[E]; if (Entry) { Entry->replaceAllUsesWith(CV); llvm::cast(Entry)->eraseFromParent(); } Entry = CV; return ConstantAddress(CV, Type, Align); } /// EmitObjCPropertyImplementations - Emit information for synthesized /// properties for an implementation. void CodeGenModule::EmitObjCPropertyImplementations(const ObjCImplementationDecl *D) { for (const auto *PID : D->property_impls()) { // Dynamic is just for type-checking. 
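[Illustrative aside, not part of the patch: C++ source that reaches GetAddrOfGlobalTemporary; identifiers are invented.]

  // A namespace-scope reference lifetime-extends its temporary to static
  // storage duration, so the temporary is materialized as its own global.
  const int &answer = 40 + 2;

  // thread_local references do the same with SD_Thread storage duration.
  thread_local const int &per_thread_answer = 40 + 2;

The linkonce_odr branch above covers static data members whose initializer appears inside the class definition, since that definition may be repeated in several translation units; other externally-visible extending declarations instead get an internal-linkage backing temporary, as the comments in the code spell out.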
if (PID->getPropertyImplementation() == ObjCPropertyImplDecl::Synthesize) { ObjCPropertyDecl *PD = PID->getPropertyDecl(); // Determine which methods need to be implemented, some may have // been overridden. Note that ::isPropertyAccessor is not the method // we want, that just indicates if the decl came from a // property. What we want to know is if the method is defined in // this implementation. auto *Getter = PID->getGetterMethodDecl(); if (!Getter || Getter->isSynthesizedAccessorStub()) CodeGenFunction(*this).GenerateObjCGetter( const_cast(D), PID); auto *Setter = PID->getSetterMethodDecl(); if (!PD->isReadOnly() && (!Setter || Setter->isSynthesizedAccessorStub())) CodeGenFunction(*this).GenerateObjCSetter( const_cast(D), PID); } } } static bool needsDestructMethod(ObjCImplementationDecl *impl) { const ObjCInterfaceDecl *iface = impl->getClassInterface(); for (const ObjCIvarDecl *ivar = iface->all_declared_ivar_begin(); ivar; ivar = ivar->getNextIvar()) if (ivar->getType().isDestructedType()) return true; return false; } static bool AllTrivialInitializers(CodeGenModule &CGM, ObjCImplementationDecl *D) { CodeGenFunction CGF(CGM); for (ObjCImplementationDecl::init_iterator B = D->init_begin(), E = D->init_end(); B != E; ++B) { CXXCtorInitializer *CtorInitExp = *B; Expr *Init = CtorInitExp->getInit(); if (!CGF.isTrivialInitializer(Init)) return false; } return true; } /// EmitObjCIvarInitializations - Emit information for ivar initialization /// for an implementation. void CodeGenModule::EmitObjCIvarInitializations(ObjCImplementationDecl *D) { // We might need a .cxx_destruct even if we don't have any ivar initializers. if (needsDestructMethod(D)) { const IdentifierInfo *II = &getContext().Idents.get(".cxx_destruct"); Selector cxxSelector = getContext().Selectors.getSelector(0, &II); ObjCMethodDecl *DTORMethod = ObjCMethodDecl::Create( getContext(), D->getLocation(), D->getLocation(), cxxSelector, getContext().VoidTy, nullptr, D, /*isInstance=*/true, /*isVariadic=*/false, /*isPropertyAccessor=*/true, /*isSynthesizedAccessorStub=*/false, /*isImplicitlyDeclared=*/true, /*isDefined=*/false, ObjCImplementationControl::Required); D->addInstanceMethod(DTORMethod); CodeGenFunction(*this).GenerateObjCCtorDtorMethod(D, DTORMethod, false); D->setHasDestructors(true); } // If the implementation doesn't have any ivar initializers, we don't need // a .cxx_construct. if (D->getNumIvarInitializers() == 0 || AllTrivialInitializers(*this, D)) return; const IdentifierInfo *II = &getContext().Idents.get(".cxx_construct"); Selector cxxSelector = getContext().Selectors.getSelector(0, &II); // The constructor returns 'self'. ObjCMethodDecl *CTORMethod = ObjCMethodDecl::Create( getContext(), D->getLocation(), D->getLocation(), cxxSelector, getContext().getObjCIdType(), nullptr, D, /*isInstance=*/true, /*isVariadic=*/false, /*isPropertyAccessor=*/true, /*isSynthesizedAccessorStub=*/false, /*isImplicitlyDeclared=*/true, /*isDefined=*/false, ObjCImplementationControl::Required); D->addInstanceMethod(CTORMethod); CodeGenFunction(*this).GenerateObjCCtorDtorMethod(D, CTORMethod, true); D->setHasNonZeroConstructors(true); } // EmitLinkageSpec - Emit all declarations in a linkage spec. 
void CodeGenModule::EmitLinkageSpec(const LinkageSpecDecl *LSD) { if (LSD->getLanguage() != LinkageSpecLanguageIDs::C && LSD->getLanguage() != LinkageSpecLanguageIDs::CXX) { ErrorUnsupported(LSD, "linkage spec"); return; } EmitDeclContext(LSD); } void CodeGenModule::EmitTopLevelStmt(const TopLevelStmtDecl *D) { // Device code should not be at top level. if (LangOpts.CUDA && LangOpts.CUDAIsDevice) return; std::unique_ptr &CurCGF = GlobalTopLevelStmtBlockInFlight.first; // We emitted a top-level stmt but after it there is initialization. // Stop squashing the top-level stmts into a single function. if (CurCGF && CXXGlobalInits.back() != CurCGF->CurFn) { CurCGF->FinishFunction(D->getEndLoc()); CurCGF = nullptr; } if (!CurCGF) { // void __stmts__N(void) // FIXME: Ask the ABI name mangler to pick a name. std::string Name = "__stmts__" + llvm::utostr(CXXGlobalInits.size()); FunctionArgList Args; QualType RetTy = getContext().VoidTy; const CGFunctionInfo &FnInfo = getTypes().arrangeBuiltinFunctionDeclaration(RetTy, Args); llvm::FunctionType *FnTy = getTypes().GetFunctionType(FnInfo); llvm::Function *Fn = llvm::Function::Create( FnTy, llvm::GlobalValue::InternalLinkage, Name, &getModule()); CurCGF.reset(new CodeGenFunction(*this)); GlobalTopLevelStmtBlockInFlight.second = D; CurCGF->StartFunction(GlobalDecl(), RetTy, Fn, FnInfo, Args, D->getBeginLoc(), D->getBeginLoc()); CXXGlobalInits.push_back(Fn); } CurCGF->EmitStmt(D->getStmt()); } void CodeGenModule::EmitDeclContext(const DeclContext *DC) { for (auto *I : DC->decls()) { // Unlike other DeclContexts, the contents of an ObjCImplDecl at TU scope // are themselves considered "top-level", so EmitTopLevelDecl on an // ObjCImplDecl does not recursively visit them. We need to do that in // case they're nested inside another construct (LinkageSpecDecl / // ExportDecl) that does stop them from being considered "top-level". if (auto *OID = dyn_cast(I)) { for (auto *M : OID->methods()) EmitTopLevelDecl(M); } EmitTopLevelDecl(I); } } /// EmitTopLevelDecl - Emit code for a single top level declaration. void CodeGenModule::EmitTopLevelDecl(Decl *D) { // Ignore dependent declarations. if (D->isTemplated()) return; // Consteval function shouldn't be emitted. if (auto *FD = dyn_cast(D); FD && FD->isImmediateFunction()) return; switch (D->getKind()) { case Decl::CXXConversion: case Decl::CXXMethod: case Decl::Function: EmitGlobal(cast(D)); // Always provide some coverage mapping // even for the functions that aren't emitted. AddDeferredUnusedCoverageMapping(D); break; case Decl::CXXDeductionGuide: // Function-like, but does not result in code emission. break; case Decl::Var: case Decl::Decomposition: case Decl::VarTemplateSpecialization: EmitGlobal(cast(D)); if (auto *DD = dyn_cast(D)) for (auto *B : DD->bindings()) if (auto *HD = B->getHoldingVar()) EmitGlobal(HD); break; // Indirect fields from global anonymous structs and unions can be // ignored; only the actual variable requires IR gen support. 
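[Illustrative aside, not part of the patch: TopLevelStmtDecl only appears under incremental processing (clang-repl / -fincremental-extensions); a sketch of the squashing performed above, with the conceptual output described in the trailing comment.]

  int counter = 0;
  counter += 2;     // top-level statement, valid only in the incremental dialect
  counter *= 3;     // squashed into the same synthetic function as the line above
  // Conceptually the two statements become an internal 'void __stmts__0(void)'
  // appended to CXXGlobalInits, so they run in order with ordinary dynamic
  // initializers; a fresh __stmts__N is started once an initializer intervenes.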
case Decl::IndirectField: break; // C++ Decls case Decl::Namespace: EmitDeclContext(cast(D)); break; case Decl::ClassTemplateSpecialization: { const auto *Spec = cast(D); if (CGDebugInfo *DI = getModuleDebugInfo()) if (Spec->getSpecializationKind() == TSK_ExplicitInstantiationDefinition && Spec->hasDefinition()) DI->completeTemplateDefinition(*Spec); } [[fallthrough]]; case Decl::CXXRecord: { CXXRecordDecl *CRD = cast(D); if (CGDebugInfo *DI = getModuleDebugInfo()) { if (CRD->hasDefinition()) DI->EmitAndRetainType(getContext().getRecordType(cast(D))); if (auto *ES = D->getASTContext().getExternalSource()) if (ES->hasExternalDefinitions(D) == ExternalASTSource::EK_Never) DI->completeUnusedClass(*CRD); } // Emit any static data members, they may be definitions. for (auto *I : CRD->decls()) if (isa(I) || isa(I)) EmitTopLevelDecl(I); break; } // No code generation needed. case Decl::UsingShadow: case Decl::ClassTemplate: case Decl::VarTemplate: case Decl::Concept: case Decl::VarTemplatePartialSpecialization: case Decl::FunctionTemplate: case Decl::TypeAliasTemplate: case Decl::Block: case Decl::Empty: case Decl::Binding: break; case Decl::Using: // using X; [C++] if (CGDebugInfo *DI = getModuleDebugInfo()) DI->EmitUsingDecl(cast(*D)); break; case Decl::UsingEnum: // using enum X; [C++] if (CGDebugInfo *DI = getModuleDebugInfo()) DI->EmitUsingEnumDecl(cast(*D)); break; case Decl::NamespaceAlias: if (CGDebugInfo *DI = getModuleDebugInfo()) DI->EmitNamespaceAlias(cast(*D)); break; case Decl::UsingDirective: // using namespace X; [C++] if (CGDebugInfo *DI = getModuleDebugInfo()) DI->EmitUsingDirective(cast(*D)); break; case Decl::CXXConstructor: getCXXABI().EmitCXXConstructors(cast(D)); break; case Decl::CXXDestructor: getCXXABI().EmitCXXDestructors(cast(D)); break; case Decl::StaticAssert: // Nothing to do. break; // Objective-C Decls // Forward declarations, no (immediate) code generation. case Decl::ObjCInterface: case Decl::ObjCCategory: break; case Decl::ObjCProtocol: { auto *Proto = cast(D); if (Proto->isThisDeclarationADefinition()) ObjCRuntime->GenerateProtocol(Proto); break; } case Decl::ObjCCategoryImpl: // Categories have properties but don't support synthesize so we // can ignore them here. ObjCRuntime->GenerateCategory(cast(D)); break; case Decl::ObjCImplementation: { auto *OMD = cast(D); EmitObjCPropertyImplementations(OMD); EmitObjCIvarInitializations(OMD); ObjCRuntime->GenerateClass(OMD); // Emit global variable debug information. if (CGDebugInfo *DI = getModuleDebugInfo()) if (getCodeGenOpts().hasReducedDebugInfo()) DI->getOrCreateInterfaceType(getContext().getObjCInterfaceType( OMD->getClassInterface()), OMD->getLocation()); break; } case Decl::ObjCMethod: { auto *OMD = cast(D); // If this is not a prototype, emit the body. if (OMD->getBody()) CodeGenFunction(*this).GenerateObjCMethod(OMD); break; } case Decl::ObjCCompatibleAlias: ObjCRuntime->RegisterAlias(cast(D)); break; case Decl::PragmaComment: { const auto *PCD = cast(D); switch (PCD->getCommentKind()) { case PCK_Unknown: llvm_unreachable("unexpected pragma comment kind"); case PCK_Linker: AppendLinkerOptions(PCD->getArg()); break; case PCK_Lib: AddDependentLib(PCD->getArg()); break; case PCK_Compiler: case PCK_ExeStr: case PCK_User: break; // We ignore all of these. 
} break; } case Decl::PragmaDetectMismatch: { const auto *PDMD = cast(D); AddDetectMismatch(PDMD->getName(), PDMD->getValue()); break; } case Decl::LinkageSpec: EmitLinkageSpec(cast(D)); break; case Decl::FileScopeAsm: { // File-scope asm is ignored during device-side CUDA compilation. if (LangOpts.CUDA && LangOpts.CUDAIsDevice) break; // File-scope asm is ignored during device-side OpenMP compilation. if (LangOpts.OpenMPIsTargetDevice) break; // File-scope asm is ignored during device-side SYCL compilation. if (LangOpts.SYCLIsDevice) break; auto *AD = cast(D); getModule().appendModuleInlineAsm(AD->getAsmString()->getString()); break; } case Decl::TopLevelStmt: EmitTopLevelStmt(cast(D)); break; case Decl::Import: { auto *Import = cast(D); // If we've already imported this module, we're done. if (!ImportedModules.insert(Import->getImportedModule())) break; // Emit debug information for direct imports. if (!Import->getImportedOwningModule()) { if (CGDebugInfo *DI = getModuleDebugInfo()) DI->EmitImportDecl(*Import); } // For C++ standard modules we are done - we will call the module // initializer for imported modules, and that will likewise call those for // any imports it has. - if (CXX20ModuleInits && Import->getImportedOwningModule() && - !Import->getImportedOwningModule()->isModuleMapModule()) + if (CXX20ModuleInits && Import->getImportedModule() && + Import->getImportedModule()->isNamedModule()) break; // For clang C++ module map modules the initializers for sub-modules are // emitted here. // Find all of the submodules and emit the module initializers. llvm::SmallPtrSet Visited; SmallVector Stack; Visited.insert(Import->getImportedModule()); Stack.push_back(Import->getImportedModule()); while (!Stack.empty()) { clang::Module *Mod = Stack.pop_back_val(); if (!EmittedModuleInitializers.insert(Mod).second) continue; for (auto *D : Context.getModuleInitializers(Mod)) EmitTopLevelDecl(D); // Visit the submodules of this module. for (auto *Submodule : Mod->submodules()) { // Skip explicit children; they need to be explicitly imported to emit // the initializers. if (Submodule->IsExplicit) continue; if (Visited.insert(Submodule).second) Stack.push_back(Submodule); } } break; } case Decl::Export: EmitDeclContext(cast(D)); break; case Decl::OMPThreadPrivate: EmitOMPThreadPrivateDecl(cast(D)); break; case Decl::OMPAllocate: EmitOMPAllocateDecl(cast(D)); break; case Decl::OMPDeclareReduction: EmitOMPDeclareReduction(cast(D)); break; case Decl::OMPDeclareMapper: EmitOMPDeclareMapper(cast(D)); break; case Decl::OMPRequires: EmitOMPRequiresDecl(cast(D)); break; case Decl::Typedef: case Decl::TypeAlias: // using foo = bar; [C++11] if (CGDebugInfo *DI = getModuleDebugInfo()) DI->EmitAndRetainType( getContext().getTypedefType(cast(D))); break; case Decl::Record: if (CGDebugInfo *DI = getModuleDebugInfo()) if (cast(D)->getDefinition()) DI->EmitAndRetainType(getContext().getRecordType(cast(D))); break; case Decl::Enum: if (CGDebugInfo *DI = getModuleDebugInfo()) if (cast(D)->getDefinition()) DI->EmitAndRetainType(getContext().getEnumType(cast(D))); break; case Decl::HLSLBuffer: getHLSLRuntime().addBuffer(cast(D)); break; default: // Make sure we handled everything we should, every other kind is a // non-top-level decl. FIXME: Would be nice to have an isTopLevelDeclKind // function. Need to recode Decl::Kind to do that easily. assert(isa(D) && "Unsupported decl kind"); break; } } void CodeGenModule::AddDeferredUnusedCoverageMapping(Decl *D) { // Do we need to generate coverage mapping? 
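[Illustrative aside, not part of the patch: the distinction drawn by the changed condition above; module names are invented.]

  import M;   // C++20 named module

  // For a named module the importing TU's initializer calls the imported
  // module's own initializer (a dedicated symbol such as _ZGIW1M for
  // 'export module M;'), and that initializer in turn handles its own imports,
  // so the early 'break' above is taken and no submodule walk happens.
  // A Clang header/module-map module has no such initializer protocol, so the
  // loop below visits its non-explicit submodules and emits their initializers
  // directly.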
if (!CodeGenOpts.CoverageMapping) return; switch (D->getKind()) { case Decl::CXXConversion: case Decl::CXXMethod: case Decl::Function: case Decl::ObjCMethod: case Decl::CXXConstructor: case Decl::CXXDestructor: { if (!cast(D)->doesThisDeclarationHaveABody()) break; SourceManager &SM = getContext().getSourceManager(); if (LimitedCoverage && SM.getMainFileID() != SM.getFileID(D->getBeginLoc())) break; if (!llvm::coverage::SystemHeadersCoverage && SM.isInSystemHeader(D->getBeginLoc())) break; DeferredEmptyCoverageMappingDecls.try_emplace(D, true); break; } default: break; }; } void CodeGenModule::ClearUnusedCoverageMapping(const Decl *D) { // Do we need to generate coverage mapping? if (!CodeGenOpts.CoverageMapping) return; if (const auto *Fn = dyn_cast(D)) { if (Fn->isTemplateInstantiation()) ClearUnusedCoverageMapping(Fn->getTemplateInstantiationPattern()); } DeferredEmptyCoverageMappingDecls.insert_or_assign(D, false); } void CodeGenModule::EmitDeferredUnusedCoverageMappings() { // We call takeVector() here to avoid use-after-free. // FIXME: DeferredEmptyCoverageMappingDecls is getting mutated because // we deserialize function bodies to emit coverage info for them, and that // deserializes more declarations. How should we handle that case? for (const auto &Entry : DeferredEmptyCoverageMappingDecls.takeVector()) { if (!Entry.second) continue; const Decl *D = Entry.first; switch (D->getKind()) { case Decl::CXXConversion: case Decl::CXXMethod: case Decl::Function: case Decl::ObjCMethod: { CodeGenPGO PGO(*this); GlobalDecl GD(cast(D)); PGO.emitEmptyCounterMapping(D, getMangledName(GD), getFunctionLinkage(GD)); break; } case Decl::CXXConstructor: { CodeGenPGO PGO(*this); GlobalDecl GD(cast(D), Ctor_Base); PGO.emitEmptyCounterMapping(D, getMangledName(GD), getFunctionLinkage(GD)); break; } case Decl::CXXDestructor: { CodeGenPGO PGO(*this); GlobalDecl GD(cast(D), Dtor_Base); PGO.emitEmptyCounterMapping(D, getMangledName(GD), getFunctionLinkage(GD)); break; } default: break; }; } } void CodeGenModule::EmitMainVoidAlias() { // In order to transition away from "__original_main" gracefully, emit an // alias for "main" in the no-argument case so that libc can detect when // new-style no-argument main is in used. if (llvm::Function *F = getModule().getFunction("main")) { if (!F->isDeclaration() && F->arg_size() == 0 && !F->isVarArg() && F->getReturnType()->isIntegerTy(Context.getTargetInfo().getIntWidth())) { auto *GA = llvm::GlobalAlias::create("__main_void", F); GA->setVisibility(llvm::GlobalValue::HiddenVisibility); } } } /// Turns the given pointer into a constant. static llvm::Constant *GetPointerConstant(llvm::LLVMContext &Context, const void *Ptr) { uintptr_t PtrInt = reinterpret_cast(Ptr); llvm::Type *i64 = llvm::Type::getInt64Ty(Context); return llvm::ConstantInt::get(i64, PtrInt); } static void EmitGlobalDeclMetadata(CodeGenModule &CGM, llvm::NamedMDNode *&GlobalMetadata, GlobalDecl D, llvm::GlobalValue *Addr) { if (!GlobalMetadata) GlobalMetadata = CGM.getModule().getOrInsertNamedMetadata("clang.global.decl.ptrs"); // TODO: should we report variant information for ctors/dtors? llvm::Metadata *Ops[] = {llvm::ConstantAsMetadata::get(Addr), llvm::ConstantAsMetadata::get(GetPointerConstant( CGM.getLLVMContext(), D.getDecl()))}; GlobalMetadata->addOperand(llvm::MDNode::get(CGM.getLLVMContext(), Ops)); } bool CodeGenModule::CheckAndReplaceExternCIFuncs(llvm::GlobalValue *Elem, llvm::GlobalValue *CppFunc) { // Store the list of ifuncs we need to replace uses in. 
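[Illustrative aside, not part of the patch: the form of main that triggers EmitMainVoidAlias above; this is primarily of interest to WebAssembly/WASI-style C runtimes.]

  int main(void) { return 0; }
  // A hidden alias '__main_void' pointing at main is emitted, letting startup
  // code call the zero-argument entry point directly instead of synthesizing
  // argc/argv.

  // int main(int argc, char **argv) { ... }   // the two-argument form gets no alias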
llvm::SmallVector IFuncs; // List of ConstantExprs that we should be able to delete when we're done // here. llvm::SmallVector CEs; // It isn't valid to replace the extern-C ifuncs if all we find is itself! if (Elem == CppFunc) return false; // First make sure that all users of this are ifuncs (or ifuncs via a // bitcast), and collect the list of ifuncs and CEs so we can work on them // later. for (llvm::User *User : Elem->users()) { // Users can either be a bitcast ConstExpr that is used by the ifuncs, OR an // ifunc directly. In any other case, just give up, as we don't know what we // could break by changing those. if (auto *ConstExpr = dyn_cast(User)) { if (ConstExpr->getOpcode() != llvm::Instruction::BitCast) return false; for (llvm::User *CEUser : ConstExpr->users()) { if (auto *IFunc = dyn_cast(CEUser)) { IFuncs.push_back(IFunc); } else { return false; } } CEs.push_back(ConstExpr); } else if (auto *IFunc = dyn_cast(User)) { IFuncs.push_back(IFunc); } else { // This user is one we don't know how to handle, so fail redirection. This // will result in an ifunc retaining a resolver name that will ultimately // fail to be resolved to a defined function. return false; } } // Now we know this is a valid case where we can do this alias replacement, we // need to remove all of the references to Elem (and the bitcasts!) so we can // delete it. for (llvm::GlobalIFunc *IFunc : IFuncs) IFunc->setResolver(nullptr); for (llvm::ConstantExpr *ConstExpr : CEs) ConstExpr->destroyConstant(); // We should now be out of uses for the 'old' version of this function, so we // can erase it as well. Elem->eraseFromParent(); for (llvm::GlobalIFunc *IFunc : IFuncs) { // The type of the resolver is always just a function-type that returns the // type of the IFunc, so create that here. If the type of the actual // resolver doesn't match, it just gets bitcast to the right thing. auto *ResolverTy = llvm::FunctionType::get(IFunc->getType(), /*isVarArg*/ false); llvm::Constant *Resolver = GetOrCreateLLVMFunction( CppFunc->getName(), ResolverTy, {}, /*ForVTable*/ false); IFunc->setResolver(Resolver); } return true; } /// For each function which is declared within an extern "C" region and marked /// as 'used', but has internal linkage, create an alias from the unmangled /// name to the mangled name if possible. People expect to be able to refer /// to such functions with an unmangled name from inline assembly within the /// same translation unit. void CodeGenModule::EmitStaticExternCAliases() { if (!getTargetCodeGenInfo().shouldEmitStaticExternCAliases()) return; for (auto &I : StaticExternCValues) { const IdentifierInfo *Name = I.first; llvm::GlobalValue *Val = I.second; // If Val is null, that implies there were multiple declarations that each // had a claim to the unmangled name. In this case, generation of the alias // is suppressed. See CodeGenModule::MaybeHandleStaticInExternC. if (!Val) break; llvm::GlobalValue *ExistingElem = getModule().getNamedValue(Name->getName()); // If there is either not something already by this name, or we were able to // replace all uses from IFuncs, create the alias. 
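[Illustrative aside, not part of the patch: the source pattern served by EmitStaticExternCAliases, per the doc comment above; identifiers are invented and the inline assembly is an x86-style sketch.]

  extern "C" {
  __attribute__((used)) static int helper(void) { return 7; }
  }

  void call_helper_from_asm(void) {
    // 'helper' has internal linkage and therefore a mangled internal symbol,
    // but inline assembly refers to the unmangled name; the alias emitted here
    // makes that work, unless another global already claims the name.
    asm volatile("call helper" ::: "memory");
  }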
if (!ExistingElem || CheckAndReplaceExternCIFuncs(ExistingElem, Val)) addCompilerUsedGlobal(llvm::GlobalAlias::create(Name->getName(), Val)); } } bool CodeGenModule::lookupRepresentativeDecl(StringRef MangledName, GlobalDecl &Result) const { auto Res = Manglings.find(MangledName); if (Res == Manglings.end()) return false; Result = Res->getValue(); return true; } /// Emits metadata nodes associating all the global values in the /// current module with the Decls they came from. This is useful for /// projects using IR gen as a subroutine. /// /// Since there's currently no way to associate an MDNode directly /// with an llvm::GlobalValue, we create a global named metadata /// with the name 'clang.global.decl.ptrs'. void CodeGenModule::EmitDeclMetadata() { llvm::NamedMDNode *GlobalMetadata = nullptr; for (auto &I : MangledDeclNames) { llvm::GlobalValue *Addr = getModule().getNamedValue(I.second); // Some mangled names don't necessarily have an associated GlobalValue // in this module, e.g. if we mangled it for DebugInfo. if (Addr) EmitGlobalDeclMetadata(*this, GlobalMetadata, I.first, Addr); } } /// Emits metadata nodes for all the local variables in the current /// function. void CodeGenFunction::EmitDeclMetadata() { if (LocalDeclMap.empty()) return; llvm::LLVMContext &Context = getLLVMContext(); // Find the unique metadata ID for this name. unsigned DeclPtrKind = Context.getMDKindID("clang.decl.ptr"); llvm::NamedMDNode *GlobalMetadata = nullptr; for (auto &I : LocalDeclMap) { const Decl *D = I.first; llvm::Value *Addr = I.second.emitRawPointer(*this); if (auto *Alloca = dyn_cast(Addr)) { llvm::Value *DAddr = GetPointerConstant(getLLVMContext(), D); Alloca->setMetadata( DeclPtrKind, llvm::MDNode::get( Context, llvm::ValueAsMetadata::getConstant(DAddr))); } else if (auto *GV = dyn_cast(Addr)) { GlobalDecl GD = GlobalDecl(cast(D)); EmitGlobalDeclMetadata(CGM, GlobalMetadata, GD, GV); } } } void CodeGenModule::EmitVersionIdentMetadata() { llvm::NamedMDNode *IdentMetadata = TheModule.getOrInsertNamedMetadata("llvm.ident"); std::string Version = getClangFullVersion(); llvm::LLVMContext &Ctx = TheModule.getContext(); llvm::Metadata *IdentNode[] = {llvm::MDString::get(Ctx, Version)}; IdentMetadata->addOperand(llvm::MDNode::get(Ctx, IdentNode)); } void CodeGenModule::EmitCommandLineMetadata() { llvm::NamedMDNode *CommandLineMetadata = TheModule.getOrInsertNamedMetadata("llvm.commandline"); std::string CommandLine = getCodeGenOpts().RecordCommandLine; llvm::LLVMContext &Ctx = TheModule.getContext(); llvm::Metadata *CommandLineNode[] = {llvm::MDString::get(Ctx, CommandLine)}; CommandLineMetadata->addOperand(llvm::MDNode::get(Ctx, CommandLineNode)); } void CodeGenModule::EmitCoverageFile() { llvm::NamedMDNode *CUNode = TheModule.getNamedMetadata("llvm.dbg.cu"); if (!CUNode) return; llvm::NamedMDNode *GCov = TheModule.getOrInsertNamedMetadata("llvm.gcov"); llvm::LLVMContext &Ctx = TheModule.getContext(); auto *CoverageDataFile = llvm::MDString::get(Ctx, getCodeGenOpts().CoverageDataFile); auto *CoverageNotesFile = llvm::MDString::get(Ctx, getCodeGenOpts().CoverageNotesFile); for (int i = 0, e = CUNode->getNumOperands(); i != e; ++i) { llvm::MDNode *CU = CUNode->getOperand(i); llvm::Metadata *Elts[] = {CoverageNotesFile, CoverageDataFile, CU}; GCov->addOperand(llvm::MDNode::get(Ctx, Elts)); } } llvm::Constant *CodeGenModule::GetAddrOfRTTIDescriptor(QualType Ty, bool ForEH) { // Return a bogus pointer if RTTI is disabled, unless it's for EH. 
// FIXME: should we even be calling this method if RTTI is disabled // and it's not for EH? if (!shouldEmitRTTI(ForEH)) return llvm::Constant::getNullValue(GlobalsInt8PtrTy); if (ForEH && Ty->isObjCObjectPointerType() && LangOpts.ObjCRuntime.isGNUFamily()) return ObjCRuntime->GetEHType(Ty); return getCXXABI().getAddrOfRTTIDescriptor(Ty); } void CodeGenModule::EmitOMPThreadPrivateDecl(const OMPThreadPrivateDecl *D) { // Do not emit threadprivates in simd-only mode. if (LangOpts.OpenMP && LangOpts.OpenMPSimd) return; for (auto RefExpr : D->varlists()) { auto *VD = cast(cast(RefExpr)->getDecl()); bool PerformInit = VD->getAnyInitializer() && !VD->getAnyInitializer()->isConstantInitializer(getContext(), /*ForRef=*/false); Address Addr(GetAddrOfGlobalVar(VD), getTypes().ConvertTypeForMem(VD->getType()), getContext().getDeclAlign(VD)); if (auto InitFunction = getOpenMPRuntime().emitThreadPrivateVarDefinition( VD, Addr, RefExpr->getBeginLoc(), PerformInit)) CXXGlobalInits.push_back(InitFunction); } } llvm::Metadata * CodeGenModule::CreateMetadataIdentifierImpl(QualType T, MetadataTypeMap &Map, StringRef Suffix) { if (auto *FnType = T->getAs()) T = getContext().getFunctionType( FnType->getReturnType(), FnType->getParamTypes(), FnType->getExtProtoInfo().withExceptionSpec(EST_None)); llvm::Metadata *&InternalId = Map[T.getCanonicalType()]; if (InternalId) return InternalId; if (isExternallyVisible(T->getLinkage())) { std::string OutName; llvm::raw_string_ostream Out(OutName); getCXXABI().getMangleContext().mangleCanonicalTypeName( T, Out, getCodeGenOpts().SanitizeCfiICallNormalizeIntegers); if (getCodeGenOpts().SanitizeCfiICallNormalizeIntegers) Out << ".normalized"; Out << Suffix; InternalId = llvm::MDString::get(getLLVMContext(), Out.str()); } else { InternalId = llvm::MDNode::getDistinct(getLLVMContext(), llvm::ArrayRef()); } return InternalId; } llvm::Metadata *CodeGenModule::CreateMetadataIdentifierForType(QualType T) { return CreateMetadataIdentifierImpl(T, MetadataIdMap, ""); } llvm::Metadata * CodeGenModule::CreateMetadataIdentifierForVirtualMemPtrType(QualType T) { return CreateMetadataIdentifierImpl(T, VirtualMetadataIdMap, ".virtual"); } // Generalize pointer types to a void pointer with the qualifiers of the // originally pointed-to type, e.g. 'const char *' and 'char * const *' // generalize to 'const void *' while 'char *' and 'const char **' generalize to // 'void *'. static QualType GeneralizeType(ASTContext &Ctx, QualType Ty) { if (!Ty->isPointerType()) return Ty; return Ctx.getPointerType( QualType(Ctx.VoidTy).withCVRQualifiers( Ty->getPointeeType().getCVRQualifiers())); } // Apply type generalization to a FunctionType's return and argument types static QualType GeneralizeFunctionType(ASTContext &Ctx, QualType Ty) { if (auto *FnType = Ty->getAs()) { SmallVector GeneralizedParams; for (auto &Param : FnType->param_types()) GeneralizedParams.push_back(GeneralizeType(Ctx, Param)); return Ctx.getFunctionType( GeneralizeType(Ctx, FnType->getReturnType()), GeneralizedParams, FnType->getExtProtoInfo()); } if (auto *FnType = Ty->getAs()) return Ctx.getFunctionNoProtoType( GeneralizeType(Ctx, FnType->getReturnType())); llvm_unreachable("Encountered unknown FunctionType"); } llvm::Metadata *CodeGenModule::CreateMetadataIdentifierGeneralized(QualType T) { return CreateMetadataIdentifierImpl(GeneralizeFunctionType(getContext(), T), GeneralizedMetadataIdMap, ".generalized"); } /// Returns whether this module needs the "all-vtables" type identifier. 
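[Illustrative aside, not part of the patch: what the pointer generalization above buys for CFI indirect-call checking; declarations only, and the flags named are the existing -fsanitize=cfi-icall and -fsanitize-cfi-icall-generalize-pointers options.]

  // With generalized type identifiers, both prototypes map to the identifier
  // for 'void (void *, const void *)', so an indirect call through either
  // signature passes the cfi-icall check:
  void log_bytes(char *buf, const char *tag);
  void log_words(int *buf, const unsigned char *tag);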
bool CodeGenModule::NeedAllVtablesTypeId() const { // Returns true if at least one of vtable-based CFI checkers is enabled and // is not in the trapping mode. return ((LangOpts.Sanitize.has(SanitizerKind::CFIVCall) && !CodeGenOpts.SanitizeTrap.has(SanitizerKind::CFIVCall)) || (LangOpts.Sanitize.has(SanitizerKind::CFINVCall) && !CodeGenOpts.SanitizeTrap.has(SanitizerKind::CFINVCall)) || (LangOpts.Sanitize.has(SanitizerKind::CFIDerivedCast) && !CodeGenOpts.SanitizeTrap.has(SanitizerKind::CFIDerivedCast)) || (LangOpts.Sanitize.has(SanitizerKind::CFIUnrelatedCast) && !CodeGenOpts.SanitizeTrap.has(SanitizerKind::CFIUnrelatedCast))); } void CodeGenModule::AddVTableTypeMetadata(llvm::GlobalVariable *VTable, CharUnits Offset, const CXXRecordDecl *RD) { llvm::Metadata *MD = CreateMetadataIdentifierForType(QualType(RD->getTypeForDecl(), 0)); VTable->addTypeMetadata(Offset.getQuantity(), MD); if (CodeGenOpts.SanitizeCfiCrossDso) if (auto CrossDsoTypeId = CreateCrossDsoCfiTypeId(MD)) VTable->addTypeMetadata(Offset.getQuantity(), llvm::ConstantAsMetadata::get(CrossDsoTypeId)); if (NeedAllVtablesTypeId()) { llvm::Metadata *MD = llvm::MDString::get(getLLVMContext(), "all-vtables"); VTable->addTypeMetadata(Offset.getQuantity(), MD); } } llvm::SanitizerStatReport &CodeGenModule::getSanStats() { if (!SanStats) SanStats = std::make_unique(&getModule()); return *SanStats; } llvm::Value * CodeGenModule::createOpenCLIntToSamplerConversion(const Expr *E, CodeGenFunction &CGF) { llvm::Constant *C = ConstantEmitter(CGF).emitAbstract(E, E->getType()); auto *SamplerT = getOpenCLRuntime().getSamplerType(E->getType().getTypePtr()); auto *FTy = llvm::FunctionType::get(SamplerT, {C->getType()}, false); auto *Call = CGF.EmitRuntimeCall( CreateRuntimeFunction(FTy, "__translate_sampler_initializer"), {C}); return Call; } CharUnits CodeGenModule::getNaturalPointeeTypeAlignment( QualType T, LValueBaseInfo *BaseInfo, TBAAAccessInfo *TBAAInfo) { return getNaturalTypeAlignment(T->getPointeeType(), BaseInfo, TBAAInfo, /* forPointeeType= */ true); } CharUnits CodeGenModule::getNaturalTypeAlignment(QualType T, LValueBaseInfo *BaseInfo, TBAAAccessInfo *TBAAInfo, bool forPointeeType) { if (TBAAInfo) *TBAAInfo = getTBAAAccessInfo(T); // FIXME: This duplicates logic in ASTContext::getTypeAlignIfKnown. But // that doesn't return the information we need to compute BaseInfo. // Honor alignment typedef attributes even on incomplete types. // We also honor them straight for C++ class types, even as pointees; // there's an expressivity gap here. if (auto TT = T->getAs()) { if (auto Align = TT->getDecl()->getMaxAlignment()) { if (BaseInfo) *BaseInfo = LValueBaseInfo(AlignmentSource::AttributedType); return getContext().toCharUnitsFromBits(Align); } } bool AlignForArray = T->isArrayType(); // Analyze the base element type, so we don't get confused by incomplete // array types. T = getContext().getBaseElementType(T); if (T->isIncompleteType()) { // We could try to replicate the logic from // ASTContext::getTypeAlignIfKnown, but nothing uses the alignment if the // type is incomplete, so it's impossible to test. We could try to reuse // getTypeAlignIfKnown, but that doesn't return the information we need // to set BaseInfo. So just ignore the possibility that the alignment is // greater than one. 
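[Illustrative aside, not part of the patch: the kind of class whose vtable receives the type metadata added above; identifiers are invented.]

  struct Shape  { virtual double area() const = 0; virtual ~Shape() = default; };
  struct Circle : Shape { double area() const override { return 3.14; } };

  double measure(const Shape &s) {
    // Under -fsanitize=cfi-vcall the virtual call verifies that the vtable
    // pointer carries type metadata compatible with 'Shape'; when any vtable
    // check runs in non-trapping mode, the extra "all-vtables" identifier from
    // NeedAllVtablesTypeId() is attached as well.
    return s.area();
  }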
if (BaseInfo) *BaseInfo = LValueBaseInfo(AlignmentSource::Type); return CharUnits::One(); } if (BaseInfo) *BaseInfo = LValueBaseInfo(AlignmentSource::Type); CharUnits Alignment; const CXXRecordDecl *RD; if (T.getQualifiers().hasUnaligned()) { Alignment = CharUnits::One(); } else if (forPointeeType && !AlignForArray && (RD = T->getAsCXXRecordDecl())) { // For C++ class pointees, we don't know whether we're pointing at a // base or a complete object, so we generally need to use the // non-virtual alignment. Alignment = getClassPointerAlignment(RD); } else { Alignment = getContext().getTypeAlignInChars(T); } // Cap to the global maximum type alignment unless the alignment // was somehow explicit on the type. if (unsigned MaxAlign = getLangOpts().MaxTypeAlign) { if (Alignment.getQuantity() > MaxAlign && !getContext().isAlignmentRequired(T)) Alignment = CharUnits::fromQuantity(MaxAlign); } return Alignment; } bool CodeGenModule::stopAutoInit() { unsigned StopAfter = getContext().getLangOpts().TrivialAutoVarInitStopAfter; if (StopAfter) { // This number is positive only when -ftrivial-auto-var-init-stop-after=* is // used if (NumAutoVarInit >= StopAfter) { return true; } if (!NumAutoVarInit) { unsigned DiagID = getDiags().getCustomDiagID( DiagnosticsEngine::Warning, "-ftrivial-auto-var-init-stop-after=%0 has been enabled to limit the " "number of times ftrivial-auto-var-init=%1 gets applied."); getDiags().Report(DiagID) << StopAfter << (getContext().getLangOpts().getTrivialAutoVarInit() == LangOptions::TrivialAutoVarInitKind::Zero ? "zero" : "pattern"); } ++NumAutoVarInit; } return false; } void CodeGenModule::printPostfixForExternalizedDecl(llvm::raw_ostream &OS, const Decl *D) const { // ptxas does not allow '.' in symbol names. On the other hand, HIP prefers // postfix beginning with '.' since the symbol name can be demangled. if (LangOpts.HIP) OS << (isa(D) ? ".static." : ".intern."); else OS << (isa(D) ? "__static__" : "__intern__"); // If the CUID is not specified we try to generate a unique postfix. if (getLangOpts().CUID.empty()) { SourceManager &SM = getContext().getSourceManager(); PresumedLoc PLoc = SM.getPresumedLoc(D->getLocation()); assert(PLoc.isValid() && "Source location is expected to be valid."); // Get the hash of the user defined macros. llvm::MD5 Hash; llvm::MD5::MD5Result Result; for (const auto &Arg : PreprocessorOpts.Macros) Hash.update(Arg.first); Hash.final(Result); // Get the UniqueID for the file containing the decl. 
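[Illustrative aside, not part of the patch: a behaviour sketch for the stopAutoInit counter above. The flags are the existing -ftrivial-auto-var-init options; exactly which variables fall within the budget depends on emission order, and the counter is per module.]

  // Compiled with: -ftrivial-auto-var-init=zero -ftrivial-auto-var-init-stop-after=2
  void fill(void) {
    int a;        // zero-initialized (1st eligible variable)
    int b[16];    // zero-initialized (2nd)
    int c;        // beyond the budget: left uninitialized, and the one-time
                  // warning above reports that the limit is in effect
    (void)a; (void)b; (void)c;
  }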
llvm::sys::fs::UniqueID ID; if (llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) { PLoc = SM.getPresumedLoc(D->getLocation(), /*UseLineDirectives=*/false); assert(PLoc.isValid() && "Source location is expected to be valid."); if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) SM.getDiagnostics().Report(diag::err_cannot_open_file) << PLoc.getFilename() << EC.message(); } OS << llvm::format("%x", ID.getFile()) << llvm::format("%x", ID.getDevice()) << "_" << llvm::utohexstr(Result.low(), /*LowerCase=*/true, /*Width=*/8); } else { OS << getContext().getCUIDHash(); } } void CodeGenModule::moveLazyEmissionStates(CodeGenModule *NewBuilder) { assert(DeferredDeclsToEmit.empty() && "Should have emitted all decls deferred to emit."); assert(NewBuilder->DeferredDecls.empty() && "Newly created module should not have deferred decls"); NewBuilder->DeferredDecls = std::move(DeferredDecls); assert(EmittedDeferredDecls.empty() && "Still have (unmerged) EmittedDeferredDecls deferred decls"); assert(NewBuilder->DeferredVTables.empty() && "Newly created module should not have deferred vtables"); NewBuilder->DeferredVTables = std::move(DeferredVTables); assert(NewBuilder->MangledDeclNames.empty() && "Newly created module should not have mangled decl names"); assert(NewBuilder->Manglings.empty() && "Newly created module should not have manglings"); NewBuilder->Manglings = std::move(Manglings); NewBuilder->WeakRefReferences = std::move(WeakRefReferences); NewBuilder->ABI->MangleCtx = std::move(ABI->MangleCtx); } diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp index a5268e153bcc..bfb592ae0749 100644 --- a/clang/lib/Format/UnwrappedLineParser.cpp +++ b/clang/lib/Format/UnwrappedLineParser.cpp @@ -1,5061 +1,5065 @@ //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// /// /// \file /// This file contains the implementation of the UnwrappedLineParser, /// which turns a stream of tokens into UnwrappedLines. /// //===----------------------------------------------------------------------===// #include "UnwrappedLineParser.h" #include "FormatToken.h" #include "FormatTokenLexer.h" #include "FormatTokenSource.h" #include "Macros.h" #include "TokenAnnotator.h" #include "clang/Basic/TokenKinds.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_os_ostream.h" #include "llvm/Support/raw_ostream.h" #include #include #define DEBUG_TYPE "format-parser" namespace clang { namespace format { namespace { void printLine(llvm::raw_ostream &OS, const UnwrappedLine &Line, StringRef Prefix = "", bool PrintText = false) { OS << Prefix << "Line(" << Line.Level << ", FSC=" << Line.FirstStartColumn << ")" << (Line.InPPDirective ?
" MACRO" : "") << ": "; bool NewLine = false; for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(), E = Line.Tokens.end(); I != E; ++I) { if (NewLine) { OS << Prefix; NewLine = false; } OS << I->Tok->Tok.getName() << "[" << "T=" << (unsigned)I->Tok->getType() << ", OC=" << I->Tok->OriginalColumn << ", \"" << I->Tok->TokenText << "\"] "; for (SmallVectorImpl<UnwrappedLine>::const_iterator CI = I->Children.begin(), CE = I->Children.end(); CI != CE; ++CI) { OS << "\n"; printLine(OS, *CI, (Prefix + " ").str()); NewLine = true; } } if (!NewLine) OS << "\n"; } LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line) { printLine(llvm::dbgs(), Line); } class ScopedDeclarationState { public: ScopedDeclarationState(UnwrappedLine &Line, llvm::BitVector &Stack, bool MustBeDeclaration) : Line(Line), Stack(Stack) { Line.MustBeDeclaration = MustBeDeclaration; Stack.push_back(MustBeDeclaration); } ~ScopedDeclarationState() { Stack.pop_back(); if (!Stack.empty()) Line.MustBeDeclaration = Stack.back(); else Line.MustBeDeclaration = true; } private: UnwrappedLine &Line; llvm::BitVector &Stack; }; } // end anonymous namespace std::ostream &operator<<(std::ostream &Stream, const UnwrappedLine &Line) { llvm::raw_os_ostream OS(Stream); printLine(OS, Line); return Stream; } class ScopedLineState { public: ScopedLineState(UnwrappedLineParser &Parser, bool SwitchToPreprocessorLines = false) : Parser(Parser), OriginalLines(Parser.CurrentLines) { if (SwitchToPreprocessorLines) Parser.CurrentLines = &Parser.PreprocessorDirectives; else if (!Parser.Line->Tokens.empty()) Parser.CurrentLines = &Parser.Line->Tokens.back().Children; PreBlockLine = std::move(Parser.Line); Parser.Line = std::make_unique<UnwrappedLine>(); Parser.Line->Level = PreBlockLine->Level; Parser.Line->PPLevel = PreBlockLine->PPLevel; Parser.Line->InPPDirective = PreBlockLine->InPPDirective; Parser.Line->InMacroBody = PreBlockLine->InMacroBody; Parser.Line->UnbracedBodyLevel = PreBlockLine->UnbracedBodyLevel; } ~ScopedLineState() { if (!Parser.Line->Tokens.empty()) Parser.addUnwrappedLine(); assert(Parser.Line->Tokens.empty()); Parser.Line = std::move(PreBlockLine); if (Parser.CurrentLines == &Parser.PreprocessorDirectives) Parser.MustBreakBeforeNextToken = true; Parser.CurrentLines = OriginalLines; } private: UnwrappedLineParser &Parser; std::unique_ptr<UnwrappedLine> PreBlockLine; SmallVectorImpl<UnwrappedLine> *OriginalLines; }; class CompoundStatementIndenter { public: CompoundStatementIndenter(UnwrappedLineParser *Parser, const FormatStyle &Style, unsigned &LineLevel) : CompoundStatementIndenter(Parser, LineLevel, Style.BraceWrapping.AfterControlStatement, Style.BraceWrapping.IndentBraces) {} CompoundStatementIndenter(UnwrappedLineParser *Parser, unsigned &LineLevel, bool WrapBrace, bool IndentBrace) : LineLevel(LineLevel), OldLineLevel(LineLevel) { if (WrapBrace) Parser->addUnwrappedLine(); if (IndentBrace) ++LineLevel; } ~CompoundStatementIndenter() { LineLevel = OldLineLevel; } private: unsigned &LineLevel; unsigned OldLineLevel; }; UnwrappedLineParser::UnwrappedLineParser( SourceManager &SourceMgr, const FormatStyle &Style, const AdditionalKeywords &Keywords, unsigned FirstStartColumn, ArrayRef<FormatToken *> Tokens, UnwrappedLineConsumer &Callback, llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator, IdentifierTable &IdentTable) : Line(new UnwrappedLine), MustBreakBeforeNextToken(false), CurrentLines(&Lines), Style(Style), IsCpp(Style.isCpp()), LangOpts(getFormattingLangOpts(Style)), Keywords(Keywords), CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr), Callback(Callback), AllTokens(Tokens),
PPBranchLevel(-1), IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None ? IG_Rejected : IG_Inited), IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn), Macros(Style.Macros, SourceMgr, Style, Allocator, IdentTable) { assert(IsCpp == LangOpts.CXXOperatorNames); } void UnwrappedLineParser::reset() { PPBranchLevel = -1; IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None ? IG_Rejected : IG_Inited; IncludeGuardToken = nullptr; Line.reset(new UnwrappedLine); CommentsBeforeNextToken.clear(); FormatTok = nullptr; MustBreakBeforeNextToken = false; IsDecltypeAutoFunction = false; PreprocessorDirectives.clear(); CurrentLines = &Lines; DeclarationScopeStack.clear(); NestedTooDeep.clear(); NestedLambdas.clear(); PPStack.clear(); Line->FirstStartColumn = FirstStartColumn; if (!Unexpanded.empty()) for (FormatToken *Token : AllTokens) Token->MacroCtx.reset(); CurrentExpandedLines.clear(); ExpandedLines.clear(); Unexpanded.clear(); InExpansion = false; Reconstruct.reset(); } void UnwrappedLineParser::parse() { IndexedTokenSource TokenSource(AllTokens); Line->FirstStartColumn = FirstStartColumn; do { LLVM_DEBUG(llvm::dbgs() << "----\n"); reset(); Tokens = &TokenSource; TokenSource.reset(); readToken(); parseFile(); // If we found an include guard then all preprocessor directives (other than // the guard) are over-indented by one. if (IncludeGuard == IG_Found) { for (auto &Line : Lines) if (Line.InPPDirective && Line.Level > 0) --Line.Level; } // Create line with eof token. assert(eof()); pushToken(FormatTok); addUnwrappedLine(); // In a first run, format everything with the lines containing macro calls // replaced by the expansion. if (!ExpandedLines.empty()) { LLVM_DEBUG(llvm::dbgs() << "Expanded lines:\n"); for (const auto &Line : Lines) { if (!Line.Tokens.empty()) { auto it = ExpandedLines.find(Line.Tokens.begin()->Tok); if (it != ExpandedLines.end()) { for (const auto &Expanded : it->second) { LLVM_DEBUG(printDebugInfo(Expanded)); Callback.consumeUnwrappedLine(Expanded); } continue; } } LLVM_DEBUG(printDebugInfo(Line)); Callback.consumeUnwrappedLine(Line); } Callback.finishRun(); } LLVM_DEBUG(llvm::dbgs() << "Unwrapped lines:\n"); for (const UnwrappedLine &Line : Lines) { LLVM_DEBUG(printDebugInfo(Line)); Callback.consumeUnwrappedLine(Line); } Callback.finishRun(); Lines.clear(); while (!PPLevelBranchIndex.empty() && PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) { PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1); PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1); } if (!PPLevelBranchIndex.empty()) { ++PPLevelBranchIndex.back(); assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size()); assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back()); } } while (!PPLevelBranchIndex.empty()); } void UnwrappedLineParser::parseFile() { // The top-level context in a file always has declarations, except for pre- // processor directives and JavaScript files. bool MustBeDeclaration = !Line->InPPDirective && !Style.isJavaScript(); ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, MustBeDeclaration); if (Style.Language == FormatStyle::LK_TextProto) parseBracedList(); else parseLevel(); // Make sure to format the remaining tokens. // // LK_TextProto is special since its top-level is parsed as the body of a // braced list, which does not necessarily have natural line separators such // as a semicolon. 
Comments after the last entry that have been determined to // not belong to that line, as in: // key: value // // endfile comment // do not have a chance to be put on a line of their own until this point. // Here we add this newline before end-of-file comments. if (Style.Language == FormatStyle::LK_TextProto && !CommentsBeforeNextToken.empty()) { addUnwrappedLine(); } flushComments(true); addUnwrappedLine(); } void UnwrappedLineParser::parseCSharpGenericTypeConstraint() { do { switch (FormatTok->Tok.getKind()) { case tok::l_brace: return; default: if (FormatTok->is(Keywords.kw_where)) { addUnwrappedLine(); nextToken(); parseCSharpGenericTypeConstraint(); break; } nextToken(); break; } } while (!eof()); } void UnwrappedLineParser::parseCSharpAttribute() { int UnpairedSquareBrackets = 1; do { switch (FormatTok->Tok.getKind()) { case tok::r_square: nextToken(); --UnpairedSquareBrackets; if (UnpairedSquareBrackets == 0) { addUnwrappedLine(); return; } break; case tok::l_square: ++UnpairedSquareBrackets; nextToken(); break; default: nextToken(); break; } } while (!eof()); } bool UnwrappedLineParser::precededByCommentOrPPDirective() const { if (!Lines.empty() && Lines.back().InPPDirective) return true; const FormatToken *Previous = Tokens->getPreviousToken(); return Previous && Previous->is(tok::comment) && (Previous->IsMultiline || Previous->NewlinesBefore > 0); } /// \brief Parses a level, that is ???. /// \param OpeningBrace Opening brace (\p nullptr if absent) of that level. /// \param IfKind The \p if statement kind in the level. /// \param IfLeftBrace The left brace of the \p if block in the level. /// \returns true if a simple block of if/else/for/while, or false otherwise. /// (A simple block has a single statement.) bool UnwrappedLineParser::parseLevel(const FormatToken *OpeningBrace, IfStmtKind *IfKind, FormatToken **IfLeftBrace) { const bool InRequiresExpression = OpeningBrace && OpeningBrace->is(TT_RequiresExpressionLBrace); const bool IsPrecededByCommentOrPPDirective = !Style.RemoveBracesLLVM || precededByCommentOrPPDirective(); FormatToken *IfLBrace = nullptr; bool HasDoWhile = false; bool HasLabel = false; unsigned StatementCount = 0; bool SwitchLabelEncountered = false; do { if (FormatTok->isAttribute()) { nextToken(); if (FormatTok->is(tok::l_paren)) parseParens(); continue; } tok::TokenKind Kind = FormatTok->Tok.getKind(); if (FormatTok->is(TT_MacroBlockBegin)) Kind = tok::l_brace; else if (FormatTok->is(TT_MacroBlockEnd)) Kind = tok::r_brace; auto ParseDefault = [this, OpeningBrace, IfKind, &IfLBrace, &HasDoWhile, &HasLabel, &StatementCount] { parseStructuralElement(OpeningBrace, IfKind, &IfLBrace, HasDoWhile ? nullptr : &HasDoWhile, HasLabel ? nullptr : &HasLabel); ++StatementCount; assert(StatementCount > 0 && "StatementCount overflow!"); }; switch (Kind) { case tok::comment: nextToken(); addUnwrappedLine(); break; case tok::l_brace: if (InRequiresExpression) { FormatTok->setFinalizedType(TT_RequiresExpressionLBrace); } else if (FormatTok->Previous && FormatTok->Previous->ClosesRequiresClause) { // We need the 'default' case here to correctly parse a function // l_brace. 
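parseCSharpAttribute() above is a plain bracket-depth counter over the token stream; the sketch below shows the same idea against a toy token vector. Tok, skipAttribute and the stream layout are invented for illustration and are not clang-format's API.

// Illustrative sketch: consume tokens starting just after an opening '[' and
// stop once the matching ']' closes the attribute, tracking unpaired brackets.
#include <cstddef>
#include <vector>

enum class Tok { LSquare, RSquare, Other };

// Returns the index one past the ']' that balances the initial '[', or
// Toks.size() if the stream ends first (mirrors the eof() check above).
std::size_t skipAttribute(const std::vector<Tok> &Toks, std::size_t I) {
  int Unpaired = 1;                       // We are already inside one '['.
  for (; I < Toks.size(); ++I) {
    if (Toks[I] == Tok::LSquare)
      ++Unpaired;
    else if (Toks[I] == Tok::RSquare && --Unpaired == 0)
      return I + 1;                       // Attribute fully closed here.
  }
  return Toks.size();                     // Unbalanced input: ran off the end.
}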
ParseDefault(); continue; } if (!InRequiresExpression && FormatTok->isNot(TT_MacroBlockBegin)) { if (tryToParseBracedList()) continue; FormatTok->setFinalizedType(TT_BlockLBrace); } parseBlock(); ++StatementCount; assert(StatementCount > 0 && "StatementCount overflow!"); addUnwrappedLine(); break; case tok::r_brace: if (OpeningBrace) { if (!Style.RemoveBracesLLVM || Line->InPPDirective || !OpeningBrace->isOneOf(TT_ControlStatementLBrace, TT_ElseLBrace)) { return false; } if (FormatTok->isNot(tok::r_brace) || StatementCount != 1 || HasLabel || HasDoWhile || IsPrecededByCommentOrPPDirective || precededByCommentOrPPDirective()) { return false; } const FormatToken *Next = Tokens->peekNextToken(); if (Next->is(tok::comment) && Next->NewlinesBefore == 0) return false; if (IfLeftBrace) *IfLeftBrace = IfLBrace; return true; } nextToken(); addUnwrappedLine(); break; case tok::kw_default: { unsigned StoredPosition = Tokens->getPosition(); auto *Next = Tokens->getNextNonComment(); FormatTok = Tokens->setPosition(StoredPosition); if (!Next->isOneOf(tok::colon, tok::arrow)) { // default not followed by `:` or `->` is not a case label; treat it // like an identifier. parseStructuralElement(); break; } // Else, if it is 'default:', fall through to the case handling. [[fallthrough]]; } case tok::kw_case: if (Style.Language == FormatStyle::LK_Proto || Style.isVerilog() || (Style.isJavaScript() && Line->MustBeDeclaration)) { // Proto: there are no switch/case statements // Verilog: Case labels don't have this word. We handle case // labels including default in TokenAnnotator. // JavaScript: A 'case: string' style field declaration. ParseDefault(); break; } if (!SwitchLabelEncountered && (Style.IndentCaseLabels || (OpeningBrace && OpeningBrace->is(TT_SwitchExpressionLBrace)) || (Line->InPPDirective && Line->Level == 1))) { ++Line->Level; } SwitchLabelEncountered = true; parseStructuralElement(); break; case tok::l_square: if (Style.isCSharp()) { nextToken(); parseCSharpAttribute(); break; } if (handleCppAttributes()) break; [[fallthrough]]; default: ParseDefault(); break; } } while (!eof()); return false; } void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) { // We'll parse forward through the tokens until we hit // a closing brace or eof - note that getNextToken() will // parse macros, so this will magically work inside macro // definitions, too. unsigned StoredPosition = Tokens->getPosition(); FormatToken *Tok = FormatTok; const FormatToken *PrevTok = Tok->Previous; // Keep a stack of positions of lbrace tokens. We will // update information about whether an lbrace starts a // braced init list or a different block during the loop. struct StackEntry { FormatToken *Tok; const FormatToken *PrevTok; }; SmallVector LBraceStack; assert(Tok->is(tok::l_brace)); do { auto *NextTok = Tokens->getNextNonComment(); if (!Line->InMacroBody && !Style.isTableGen()) { // Skip PPDirective lines and comments. while (NextTok->is(tok::hash)) { NextTok = Tokens->getNextToken(); if (NextTok->is(tok::pp_not_keyword)) break; do { NextTok = Tokens->getNextToken(); } while (NextTok->NewlinesBefore == 0 && NextTok->isNot(tok::eof)); while (NextTok->is(tok::comment)) NextTok = Tokens->getNextToken(); } } switch (Tok->Tok.getKind()) { case tok::l_brace: if (Style.isJavaScript() && PrevTok) { if (PrevTok->isOneOf(tok::colon, tok::less)) { // A ':' indicates this code is in a type, or a braced list // following a label in an object literal ({a: {b: 1}}). 
// A '<' could be an object used in a comparison, but that is nonsense // code (can never return true), so more likely it is a generic type // argument (`X<{a: string; b: number}>`). // The code below could be confused by semicolons between the // individual members in a type member list, which would normally // trigger BK_Block. In both cases, this must be parsed as an inline // braced init. Tok->setBlockKind(BK_BracedInit); } else if (PrevTok->is(tok::r_paren)) { // `) { }` can only occur in function or method declarations in JS. Tok->setBlockKind(BK_Block); } } else { Tok->setBlockKind(BK_Unknown); } LBraceStack.push_back({Tok, PrevTok}); break; case tok::r_brace: if (LBraceStack.empty()) break; if (auto *LBrace = LBraceStack.back().Tok; LBrace->is(BK_Unknown)) { bool ProbablyBracedList = false; if (Style.Language == FormatStyle::LK_Proto) { ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square); } else if (LBrace->isNot(TT_EnumLBrace)) { // Using OriginalColumn to distinguish between ObjC methods and // binary operators is a bit hacky. bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) && NextTok->OriginalColumn == 0; // Try to detect a braced list. Note that regardless how we mark inner // braces here, we will overwrite the BlockKind later if we parse a // braced list (where all blocks inside are by default braced lists), // or when we explicitly detect blocks (for example while parsing // lambdas). // If we already marked the opening brace as braced list, the closing // must also be part of it. ProbablyBracedList = LBrace->is(TT_BracedListLBrace); ProbablyBracedList = ProbablyBracedList || (Style.isJavaScript() && NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in, Keywords.kw_as)); ProbablyBracedList = ProbablyBracedList || (IsCpp && (PrevTok->Tok.isLiteral() || NextTok->is(tok::l_paren))); // If there is a comma, semicolon or right paren after the closing // brace, we assume this is a braced initializer list. // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a // braced list in JS. ProbablyBracedList = ProbablyBracedList || NextTok->isOneOf(tok::comma, tok::period, tok::colon, tok::r_paren, tok::r_square, tok::ellipsis); // Distinguish between braced list in a constructor initializer list // followed by constructor body, or just adjacent blocks. ProbablyBracedList = ProbablyBracedList || (NextTok->is(tok::l_brace) && LBraceStack.back().PrevTok && LBraceStack.back().PrevTok->isOneOf(tok::identifier, tok::greater)); ProbablyBracedList = ProbablyBracedList || (NextTok->is(tok::identifier) && !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace)); ProbablyBracedList = ProbablyBracedList || (NextTok->is(tok::semi) && (!ExpectClassBody || LBraceStack.size() != 1)); ProbablyBracedList = ProbablyBracedList || (NextTok->isBinaryOperator() && !NextIsObjCMethod); if (!Style.isCSharp() && NextTok->is(tok::l_square)) { // We can have an array subscript after a braced init // list, but C++11 attributes are expected after blocks. NextTok = Tokens->getNextToken(); ProbablyBracedList = NextTok->isNot(tok::l_square); } // Cpp macro definition body that is a nonempty braced list or block: if (IsCpp && Line->InMacroBody && PrevTok != FormatTok && !FormatTok->Previous && NextTok->is(tok::eof) && // A statement can end with only `;` (simple statement), a block // closing brace (compound statement), or `:` (label statement). // If PrevTok is a block opening brace, Tok ends an empty block. 
!PrevTok->isOneOf(tok::semi, BK_Block, tok::colon)) { ProbablyBracedList = true; } } const auto BlockKind = ProbablyBracedList ? BK_BracedInit : BK_Block; Tok->setBlockKind(BlockKind); LBrace->setBlockKind(BlockKind); } LBraceStack.pop_back(); break; case tok::identifier: if (Tok->isNot(TT_StatementMacro)) break; [[fallthrough]]; case tok::at: case tok::semi: case tok::kw_if: case tok::kw_while: case tok::kw_for: case tok::kw_switch: case tok::kw_try: case tok::kw___try: if (!LBraceStack.empty() && LBraceStack.back().Tok->is(BK_Unknown)) LBraceStack.back().Tok->setBlockKind(BK_Block); break; default: break; } PrevTok = Tok; Tok = NextTok; } while (Tok->isNot(tok::eof) && !LBraceStack.empty()); // Assume other blocks for all unclosed opening braces. for (const auto &Entry : LBraceStack) if (Entry.Tok->is(BK_Unknown)) Entry.Tok->setBlockKind(BK_Block); FormatTok = Tokens->setPosition(StoredPosition); } // Sets the token type of the directly previous right brace. void UnwrappedLineParser::setPreviousRBraceType(TokenType Type) { if (auto Prev = FormatTok->getPreviousNonComment(); Prev && Prev->is(tok::r_brace)) { Prev->setFinalizedType(Type); } } template <typename T> static inline void hash_combine(std::size_t &seed, const T &v) { std::hash<T> hasher; seed ^= hasher(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2); } size_t UnwrappedLineParser::computePPHash() const { size_t h = 0; for (const auto &i : PPStack) { hash_combine(h, size_t(i.Kind)); hash_combine(h, i.Line); } return h; } // Checks whether \p ParsedLine might fit on a single line. If \p OpeningBrace // is not null, subtracts its length (plus the preceding space) when computing // the length of \p ParsedLine. We must clone the tokens of \p ParsedLine before // running the token annotator on it so that we can restore them afterward. bool UnwrappedLineParser::mightFitOnOneLine( UnwrappedLine &ParsedLine, const FormatToken *OpeningBrace) const { const auto ColumnLimit = Style.ColumnLimit; if (ColumnLimit == 0) return true; auto &Tokens = ParsedLine.Tokens; assert(!Tokens.empty()); const auto *LastToken = Tokens.back().Tok; assert(LastToken); SmallVector<UnwrappedLineNode> SavedTokens(Tokens.size()); int Index = 0; for (const auto &Token : Tokens) { assert(Token.Tok); auto &SavedToken = SavedTokens[Index++]; SavedToken.Tok = new FormatToken; SavedToken.Tok->copyFrom(*Token.Tok); SavedToken.Children = std::move(Token.Children); } AnnotatedLine Line(ParsedLine); assert(Line.Last == LastToken); TokenAnnotator Annotator(Style, Keywords); Annotator.annotate(Line); Annotator.calculateFormattingInformation(Line); auto Length = LastToken->TotalLength; if (OpeningBrace) { assert(OpeningBrace != Tokens.front().Tok); if (auto Prev = OpeningBrace->Previous; Prev && Prev->TotalLength + ColumnLimit == OpeningBrace->TotalLength) { Length -= ColumnLimit; } Length -= OpeningBrace->TokenText.size() + 1; } if (const auto *FirstToken = Line.First; FirstToken->is(tok::r_brace)) { assert(!OpeningBrace || OpeningBrace->is(TT_ControlStatementLBrace)); Length -= FirstToken->TokenText.size() + 1; } Index = 0; for (auto &Token : Tokens) { const auto &SavedToken = SavedTokens[Index++]; Token.Tok->copyFrom(*SavedToken.Tok); Token.Children = std::move(SavedToken.Children); delete SavedToken.Tok; } // If these change PPLevel needs to be used for get correct indentation.
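computePPHash() above folds every (Kind, Line) entry of the preprocessor-branch stack into one value with the boost-style hash_combine. A self-contained restatement of that folding, using invented names (hashCombine, hashPPStack) and std::pair in place of the parser's stack entries, could look like this:

// Sketch only: same seed-mixing formula as hash_combine above, so two
// identical #if nestings end up with the same hash.
#include <cstddef>
#include <functional>
#include <utility>
#include <vector>

template <typename T> void hashCombine(std::size_t &Seed, const T &V) {
  Seed ^= std::hash<T>()(V) + 0x9e3779b9 + (Seed << 6) + (Seed >> 2);
}

std::size_t hashPPStack(const std::vector<std::pair<int, std::size_t>> &Stack) {
  std::size_t H = 0;
  for (const auto &[Kind, Line] : Stack) {  // (branch kind, line it started on)
    hashCombine(H, static_cast<std::size_t>(Kind));
    hashCombine(H, Line);
  }
  return H;
}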
assert(!Line.InMacroBody); assert(!Line.InPPDirective); return Line.Level * Style.IndentWidth + Length <= ColumnLimit; } FormatToken *UnwrappedLineParser::parseBlock(bool MustBeDeclaration, unsigned AddLevels, bool MunchSemi, bool KeepBraces, IfStmtKind *IfKind, bool UnindentWhitesmithsBraces) { auto HandleVerilogBlockLabel = [this]() { // ":" name if (Style.isVerilog() && FormatTok->is(tok::colon)) { nextToken(); if (Keywords.isVerilogIdentifier(*FormatTok)) nextToken(); } }; // Whether this is a Verilog-specific block that has a special header like a // module. const bool VerilogHierarchy = Style.isVerilog() && Keywords.isVerilogHierarchy(*FormatTok); assert((FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) || (Style.isVerilog() && (Keywords.isVerilogBegin(*FormatTok) || VerilogHierarchy))) && "'{' or macro block token expected"); FormatToken *Tok = FormatTok; const bool FollowedByComment = Tokens->peekNextToken()->is(tok::comment); auto Index = CurrentLines->size(); const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin); FormatTok->setBlockKind(BK_Block); // For Whitesmiths mode, jump to the next level prior to skipping over the // braces. if (!VerilogHierarchy && AddLevels > 0 && Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) { ++Line->Level; } size_t PPStartHash = computePPHash(); const unsigned InitialLevel = Line->Level; if (VerilogHierarchy) { AddLevels += parseVerilogHierarchyHeader(); } else { nextToken(/*LevelDifference=*/AddLevels); HandleVerilogBlockLabel(); } // Bail out if there are too many levels. Otherwise, the stack might overflow. if (Line->Level > 300) return nullptr; if (MacroBlock && FormatTok->is(tok::l_paren)) parseParens(); size_t NbPreprocessorDirectives = !parsingPPDirective() ? PreprocessorDirectives.size() : 0; addUnwrappedLine(); size_t OpeningLineIndex = CurrentLines->empty() ? (UnwrappedLine::kInvalidIndex) : (CurrentLines->size() - 1 - NbPreprocessorDirectives); // Whitesmiths is weird here. The brace needs to be indented for the namespace // block, but the block itself may not be indented depending on the style // settings. This allows the format to back up one level in those cases. if (UnindentWhitesmithsBraces) --Line->Level; ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, MustBeDeclaration); if (AddLevels > 0u && Style.BreakBeforeBraces != FormatStyle::BS_Whitesmiths) Line->Level += AddLevels; FormatToken *IfLBrace = nullptr; const bool SimpleBlock = parseLevel(Tok, IfKind, &IfLBrace); if (eof()) return IfLBrace; if (MacroBlock ? 
FormatTok->isNot(TT_MacroBlockEnd) : FormatTok->isNot(tok::r_brace)) { Line->Level = InitialLevel; FormatTok->setBlockKind(BK_Block); return IfLBrace; } if (FormatTok->is(tok::r_brace)) { FormatTok->setBlockKind(BK_Block); if (Tok->is(TT_NamespaceLBrace)) FormatTok->setFinalizedType(TT_NamespaceRBrace); } const bool IsFunctionRBrace = FormatTok->is(tok::r_brace) && Tok->is(TT_FunctionLBrace); auto RemoveBraces = [=]() mutable { if (!SimpleBlock) return false; assert(Tok->isOneOf(TT_ControlStatementLBrace, TT_ElseLBrace)); assert(FormatTok->is(tok::r_brace)); const bool WrappedOpeningBrace = !Tok->Previous; if (WrappedOpeningBrace && FollowedByComment) return false; const bool HasRequiredIfBraces = IfLBrace && !IfLBrace->Optional; if (KeepBraces && !HasRequiredIfBraces) return false; if (Tok->isNot(TT_ElseLBrace) || !HasRequiredIfBraces) { const FormatToken *Previous = Tokens->getPreviousToken(); assert(Previous); if (Previous->is(tok::r_brace) && !Previous->Optional) return false; } assert(!CurrentLines->empty()); auto &LastLine = CurrentLines->back(); if (LastLine.Level == InitialLevel + 1 && !mightFitOnOneLine(LastLine)) return false; if (Tok->is(TT_ElseLBrace)) return true; if (WrappedOpeningBrace) { assert(Index > 0); --Index; // The line above the wrapped l_brace. Tok = nullptr; } return mightFitOnOneLine((*CurrentLines)[Index], Tok); }; if (RemoveBraces()) { Tok->MatchingParen = FormatTok; FormatTok->MatchingParen = Tok; } size_t PPEndHash = computePPHash(); // Munch the closing brace. nextToken(/*LevelDifference=*/-AddLevels); // When this is a function block and there is an unnecessary semicolon // afterwards then mark it as optional (so the RemoveSemi pass can get rid of // it later). if (Style.RemoveSemicolon && IsFunctionRBrace) { while (FormatTok->is(tok::semi)) { FormatTok->Optional = true; nextToken(); } } HandleVerilogBlockLabel(); if (MacroBlock && FormatTok->is(tok::l_paren)) parseParens(); Line->Level = InitialLevel; if (FormatTok->is(tok::kw_noexcept)) { // A noexcept in a requires expression. nextToken(); } if (FormatTok->is(tok::arrow)) { // Following the } or noexcept we can find a trailing return type arrow // as part of an implicit conversion constraint. nextToken(); parseStructuralElement(); } if (MunchSemi && FormatTok->is(tok::semi)) nextToken(); if (PPStartHash == PPEndHash) { Line->MatchingOpeningBlockLineIndex = OpeningLineIndex; if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) { // Update the opening line to add the forward reference as well (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex = CurrentLines->size() - 1; } } return IfLBrace; } static bool isGoogScope(const UnwrappedLine &Line) { // FIXME: Closure-library specific stuff should not be hard-coded but be // configurable. if (Line.Tokens.size() < 4) return false; auto I = Line.Tokens.begin(); if (I->Tok->TokenText != "goog") return false; ++I; if (I->Tok->isNot(tok::period)) return false; ++I; if (I->Tok->TokenText != "scope") return false; ++I; return I->Tok->is(tok::l_paren); } static bool isIIFE(const UnwrappedLine &Line, const AdditionalKeywords &Keywords) { // Look for the start of an immediately invoked anonymous function. // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression // This is commonly done in JavaScript to create a new, anonymous scope. // Example: (function() { ... 
})() if (Line.Tokens.size() < 3) return false; auto I = Line.Tokens.begin(); if (I->Tok->isNot(tok::l_paren)) return false; ++I; if (I->Tok->isNot(Keywords.kw_function)) return false; ++I; return I->Tok->is(tok::l_paren); } static bool ShouldBreakBeforeBrace(const FormatStyle &Style, const FormatToken &InitialToken) { tok::TokenKind Kind = InitialToken.Tok.getKind(); if (InitialToken.is(TT_NamespaceMacro)) Kind = tok::kw_namespace; switch (Kind) { case tok::kw_namespace: return Style.BraceWrapping.AfterNamespace; case tok::kw_class: return Style.BraceWrapping.AfterClass; case tok::kw_union: return Style.BraceWrapping.AfterUnion; case tok::kw_struct: return Style.BraceWrapping.AfterStruct; case tok::kw_enum: return Style.BraceWrapping.AfterEnum; default: return false; } } void UnwrappedLineParser::parseChildBlock() { assert(FormatTok->is(tok::l_brace)); FormatTok->setBlockKind(BK_Block); const FormatToken *OpeningBrace = FormatTok; nextToken(); { bool SkipIndent = (Style.isJavaScript() && (isGoogScope(*Line) || isIIFE(*Line, Keywords))); ScopedLineState LineState(*this); ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, /*MustBeDeclaration=*/false); Line->Level += SkipIndent ? 0 : 1; parseLevel(OpeningBrace); flushComments(isOnNewLine(*FormatTok)); Line->Level -= SkipIndent ? 0 : 1; } nextToken(); } void UnwrappedLineParser::parsePPDirective() { assert(FormatTok->is(tok::hash) && "'#' expected"); ScopedMacroState MacroState(*Line, Tokens, FormatTok); nextToken(); if (!FormatTok->Tok.getIdentifierInfo()) { parsePPUnknown(); return; } switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) { case tok::pp_define: parsePPDefine(); return; case tok::pp_if: parsePPIf(/*IfDef=*/false); break; case tok::pp_ifdef: case tok::pp_ifndef: parsePPIf(/*IfDef=*/true); break; case tok::pp_else: case tok::pp_elifdef: case tok::pp_elifndef: case tok::pp_elif: parsePPElse(); break; case tok::pp_endif: parsePPEndIf(); break; case tok::pp_pragma: parsePPPragma(); break; default: parsePPUnknown(); break; } } void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) { size_t Line = CurrentLines->size(); if (CurrentLines == &PreprocessorDirectives) Line += Lines.size(); if (Unreachable || (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable)) { PPStack.push_back({PP_Unreachable, Line}); } else { PPStack.push_back({PP_Conditional, Line}); } } void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) { ++PPBranchLevel; assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size()); if (PPBranchLevel == (int)PPLevelBranchIndex.size()) { PPLevelBranchIndex.push_back(0); PPLevelBranchCount.push_back(0); } PPChainBranchIndex.push(Unreachable ? 
-1 : 0); bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0; conditionalCompilationCondition(Unreachable || Skip); } void UnwrappedLineParser::conditionalCompilationAlternative() { if (!PPStack.empty()) PPStack.pop_back(); assert(PPBranchLevel < (int)PPLevelBranchIndex.size()); if (!PPChainBranchIndex.empty()) ++PPChainBranchIndex.top(); conditionalCompilationCondition( PPBranchLevel >= 0 && !PPChainBranchIndex.empty() && PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top()); } void UnwrappedLineParser::conditionalCompilationEnd() { assert(PPBranchLevel < (int)PPLevelBranchIndex.size()); if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) { if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1; } // Guard against #endif's without #if. if (PPBranchLevel > -1) --PPBranchLevel; if (!PPChainBranchIndex.empty()) PPChainBranchIndex.pop(); if (!PPStack.empty()) PPStack.pop_back(); } void UnwrappedLineParser::parsePPIf(bool IfDef) { bool IfNDef = FormatTok->is(tok::pp_ifndef); nextToken(); bool Unreachable = false; if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0")) Unreachable = true; if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG") Unreachable = true; conditionalCompilationStart(Unreachable); FormatToken *IfCondition = FormatTok; // If there's a #ifndef on the first line, and the only lines before it are // comments, it could be an include guard. bool MaybeIncludeGuard = IfNDef; if (IncludeGuard == IG_Inited && MaybeIncludeGuard) { for (auto &Line : Lines) { if (Line.Tokens.front().Tok->isNot(tok::comment)) { MaybeIncludeGuard = false; IncludeGuard = IG_Rejected; break; } } } --PPBranchLevel; parsePPUnknown(); ++PPBranchLevel; if (IncludeGuard == IG_Inited && MaybeIncludeGuard) { IncludeGuard = IG_IfNdefed; IncludeGuardToken = IfCondition; } } void UnwrappedLineParser::parsePPElse() { // If a potential include guard has an #else, it's not an include guard. if (IncludeGuard == IG_Defined && PPBranchLevel == 0) IncludeGuard = IG_Rejected; // Don't crash when there is an #else without an #if. assert(PPBranchLevel >= -1); if (PPBranchLevel == -1) conditionalCompilationStart(/*Unreachable=*/true); conditionalCompilationAlternative(); --PPBranchLevel; parsePPUnknown(); ++PPBranchLevel; } void UnwrappedLineParser::parsePPEndIf() { conditionalCompilationEnd(); parsePPUnknown(); // If the #endif of a potential include guard is the last thing in the file, // then we found an include guard. if (IncludeGuard == IG_Defined && PPBranchLevel == -1 && Tokens->isEOF() && Style.IndentPPDirectives != FormatStyle::PPDIS_None) { IncludeGuard = IG_Found; } } void UnwrappedLineParser::parsePPDefine() { nextToken(); if (!FormatTok->Tok.getIdentifierInfo()) { IncludeGuard = IG_Rejected; IncludeGuardToken = nullptr; parsePPUnknown(); return; } if (IncludeGuard == IG_IfNdefed && IncludeGuardToken->TokenText == FormatTok->TokenText) { IncludeGuard = IG_Defined; IncludeGuardToken = nullptr; for (auto &Line : Lines) { if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) { IncludeGuard = IG_Rejected; break; } } } // In the context of a define, even keywords should be treated as normal // identifiers. Setting the kind to identifier is not enough, because we need // to treat additional keywords like __except as well, which are already // identifiers. Setting the identifier info to null interferes with include // guard processing above, and changes preprocessing nesting. 
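The include-guard bookkeeping spread across parsePPIf, parsePPElse, parsePPEndIf and parsePPDefine above is essentially a five-state machine. The sketch below restates those transitions in one place; GuardTracker and its callbacks are invented, and details such as the PPBranchLevel checks are deliberately omitted.

// Hedged sketch of the IG_* state machine the parser walks through.
#include <string>

enum IncludeGuardState {
  IG_Inited,    // Looking for a leading #ifndef.
  IG_IfNdefed,  // Saw "#ifndef NAME" before any real code.
  IG_Defined,   // Saw "#define NAME" with the same NAME.
  IG_Found,     // The matching #endif was the last thing in the file.
  IG_Rejected   // Anything else: code before the guard, an #else, a mismatch.
};

struct GuardTracker {
  IncludeGuardState State = IG_Inited;
  std::string GuardName;

  void onIfndef(const std::string &Name) {
    if (State == IG_Inited) { State = IG_IfNdefed; GuardName = Name; }
  }
  void onDefine(const std::string &Name) {
    if (State == IG_IfNdefed && Name == GuardName) State = IG_Defined;
  }
  void onElse() { State = IG_Rejected; }        // A real guard never has #else.
  void onEndifAtEof() {
    if (State == IG_Defined) State = IG_Found;  // Guard confirmed at EOF.
  }
};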
FormatTok->Tok.setKind(tok::identifier); FormatTok->Tok.setIdentifierInfo(Keywords.kw_internal_ident_after_define); nextToken(); if (FormatTok->Tok.getKind() == tok::l_paren && !FormatTok->hasWhitespaceBefore()) { parseParens(); } if (Style.IndentPPDirectives != FormatStyle::PPDIS_None) Line->Level += PPBranchLevel + 1; addUnwrappedLine(); ++Line->Level; Line->PPLevel = PPBranchLevel + (IncludeGuard == IG_Defined ? 0 : 1); assert((int)Line->PPLevel >= 0); Line->InMacroBody = true; if (Style.SkipMacroDefinitionBody) { while (!eof()) { FormatTok->Finalized = true; FormatTok = Tokens->getNextToken(); } addUnwrappedLine(); return; } // Errors during a preprocessor directive can only affect the layout of the // preprocessor directive, and thus we ignore them. An alternative approach // would be to use the same approach we use on the file level (no // re-indentation if there was a structural error) within the macro // definition. parseFile(); } void UnwrappedLineParser::parsePPPragma() { Line->InPragmaDirective = true; parsePPUnknown(); } void UnwrappedLineParser::parsePPUnknown() { do { nextToken(); } while (!eof()); if (Style.IndentPPDirectives != FormatStyle::PPDIS_None) Line->Level += PPBranchLevel + 1; addUnwrappedLine(); } // Here we exclude certain tokens that are not usually the first token in an // unwrapped line. This is used in attempt to distinguish macro calls without // trailing semicolons from other constructs split to several lines. static bool tokenCanStartNewLine(const FormatToken &Tok) { // Semicolon can be a null-statement, l_square can be a start of a macro or // a C++11 attribute, but this doesn't seem to be common. return !Tok.isOneOf(tok::semi, tok::l_brace, // Tokens that can only be used as binary operators and a // part of overloaded operator names. tok::period, tok::periodstar, tok::arrow, tok::arrowstar, tok::less, tok::greater, tok::slash, tok::percent, tok::lessless, tok::greatergreater, tok::equal, tok::plusequal, tok::minusequal, tok::starequal, tok::slashequal, tok::percentequal, tok::ampequal, tok::pipeequal, tok::caretequal, tok::greatergreaterequal, tok::lesslessequal, // Colon is used in labels, base class lists, initializer // lists, range-based for loops, ternary operator, but // should never be the first token in an unwrapped line. tok::colon, // 'noexcept' is a trailing annotation. tok::kw_noexcept); } static bool mustBeJSIdent(const AdditionalKeywords &Keywords, const FormatToken *FormatTok) { // FIXME: This returns true for C/C++ keywords like 'struct'. return FormatTok->is(tok::identifier) && (!FormatTok->Tok.getIdentifierInfo() || !FormatTok->isOneOf( Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async, Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally, Keywords.kw_function, Keywords.kw_import, Keywords.kw_is, Keywords.kw_let, Keywords.kw_var, tok::kw_const, Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements, Keywords.kw_instanceof, Keywords.kw_interface, Keywords.kw_override, Keywords.kw_throws, Keywords.kw_from)); } static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords, const FormatToken *FormatTok) { return FormatTok->Tok.isLiteral() || FormatTok->isOneOf(tok::kw_true, tok::kw_false) || mustBeJSIdent(Keywords, FormatTok); } // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement // when encountered after a value (see mustBeJSIdentOrValue). 
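tokenCanStartNewLine() above encodes the rule that a token which can only continue an expression never opens a new unwrapped line, which is how macro calls without trailing semicolons are told apart from statements split across lines. The toy predicate below shows the shape of that check; Kind and canStartNewUnwrappedLine are illustrative, not the FormatToken API.

// Sketch: tokens that continue (or terminate) the previous statement cannot
// start a new unwrapped line, so whatever preceded them is still in progress.
enum class Kind { Identifier, Period, Arrow, Equal, Colon, Semi, LParen };

bool canStartNewUnwrappedLine(Kind K) {
  switch (K) {
  case Kind::Period: case Kind::Arrow: case Kind::Equal:
  case Kind::Colon:  case Kind::Semi:
    return false;   // Binary-operator-like or trailing tokens: a continuation.
  default:
    return true;
  }
}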
static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords, const FormatToken *FormatTok) { return FormatTok->isOneOf( tok::kw_return, Keywords.kw_yield, // conditionals tok::kw_if, tok::kw_else, // loops tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break, // switch/case tok::kw_switch, tok::kw_case, // exceptions tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally, // declaration tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let, Keywords.kw_async, Keywords.kw_function, // import/export Keywords.kw_import, tok::kw_export); } // Checks whether a token is a type in K&R C (aka C78). static bool isC78Type(const FormatToken &Tok) { return Tok.isOneOf(tok::kw_char, tok::kw_short, tok::kw_int, tok::kw_long, tok::kw_unsigned, tok::kw_float, tok::kw_double, tok::identifier); } // This function checks whether a token starts the first parameter declaration // in a K&R C (aka C78) function definition, e.g.: // int f(a, b) // short a, b; // { // return a + b; // } static bool isC78ParameterDecl(const FormatToken *Tok, const FormatToken *Next, const FormatToken *FuncName) { assert(Tok); assert(Next); assert(FuncName); if (FuncName->isNot(tok::identifier)) return false; const FormatToken *Prev = FuncName->Previous; if (!Prev || (Prev->isNot(tok::star) && !isC78Type(*Prev))) return false; if (!isC78Type(*Tok) && !Tok->isOneOf(tok::kw_register, tok::kw_struct, tok::kw_union)) { return false; } if (Next->isNot(tok::star) && !Next->Tok.getIdentifierInfo()) return false; Tok = Tok->Previous; if (!Tok || Tok->isNot(tok::r_paren)) return false; Tok = Tok->Previous; if (!Tok || Tok->isNot(tok::identifier)) return false; return Tok->Previous && Tok->Previous->isOneOf(tok::l_paren, tok::comma); } bool UnwrappedLineParser::parseModuleImport() { assert(FormatTok->is(Keywords.kw_import) && "'import' expected"); if (auto Token = Tokens->peekNextToken(/*SkipComment=*/true); !Token->Tok.getIdentifierInfo() && !Token->isOneOf(tok::colon, tok::less, tok::string_literal)) { return false; } nextToken(); while (!eof()) { if (FormatTok->is(tok::colon)) { FormatTok->setFinalizedType(TT_ModulePartitionColon); } // Handle import as we would an include statement. else if (FormatTok->is(tok::less)) { nextToken(); while (!FormatTok->isOneOf(tok::semi, tok::greater, tok::eof)) { // Mark tokens up to the trailing line comments as implicit string // literals. if (FormatTok->isNot(tok::comment) && !FormatTok->TokenText.starts_with("//")) { FormatTok->setFinalizedType(TT_ImplicitStringLiteral); } nextToken(); } } if (FormatTok->is(tok::semi)) { nextToken(); break; } nextToken(); } addUnwrappedLine(); return true; } // readTokenWithJavaScriptASI reads the next token and terminates the current // line if JavaScript Automatic Semicolon Insertion must // happen between the current token and the next token. // // This method is conservative - it cannot cover all edge cases of JavaScript, // but only aims to correctly handle certain well known cases. It *must not* // return true in speculative cases. void UnwrappedLineParser::readTokenWithJavaScriptASI() { FormatToken *Previous = FormatTok; readToken(); FormatToken *Next = FormatTok; bool IsOnSameLine = CommentsBeforeNextToken.empty() ? 
Next->NewlinesBefore == 0 : CommentsBeforeNextToken.front()->NewlinesBefore == 0; if (IsOnSameLine) return; bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous); bool PreviousStartsTemplateExpr = Previous->is(TT_TemplateString) && Previous->TokenText.ends_with("${"); if (PreviousMustBeValue || Previous->is(tok::r_paren)) { // If the line contains an '@' sign, the previous token might be an // annotation, which can precede another identifier/value. bool HasAt = llvm::any_of(Line->Tokens, [](UnwrappedLineNode &LineNode) { return LineNode.Tok->is(tok::at); }); if (HasAt) return; } if (Next->is(tok::exclaim) && PreviousMustBeValue) return addUnwrappedLine(); bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next); bool NextEndsTemplateExpr = Next->is(TT_TemplateString) && Next->TokenText.starts_with("}"); if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr && (PreviousMustBeValue || Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus, tok::minusminus))) { return addUnwrappedLine(); } if ((PreviousMustBeValue || Previous->is(tok::r_paren)) && isJSDeclOrStmt(Keywords, Next)) { return addUnwrappedLine(); } } void UnwrappedLineParser::parseStructuralElement( const FormatToken *OpeningBrace, IfStmtKind *IfKind, FormatToken **IfLeftBrace, bool *HasDoWhile, bool *HasLabel) { if (Style.Language == FormatStyle::LK_TableGen && FormatTok->is(tok::pp_include)) { nextToken(); if (FormatTok->is(tok::string_literal)) nextToken(); addUnwrappedLine(); return; } if (IsCpp) { while (FormatTok->is(tok::l_square) && handleCppAttributes()) { } } else if (Style.isVerilog()) { if (Keywords.isVerilogStructuredProcedure(*FormatTok)) { parseForOrWhileLoop(/*HasParens=*/false); return; } if (FormatTok->isOneOf(Keywords.kw_foreach, Keywords.kw_repeat)) { parseForOrWhileLoop(); return; } if (FormatTok->isOneOf(tok::kw_restrict, Keywords.kw_assert, Keywords.kw_assume, Keywords.kw_cover)) { parseIfThenElse(IfKind, /*KeepBraces=*/false, /*IsVerilogAssert=*/true); return; } // Skip things that can exist before keywords like 'if' and 'case'. while (true) { if (FormatTok->isOneOf(Keywords.kw_priority, Keywords.kw_unique, Keywords.kw_unique0)) { nextToken(); } else if (FormatTok->is(tok::l_paren) && Tokens->peekNextToken()->is(tok::star)) { parseParens(); } else { break; } } } // Tokens that only make sense at the beginning of a line. if (FormatTok->isAccessSpecifierKeyword()) { if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() || Style.isCSharp()) { nextToken(); } else { parseAccessSpecifier(); } return; } switch (FormatTok->Tok.getKind()) { case tok::kw_asm: nextToken(); if (FormatTok->is(tok::l_brace)) { FormatTok->setFinalizedType(TT_InlineASMBrace); nextToken(); while (FormatTok && !eof()) { if (FormatTok->is(tok::r_brace)) { FormatTok->setFinalizedType(TT_InlineASMBrace); nextToken(); addUnwrappedLine(); break; } FormatTok->Finalized = true; nextToken(); } } break; case tok::kw_namespace: parseNamespace(); return; case tok::kw_if: { if (Style.isJavaScript() && Line->MustBeDeclaration) { // field/method declaration. break; } FormatToken *Tok = parseIfThenElse(IfKind); if (IfLeftBrace) *IfLeftBrace = Tok; return; } case tok::kw_for: case tok::kw_while: if (Style.isJavaScript() && Line->MustBeDeclaration) { // field/method declaration. break; } parseForOrWhileLoop(); return; case tok::kw_do: if (Style.isJavaScript() && Line->MustBeDeclaration) { // field/method declaration. 
break; } parseDoWhile(); if (HasDoWhile) *HasDoWhile = true; return; case tok::kw_switch: if (Style.isJavaScript() && Line->MustBeDeclaration) { // 'switch: string' field declaration. break; } parseSwitch(/*IsExpr=*/false); return; case tok::kw_default: { // In Verilog default along with other labels are handled in the next loop. if (Style.isVerilog()) break; if (Style.isJavaScript() && Line->MustBeDeclaration) { // 'default: string' field declaration. break; } auto *Default = FormatTok; nextToken(); if (FormatTok->is(tok::colon)) { FormatTok->setFinalizedType(TT_CaseLabelColon); parseLabel(); return; } if (FormatTok->is(tok::arrow)) { FormatTok->setFinalizedType(TT_CaseLabelArrow); Default->setFinalizedType(TT_SwitchExpressionLabel); parseLabel(); return; } // e.g. "default void f() {}" in a Java interface. break; } case tok::kw_case: // Proto: there are no switch/case statements. if (Style.Language == FormatStyle::LK_Proto) { nextToken(); return; } if (Style.isVerilog()) { parseBlock(); addUnwrappedLine(); return; } if (Style.isJavaScript() && Line->MustBeDeclaration) { // 'case: string' field declaration. nextToken(); break; } parseCaseLabel(); return; case tok::kw_try: case tok::kw___try: if (Style.isJavaScript() && Line->MustBeDeclaration) { // field/method declaration. break; } parseTryCatch(); return; case tok::kw_extern: nextToken(); if (Style.isVerilog()) { // In Verilog and extern module declaration looks like a start of module. // But there is no body and endmodule. So we handle it separately. if (Keywords.isVerilogHierarchy(*FormatTok)) { parseVerilogHierarchyHeader(); return; } } else if (FormatTok->is(tok::string_literal)) { nextToken(); if (FormatTok->is(tok::l_brace)) { if (Style.BraceWrapping.AfterExternBlock) addUnwrappedLine(); // Either we indent or for backwards compatibility we follow the // AfterExternBlock style. unsigned AddLevels = (Style.IndentExternBlock == FormatStyle::IEBS_Indent) || (Style.BraceWrapping.AfterExternBlock && Style.IndentExternBlock == FormatStyle::IEBS_AfterExternBlock) ? 
1u : 0u; parseBlock(/*MustBeDeclaration=*/true, AddLevels); addUnwrappedLine(); return; } } break; case tok::kw_export: if (Style.isJavaScript()) { parseJavaScriptEs6ImportExport(); return; } if (IsCpp) { nextToken(); if (FormatTok->is(tok::kw_namespace)) { parseNamespace(); return; } if (FormatTok->is(Keywords.kw_import) && parseModuleImport()) return; } break; case tok::kw_inline: nextToken(); if (FormatTok->is(tok::kw_namespace)) { parseNamespace(); return; } break; case tok::identifier: if (FormatTok->is(TT_ForEachMacro)) { parseForOrWhileLoop(); return; } if (FormatTok->is(TT_MacroBlockBegin)) { parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u, /*MunchSemi=*/false); return; } if (FormatTok->is(Keywords.kw_import)) { if (Style.isJavaScript()) { parseJavaScriptEs6ImportExport(); return; } if (Style.Language == FormatStyle::LK_Proto) { nextToken(); if (FormatTok->is(tok::kw_public)) nextToken(); if (FormatTok->isNot(tok::string_literal)) return; nextToken(); if (FormatTok->is(tok::semi)) nextToken(); addUnwrappedLine(); return; } if (IsCpp && parseModuleImport()) return; } if (IsCpp && FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals, Keywords.kw_slots, Keywords.kw_qslots)) { nextToken(); if (FormatTok->is(tok::colon)) { nextToken(); addUnwrappedLine(); return; } } if (IsCpp && FormatTok->is(TT_StatementMacro)) { parseStatementMacro(); return; } if (IsCpp && FormatTok->is(TT_NamespaceMacro)) { parseNamespace(); return; } // In Verilog labels can be any expression, so we don't do them here. // JS doesn't have macros, and within classes colons indicate fields, not // labels. // TableGen doesn't have labels. if (!Style.isJavaScript() && !Style.isVerilog() && !Style.isTableGen() && Tokens->peekNextToken()->is(tok::colon) && !Line->MustBeDeclaration) { nextToken(); if (!Line->InMacroBody || CurrentLines->size() > 1) Line->Tokens.begin()->Tok->MustBreakBefore = true; FormatTok->setFinalizedType(TT_GotoLabelColon); parseLabel(!Style.IndentGotoLabels); if (HasLabel) *HasLabel = true; return; } // In all other cases, parse the declaration. break; default: break; } for (const bool InRequiresExpression = OpeningBrace && OpeningBrace->is(TT_RequiresExpressionLBrace); !eof();) { if (IsCpp && FormatTok->isCppAlternativeOperatorKeyword()) { if (auto *Next = Tokens->peekNextToken(/*SkipComment=*/true); Next && Next->isBinaryOperator()) { FormatTok->Tok.setKind(tok::identifier); } } const FormatToken *Previous = FormatTok->Previous; switch (FormatTok->Tok.getKind()) { case tok::at: nextToken(); if (FormatTok->is(tok::l_brace)) { nextToken(); parseBracedList(); break; } else if (Style.Language == FormatStyle::LK_Java && FormatTok->is(Keywords.kw_interface)) { nextToken(); break; } switch (FormatTok->Tok.getObjCKeywordID()) { case tok::objc_public: case tok::objc_protected: case tok::objc_package: case tok::objc_private: return parseAccessSpecifier(); case tok::objc_interface: case tok::objc_implementation: return parseObjCInterfaceOrImplementation(); case tok::objc_protocol: if (parseObjCProtocol()) return; break; case tok::objc_end: return; // Handled by the caller. 
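The extern "C" block handling above picks its extra indent from two style knobs, IndentExternBlock and BraceWrapping.AfterExternBlock. A simplified stand-alone version of that decision follows; ExternBlockIndent and externBlockAddLevels are stand-ins for the real FormatStyle fields, not clang-format's interface.

// Sketch: how many levels to add when entering an extern block body.
enum class ExternBlockIndent { AfterExternBlock, Indent, NoIndent };

unsigned externBlockAddLevels(ExternBlockIndent IndentExternBlock,
                              bool BraceWrappingAfterExternBlock) {
  // "Indent" always indents; "AfterExternBlock" keeps the historical behaviour
  // of indenting only when the opening brace is wrapped onto its own line.
  if (IndentExternBlock == ExternBlockIndent::Indent)
    return 1u;
  if (IndentExternBlock == ExternBlockIndent::AfterExternBlock &&
      BraceWrappingAfterExternBlock)
    return 1u;
  return 0u;
}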
case tok::objc_optional: case tok::objc_required: nextToken(); addUnwrappedLine(); return; case tok::objc_autoreleasepool: nextToken(); if (FormatTok->is(tok::l_brace)) { if (Style.BraceWrapping.AfterControlStatement == FormatStyle::BWACS_Always) { addUnwrappedLine(); } parseBlock(); } addUnwrappedLine(); return; case tok::objc_synchronized: nextToken(); if (FormatTok->is(tok::l_paren)) { // Skip synchronization object parseParens(); } if (FormatTok->is(tok::l_brace)) { if (Style.BraceWrapping.AfterControlStatement == FormatStyle::BWACS_Always) { addUnwrappedLine(); } parseBlock(); } addUnwrappedLine(); return; case tok::objc_try: // This branch isn't strictly necessary (the kw_try case below would // do this too after the tok::at is parsed above). But be explicit. parseTryCatch(); return; default: break; } break; case tok::kw_requires: { if (IsCpp) { bool ParsedClause = parseRequires(); if (ParsedClause) return; } else { nextToken(); } break; } case tok::kw_enum: // Ignore if this is part of "template enum" or // "template <..., enum ...>". if (Previous && Previous->isOneOf(tok::less, tok::arrow, tok::comma)) { nextToken(); break; } // parseEnum falls through and does not yet add an unwrapped line as an // enum definition can start a structural element. if (!parseEnum()) break; // This only applies to C++ and Verilog. if (!IsCpp && !Style.isVerilog()) { addUnwrappedLine(); return; } break; case tok::kw_typedef: nextToken(); if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS, Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS, Keywords.kw_CF_CLOSED_ENUM, Keywords.kw_NS_CLOSED_ENUM)) { parseEnum(); } break; case tok::kw_class: if (Style.isVerilog()) { parseBlock(); addUnwrappedLine(); return; } if (Style.isTableGen()) { // Do nothing special. In this case the l_brace becomes FunctionLBrace. // This is same as def and so on. nextToken(); break; } [[fallthrough]]; case tok::kw_struct: case tok::kw_union: if (parseStructLike()) return; break; case tok::kw_decltype: nextToken(); if (FormatTok->is(tok::l_paren)) { parseParens(); assert(FormatTok->Previous); if (FormatTok->Previous->endsSequence(tok::r_paren, tok::kw_auto, tok::l_paren)) { Line->SeenDecltypeAuto = true; } } break; case tok::period: nextToken(); // In Java, classes have an implicit static member "class". if (Style.Language == FormatStyle::LK_Java && FormatTok && FormatTok->is(tok::kw_class)) { nextToken(); } if (Style.isJavaScript() && FormatTok && FormatTok->Tok.getIdentifierInfo()) { // JavaScript only has pseudo keywords, all keywords are allowed to // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6 nextToken(); } break; case tok::semi: nextToken(); addUnwrappedLine(); return; case tok::r_brace: addUnwrappedLine(); return; case tok::l_paren: { parseParens(); // Break the unwrapped line if a K&R C function definition has a parameter // declaration. if (OpeningBrace || !IsCpp || !Previous || eof()) break; if (isC78ParameterDecl(FormatTok, Tokens->peekNextToken(/*SkipComment=*/true), Previous)) { addUnwrappedLine(); return; } break; } case tok::kw_operator: nextToken(); if (FormatTok->isBinaryOperator()) nextToken(); break; case tok::caret: nextToken(); // Block return type. if (FormatTok->Tok.isAnyIdentifier() || FormatTok->isTypeName(LangOpts)) { nextToken(); // Return types: pointers are ok too. while (FormatTok->is(tok::star)) nextToken(); } // Block argument list. if (FormatTok->is(tok::l_paren)) parseParens(); // Block body. 
if (FormatTok->is(tok::l_brace)) parseChildBlock(); break; case tok::l_brace: if (InRequiresExpression) FormatTok->setFinalizedType(TT_BracedListLBrace); if (!tryToParsePropertyAccessor() && !tryToParseBracedList()) { IsDecltypeAutoFunction = Line->SeenDecltypeAuto; // A block outside of parentheses must be the last part of a // structural element. // FIXME: Figure out cases where this is not true, and add projections // for them (the one we know is missing are lambdas). if (Style.Language == FormatStyle::LK_Java && Line->Tokens.front().Tok->is(Keywords.kw_synchronized)) { // If necessary, we could set the type to something different than // TT_FunctionLBrace. if (Style.BraceWrapping.AfterControlStatement == FormatStyle::BWACS_Always) { addUnwrappedLine(); } } else if (Style.BraceWrapping.AfterFunction) { addUnwrappedLine(); } if (!Previous || Previous->isNot(TT_TypeDeclarationParen)) FormatTok->setFinalizedType(TT_FunctionLBrace); parseBlock(); IsDecltypeAutoFunction = false; addUnwrappedLine(); return; } // Otherwise this was a braced init list, and the structural // element continues. break; case tok::kw_try: if (Style.isJavaScript() && Line->MustBeDeclaration) { // field/method declaration. nextToken(); break; } // We arrive here when parsing function-try blocks. if (Style.BraceWrapping.AfterFunction) addUnwrappedLine(); parseTryCatch(); return; case tok::identifier: { if (Style.isCSharp() && FormatTok->is(Keywords.kw_where) && Line->MustBeDeclaration) { addUnwrappedLine(); parseCSharpGenericTypeConstraint(); break; } if (FormatTok->is(TT_MacroBlockEnd)) { addUnwrappedLine(); return; } // Function declarations (as opposed to function expressions) are parsed // on their own unwrapped line by continuing this loop. Function // expressions (functions that are not on their own line) must not create // a new unwrapped line, so they are special cased below. size_t TokenCount = Line->Tokens.size(); if (Style.isJavaScript() && FormatTok->is(Keywords.kw_function) && (TokenCount > 1 || (TokenCount == 1 && Line->Tokens.front().Tok->isNot(Keywords.kw_async)))) { tryToParseJSFunction(); break; } if ((Style.isJavaScript() || Style.Language == FormatStyle::LK_Java) && FormatTok->is(Keywords.kw_interface)) { if (Style.isJavaScript()) { // In JavaScript/TypeScript, "interface" can be used as a standalone // identifier, e.g. in `var interface = 1;`. If "interface" is // followed by another identifier, it is very like to be an actual // interface declaration. unsigned StoredPosition = Tokens->getPosition(); FormatToken *Next = Tokens->getNextToken(); FormatTok = Tokens->setPosition(StoredPosition); if (!mustBeJSIdent(Keywords, Next)) { nextToken(); break; } } parseRecord(); addUnwrappedLine(); return; } if (Style.isVerilog()) { if (FormatTok->is(Keywords.kw_table)) { parseVerilogTable(); return; } if (Keywords.isVerilogBegin(*FormatTok) || Keywords.isVerilogHierarchy(*FormatTok)) { parseBlock(); addUnwrappedLine(); return; } } if (!IsCpp && FormatTok->is(Keywords.kw_interface)) { if (parseStructLike()) return; break; } if (IsCpp && FormatTok->is(TT_StatementMacro)) { parseStatementMacro(); return; } // See if the following token should start a new unwrapped line. StringRef Text = FormatTok->TokenText; FormatToken *PreviousToken = FormatTok; nextToken(); // JS doesn't have macros, and within classes colons indicate fields, not // labels. 
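The "interface" lookahead above uses the parser's save-position/peek/restore idiom on the token source. A generic sketch of that idiom over a toy indexed stream is shown here; IndexedStream is invented and is not FormatTokenSource.

// Sketch: peek at the next token without consuming it by saving and restoring
// the stream position, the same pattern as getPosition()/setPosition() above.
#include <cstddef>
#include <vector>

template <typename T> class IndexedStream {
  const std::vector<T> &Toks;
  std::size_t Pos = 0;

public:
  explicit IndexedStream(const std::vector<T> &Toks) : Toks(Toks) {}
  std::size_t position() const { return Pos; }
  void setPosition(std::size_t P) { Pos = P; }
  const T &next() { return Toks[Pos++]; }  // Assumes callers stop at the end.

  const T &peek() {
    std::size_t Saved = position();        // Save where we are,
    const T &Result = next();              // read ahead,
    setPosition(Saved);                    // then rewind.
    return Result;
  }
};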
if (Style.isJavaScript()) break; auto OneTokenSoFar = [&]() { auto I = Line->Tokens.begin(), E = Line->Tokens.end(); while (I != E && I->Tok->is(tok::comment)) ++I; if (Style.isVerilog()) while (I != E && I->Tok->is(tok::hash)) ++I; return I != E && (++I == E); }; if (OneTokenSoFar()) { // Recognize function-like macro usages without trailing semicolon as // well as free-standing macros like Q_OBJECT. bool FunctionLike = FormatTok->is(tok::l_paren); if (FunctionLike) parseParens(); bool FollowedByNewline = CommentsBeforeNextToken.empty() ? FormatTok->NewlinesBefore > 0 : CommentsBeforeNextToken.front()->NewlinesBefore > 0; if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) && tokenCanStartNewLine(*FormatTok) && Text == Text.upper()) { if (PreviousToken->isNot(TT_UntouchableMacroFunc)) PreviousToken->setFinalizedType(TT_FunctionLikeOrFreestandingMacro); addUnwrappedLine(); return; } } break; } case tok::equal: if ((Style.isJavaScript() || Style.isCSharp()) && FormatTok->is(TT_FatArrow)) { tryToParseChildBlock(); break; } nextToken(); if (FormatTok->is(tok::l_brace)) { // Block kind should probably be set to BK_BracedInit for any language. // C# needs this change to ensure that array initialisers and object // initialisers are indented the same way. if (Style.isCSharp()) FormatTok->setBlockKind(BK_BracedInit); // TableGen's defset statement has syntax of the form, // `defset = { ... }` if (Style.isTableGen() && Line->Tokens.begin()->Tok->is(Keywords.kw_defset)) { FormatTok->setFinalizedType(TT_FunctionLBrace); parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u, /*MunchSemi=*/false); addUnwrappedLine(); break; } nextToken(); parseBracedList(); } else if (Style.Language == FormatStyle::LK_Proto && FormatTok->is(tok::less)) { nextToken(); parseBracedList(/*IsAngleBracket=*/true); } break; case tok::l_square: parseSquare(); break; case tok::kw_new: parseNew(); break; case tok::kw_switch: if (Style.Language == FormatStyle::LK_Java) parseSwitch(/*IsExpr=*/true); - nextToken(); + else + nextToken(); break; case tok::kw_case: // Proto: there are no switch/case statements. if (Style.Language == FormatStyle::LK_Proto) { nextToken(); return; } // In Verilog switch is called case. if (Style.isVerilog()) { parseBlock(); addUnwrappedLine(); return; } if (Style.isJavaScript() && Line->MustBeDeclaration) { // 'case: string' field declaration. nextToken(); break; } parseCaseLabel(); break; case tok::kw_default: nextToken(); if (Style.isVerilog()) { if (FormatTok->is(tok::colon)) { // The label will be handled in the next iteration. break; } if (FormatTok->is(Keywords.kw_clocking)) { // A default clocking block. parseBlock(); addUnwrappedLine(); return; } parseVerilogCaseLabel(); return; } break; case tok::colon: nextToken(); if (Style.isVerilog()) { parseVerilogCaseLabel(); return; } break; case tok::greater: nextToken(); if (FormatTok->is(tok::l_brace)) FormatTok->Previous->setFinalizedType(TT_TemplateCloser); break; default: nextToken(); break; } } } bool UnwrappedLineParser::tryToParsePropertyAccessor() { assert(FormatTok->is(tok::l_brace)); if (!Style.isCSharp()) return false; // See if it's a property accessor. if (FormatTok->Previous->isNot(tok::identifier)) return false; // See if we are inside a property accessor. // // Record the current tokenPosition so that we can advance and // reset the current token. `Next` is not set yet so we need // another way to advance along the token stream. 
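The branch above recognizes free-standing macros such as Q_OBJECT heuristically: a single ALL-CAPS token on the line (optionally called like a function), followed by a newline and by a token that could start a new unwrapped line. The predicate below restates those conditions; it reuses llvm::StringRef as the source does, while looksLikeFreestandingMacro itself is an invented wrapper, not part of clang-format.

// Sketch of the upper-case/newline macro heuristic above.
#include "llvm/ADT/StringRef.h"

bool looksLikeFreestandingMacro(llvm::StringRef Text, bool FunctionLike,
                                bool FollowedByNewline,
                                bool NextCanStartNewLine) {
  return FollowedByNewline && NextCanStartNewLine &&
         (Text.size() >= 5 || FunctionLike) && Text == Text.upper();
}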
unsigned int StoredPosition = Tokens->getPosition(); FormatToken *Tok = Tokens->getNextToken(); // A trivial property accessor is of the form: // { [ACCESS_SPECIFIER] [get]; [ACCESS_SPECIFIER] [set|init] } // Track these as they do not require line breaks to be introduced. bool HasSpecialAccessor = false; bool IsTrivialPropertyAccessor = true; while (!eof()) { if (Tok->isAccessSpecifierKeyword() || Tok->isOneOf(tok::semi, Keywords.kw_internal, Keywords.kw_get, Keywords.kw_init, Keywords.kw_set)) { if (Tok->isOneOf(Keywords.kw_get, Keywords.kw_init, Keywords.kw_set)) HasSpecialAccessor = true; Tok = Tokens->getNextToken(); continue; } if (Tok->isNot(tok::r_brace)) IsTrivialPropertyAccessor = false; break; } if (!HasSpecialAccessor) { Tokens->setPosition(StoredPosition); return false; } // Try to parse the property accessor: // https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/classes-and-structs/properties Tokens->setPosition(StoredPosition); if (!IsTrivialPropertyAccessor && Style.BraceWrapping.AfterFunction) addUnwrappedLine(); nextToken(); do { switch (FormatTok->Tok.getKind()) { case tok::r_brace: nextToken(); if (FormatTok->is(tok::equal)) { while (!eof() && FormatTok->isNot(tok::semi)) nextToken(); nextToken(); } addUnwrappedLine(); return true; case tok::l_brace: ++Line->Level; parseBlock(/*MustBeDeclaration=*/true); addUnwrappedLine(); --Line->Level; break; case tok::equal: if (FormatTok->is(TT_FatArrow)) { ++Line->Level; do { nextToken(); } while (!eof() && FormatTok->isNot(tok::semi)); nextToken(); addUnwrappedLine(); --Line->Level; break; } nextToken(); break; default: if (FormatTok->isOneOf(Keywords.kw_get, Keywords.kw_init, Keywords.kw_set) && !IsTrivialPropertyAccessor) { // Non-trivial get/set needs to be on its own line. addUnwrappedLine(); } nextToken(); } } while (!eof()); // Unreachable for well-formed code (paired '{' and '}'). return true; } bool UnwrappedLineParser::tryToParseLambda() { assert(FormatTok->is(tok::l_square)); if (!IsCpp) { nextToken(); return false; } FormatToken &LSquare = *FormatTok; if (!tryToParseLambdaIntroducer()) return false; bool SeenArrow = false; bool InTemplateParameterList = false; while (FormatTok->isNot(tok::l_brace)) { if (FormatTok->isTypeName(LangOpts) || FormatTok->isAttribute()) { nextToken(); continue; } switch (FormatTok->Tok.getKind()) { case tok::l_brace: break; case tok::l_paren: parseParens(/*AmpAmpTokenType=*/TT_PointerOrReference); break; case tok::l_square: parseSquare(); break; case tok::less: assert(FormatTok->Previous); if (FormatTok->Previous->is(tok::r_square)) InTemplateParameterList = true; nextToken(); break; case tok::kw_auto: case tok::kw_class: case tok::kw_struct: case tok::kw_union: case tok::kw_template: case tok::kw_typename: case tok::amp: case tok::star: case tok::kw_const: case tok::kw_constexpr: case tok::kw_consteval: case tok::comma: case tok::greater: case tok::identifier: case tok::numeric_constant: case tok::coloncolon: case tok::kw_mutable: case tok::kw_noexcept: case tok::kw_static: nextToken(); break; // Specialization of a template with an integer parameter can contain // arithmetic, logical, comparison and ternary operators. // // FIXME: This also accepts sequences of operators that are not in the scope // of a template argument list. // // In a C++ lambda a template type can only occur after an arrow. 
We use // this as an heuristic to distinguish between Objective-C expressions // followed by an `a->b` expression, such as: // ([obj func:arg] + a->b) // Otherwise the code below would parse as a lambda. case tok::plus: case tok::minus: case tok::exclaim: case tok::tilde: case tok::slash: case tok::percent: case tok::lessless: case tok::pipe: case tok::pipepipe: case tok::ampamp: case tok::caret: case tok::equalequal: case tok::exclaimequal: case tok::greaterequal: case tok::lessequal: case tok::question: case tok::colon: case tok::ellipsis: case tok::kw_true: case tok::kw_false: if (SeenArrow || InTemplateParameterList) { nextToken(); break; } return true; case tok::arrow: // This might or might not actually be a lambda arrow (this could be an // ObjC method invocation followed by a dereferencing arrow). We might // reset this back to TT_Unknown in TokenAnnotator. FormatTok->setFinalizedType(TT_LambdaArrow); SeenArrow = true; nextToken(); break; case tok::kw_requires: { auto *RequiresToken = FormatTok; nextToken(); parseRequiresClause(RequiresToken); break; } case tok::equal: if (!InTemplateParameterList) return true; nextToken(); break; default: return true; } } FormatTok->setFinalizedType(TT_LambdaLBrace); LSquare.setFinalizedType(TT_LambdaLSquare); NestedLambdas.push_back(Line->SeenDecltypeAuto); parseChildBlock(); assert(!NestedLambdas.empty()); NestedLambdas.pop_back(); return true; } bool UnwrappedLineParser::tryToParseLambdaIntroducer() { const FormatToken *Previous = FormatTok->Previous; const FormatToken *LeftSquare = FormatTok; nextToken(); if ((Previous && ((Previous->Tok.getIdentifierInfo() && !Previous->isOneOf(tok::kw_return, tok::kw_co_await, tok::kw_co_yield, tok::kw_co_return)) || Previous->closesScope())) || LeftSquare->isCppStructuredBinding(IsCpp)) { return false; } if (FormatTok->is(tok::l_square) || tok::isLiteral(FormatTok->Tok.getKind())) return false; if (FormatTok->is(tok::r_square)) { const FormatToken *Next = Tokens->peekNextToken(/*SkipComment=*/true); if (Next->is(tok::greater)) return false; } parseSquare(/*LambdaIntroducer=*/true); return true; } void UnwrappedLineParser::tryToParseJSFunction() { assert(FormatTok->is(Keywords.kw_function)); if (FormatTok->is(Keywords.kw_async)) nextToken(); // Consume "function". nextToken(); // Consume * (generator function). Treat it like C++'s overloaded operators. if (FormatTok->is(tok::star)) { FormatTok->setFinalizedType(TT_OverloadedOperator); nextToken(); } // Consume function name. if (FormatTok->is(tok::identifier)) nextToken(); if (FormatTok->isNot(tok::l_paren)) return; // Parse formal parameter list. parseParens(); if (FormatTok->is(tok::colon)) { // Parse a type definition. nextToken(); // Eat the type declaration. For braced inline object types, balance braces, // otherwise just parse until finding an l_brace for the function body. if (FormatTok->is(tok::l_brace)) tryToParseBracedList(); else while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof()) nextToken(); } if (FormatTok->is(tok::semi)) return; parseChildBlock(); } bool UnwrappedLineParser::tryToParseBracedList() { if (FormatTok->is(BK_Unknown)) calculateBraceTypes(); assert(FormatTok->isNot(BK_Unknown)); if (FormatTok->is(BK_Block)) return false; nextToken(); parseBracedList(); return true; } bool UnwrappedLineParser::tryToParseChildBlock() { assert(Style.isJavaScript() || Style.isCSharp()); assert(FormatTok->is(TT_FatArrow)); // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType TT_FatArrow. 
// They always start an expression or a child block if followed by a curly // brace. nextToken(); if (FormatTok->isNot(tok::l_brace)) return false; parseChildBlock(); return true; } bool UnwrappedLineParser::parseBracedList(bool IsAngleBracket, bool IsEnum) { assert(!IsAngleBracket || !IsEnum); bool HasError = false; // FIXME: Once we have an expression parser in the UnwrappedLineParser, // replace this by using parseAssignmentExpression() inside. do { if (Style.isCSharp() && FormatTok->is(TT_FatArrow) && tryToParseChildBlock()) { continue; } if (Style.isJavaScript()) { if (FormatTok->is(Keywords.kw_function)) { tryToParseJSFunction(); continue; } if (FormatTok->is(tok::l_brace)) { // Could be a method inside of a braced list `{a() { return 1; }}`. if (tryToParseBracedList()) continue; parseChildBlock(); } } if (FormatTok->is(IsAngleBracket ? tok::greater : tok::r_brace)) { if (IsEnum) { FormatTok->setBlockKind(BK_Block); if (!Style.AllowShortEnumsOnASingleLine) addUnwrappedLine(); } nextToken(); return !HasError; } switch (FormatTok->Tok.getKind()) { case tok::l_square: if (Style.isCSharp()) parseSquare(); else tryToParseLambda(); break; case tok::l_paren: parseParens(); // JavaScript can just have free standing methods and getters/setters in // object literals. Detect them by a "{" following ")". if (Style.isJavaScript()) { if (FormatTok->is(tok::l_brace)) parseChildBlock(); break; } break; case tok::l_brace: // Assume there are no blocks inside a braced init list apart // from the ones we explicitly parse out (like lambdas). FormatTok->setBlockKind(BK_BracedInit); if (!IsAngleBracket) { auto *Prev = FormatTok->Previous; if (Prev && Prev->is(tok::greater)) Prev->setFinalizedType(TT_TemplateCloser); } nextToken(); parseBracedList(); break; case tok::less: nextToken(); if (IsAngleBracket) parseBracedList(/*IsAngleBracket=*/true); break; case tok::semi: // JavaScript (or more precisely TypeScript) can have semicolons in braced // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be // used for error recovery if we have otherwise determined that this is // a braced list. if (Style.isJavaScript()) { nextToken(); break; } HasError = true; if (!IsEnum) return false; nextToken(); break; case tok::comma: nextToken(); if (IsEnum && !Style.AllowShortEnumsOnASingleLine) addUnwrappedLine(); break; default: nextToken(); break; } } while (!eof()); return false; } /// \brief Parses a pair of parentheses (and everything between them). /// \param AmpAmpTokenType If different than TT_Unknown sets this type for all /// double ampersands. This applies for all nested scopes as well. /// /// Returns whether there is a `=` token between the parentheses. bool UnwrappedLineParser::parseParens(TokenType AmpAmpTokenType) { assert(FormatTok->is(tok::l_paren) && "'(' expected."); auto *LeftParen = FormatTok; bool SeenEqual = false; bool MightBeFoldExpr = false; const bool MightBeStmtExpr = Tokens->peekNextToken()->is(tok::l_brace); nextToken(); do { switch (FormatTok->Tok.getKind()) { case tok::l_paren: if (parseParens(AmpAmpTokenType)) SeenEqual = true; if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace)) parseChildBlock(); break; case tok::r_paren: { auto *Prev = LeftParen->Previous; if (!MightBeStmtExpr && !MightBeFoldExpr && !Line->InMacroBody && Style.RemoveParentheses > FormatStyle::RPS_Leave) { const auto *Next = Tokens->peekNextToken(); const bool DoubleParens = Prev && Prev->is(tok::l_paren) && Next && Next->is(tok::r_paren); const auto *PrevPrev = Prev ? 
Prev->getPreviousNonComment() : nullptr; const bool Blacklisted = PrevPrev && (PrevPrev->isOneOf(tok::kw___attribute, tok::kw_decltype) || (SeenEqual && (PrevPrev->isOneOf(tok::kw_if, tok::kw_while) || PrevPrev->endsSequence(tok::kw_constexpr, tok::kw_if)))); const bool ReturnParens = Style.RemoveParentheses == FormatStyle::RPS_ReturnStatement && ((NestedLambdas.empty() && !IsDecltypeAutoFunction) || (!NestedLambdas.empty() && !NestedLambdas.back())) && Prev && Prev->isOneOf(tok::kw_return, tok::kw_co_return) && Next && Next->is(tok::semi); if ((DoubleParens && !Blacklisted) || ReturnParens) { LeftParen->Optional = true; FormatTok->Optional = true; } } if (Prev) { if (Prev->is(TT_TypenameMacro)) { LeftParen->setFinalizedType(TT_TypeDeclarationParen); FormatTok->setFinalizedType(TT_TypeDeclarationParen); } else if (Prev->is(tok::greater) && FormatTok->Previous == LeftParen) { Prev->setFinalizedType(TT_TemplateCloser); } } nextToken(); return SeenEqual; } case tok::r_brace: // A "}" inside parenthesis is an error if there wasn't a matching "{". return SeenEqual; case tok::l_square: tryToParseLambda(); break; case tok::l_brace: if (!tryToParseBracedList()) parseChildBlock(); break; case tok::at: nextToken(); if (FormatTok->is(tok::l_brace)) { nextToken(); parseBracedList(); } break; case tok::ellipsis: MightBeFoldExpr = true; nextToken(); break; case tok::equal: SeenEqual = true; if (Style.isCSharp() && FormatTok->is(TT_FatArrow)) tryToParseChildBlock(); else nextToken(); break; case tok::kw_class: if (Style.isJavaScript()) parseRecord(/*ParseAsExpr=*/true); else nextToken(); break; case tok::identifier: if (Style.isJavaScript() && (FormatTok->is(Keywords.kw_function))) tryToParseJSFunction(); else nextToken(); break; case tok::kw_switch: - parseSwitch(/*IsExpr=*/true); + if (Style.Language == FormatStyle::LK_Java) + parseSwitch(/*IsExpr=*/true); + else + nextToken(); break; case tok::kw_requires: { auto RequiresToken = FormatTok; nextToken(); parseRequiresExpression(RequiresToken); break; } case tok::ampamp: if (AmpAmpTokenType != TT_Unknown) FormatTok->setFinalizedType(AmpAmpTokenType); [[fallthrough]]; default: nextToken(); break; } } while (!eof()); return SeenEqual; } void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) { if (!LambdaIntroducer) { assert(FormatTok->is(tok::l_square) && "'[' expected."); if (tryToParseLambda()) return; } do { switch (FormatTok->Tok.getKind()) { case tok::l_paren: parseParens(); break; case tok::r_square: nextToken(); return; case tok::r_brace: // A "}" inside parenthesis is an error if there wasn't a matching "{". 
return; case tok::l_square: parseSquare(); break; case tok::l_brace: { if (!tryToParseBracedList()) parseChildBlock(); break; } case tok::at: case tok::colon: nextToken(); if (FormatTok->is(tok::l_brace)) { nextToken(); parseBracedList(); } break; default: nextToken(); break; } } while (!eof()); } void UnwrappedLineParser::keepAncestorBraces() { if (!Style.RemoveBracesLLVM) return; const int MaxNestingLevels = 2; const int Size = NestedTooDeep.size(); if (Size >= MaxNestingLevels) NestedTooDeep[Size - MaxNestingLevels] = true; NestedTooDeep.push_back(false); } static FormatToken *getLastNonComment(const UnwrappedLine &Line) { for (const auto &Token : llvm::reverse(Line.Tokens)) if (Token.Tok->isNot(tok::comment)) return Token.Tok; return nullptr; } void UnwrappedLineParser::parseUnbracedBody(bool CheckEOF) { FormatToken *Tok = nullptr; if (Style.InsertBraces && !Line->InPPDirective && !Line->Tokens.empty() && PreprocessorDirectives.empty() && FormatTok->isNot(tok::semi)) { Tok = Style.BraceWrapping.AfterControlStatement == FormatStyle::BWACS_Never ? getLastNonComment(*Line) : Line->Tokens.back().Tok; assert(Tok); if (Tok->BraceCount < 0) { assert(Tok->BraceCount == -1); Tok = nullptr; } else { Tok->BraceCount = -1; } } addUnwrappedLine(); ++Line->Level; ++Line->UnbracedBodyLevel; parseStructuralElement(); --Line->UnbracedBodyLevel; if (Tok) { assert(!Line->InPPDirective); Tok = nullptr; for (const auto &L : llvm::reverse(*CurrentLines)) { if (!L.InPPDirective && getLastNonComment(L)) { Tok = L.Tokens.back().Tok; break; } } assert(Tok); ++Tok->BraceCount; } if (CheckEOF && eof()) addUnwrappedLine(); --Line->Level; } static void markOptionalBraces(FormatToken *LeftBrace) { if (!LeftBrace) return; assert(LeftBrace->is(tok::l_brace)); FormatToken *RightBrace = LeftBrace->MatchingParen; if (!RightBrace) { assert(!LeftBrace->Optional); return; } assert(RightBrace->is(tok::r_brace)); assert(RightBrace->MatchingParen == LeftBrace); assert(LeftBrace->Optional == RightBrace->Optional); LeftBrace->Optional = true; RightBrace->Optional = true; } void UnwrappedLineParser::handleAttributes() { // Handle AttributeMacro, e.g. `if (x) UNLIKELY`. if (FormatTok->isAttribute()) nextToken(); else if (FormatTok->is(tok::l_square)) handleCppAttributes(); } bool UnwrappedLineParser::handleCppAttributes() { // Handle [[likely]] / [[unlikely]] attributes. assert(FormatTok->is(tok::l_square)); if (!tryToParseSimpleAttribute()) return false; parseSquare(); return true; } /// Returns whether \c Tok begins a block. bool UnwrappedLineParser::isBlockBegin(const FormatToken &Tok) const { // FIXME: rename the function or make // Tok.isOneOf(tok::l_brace, TT_MacroBlockBegin) work. return Style.isVerilog() ? Keywords.isVerilogBegin(Tok) : Tok.is(tok::l_brace); } FormatToken *UnwrappedLineParser::parseIfThenElse(IfStmtKind *IfKind, bool KeepBraces, bool IsVerilogAssert) { assert((FormatTok->is(tok::kw_if) || (Style.isVerilog() && FormatTok->isOneOf(tok::kw_restrict, Keywords.kw_assert, Keywords.kw_assume, Keywords.kw_cover))) && "'if' expected"); nextToken(); if (IsVerilogAssert) { // Handle `assert #0` and `assert final`. if (FormatTok->is(Keywords.kw_verilogHash)) { nextToken(); if (FormatTok->is(tok::numeric_constant)) nextToken(); } else if (FormatTok->isOneOf(Keywords.kw_final, Keywords.kw_property, Keywords.kw_sequence)) { nextToken(); } } // TableGen's if statement has the form of `if then { ... }`. if (Style.isTableGen()) { while (!eof() && FormatTok->isNot(Keywords.kw_then)) { // Simply skip until then. 
This range only contains a value. nextToken(); } } // Handle `if !consteval`. if (FormatTok->is(tok::exclaim)) nextToken(); bool KeepIfBraces = true; if (FormatTok->is(tok::kw_consteval)) { nextToken(); } else { KeepIfBraces = !Style.RemoveBracesLLVM || KeepBraces; if (FormatTok->isOneOf(tok::kw_constexpr, tok::identifier)) nextToken(); if (FormatTok->is(tok::l_paren)) { FormatTok->setFinalizedType(TT_ConditionLParen); parseParens(); } } handleAttributes(); // The then action is optional in Verilog assert statements. if (IsVerilogAssert && FormatTok->is(tok::semi)) { nextToken(); addUnwrappedLine(); return nullptr; } bool NeedsUnwrappedLine = false; keepAncestorBraces(); FormatToken *IfLeftBrace = nullptr; IfStmtKind IfBlockKind = IfStmtKind::NotIf; if (isBlockBegin(*FormatTok)) { FormatTok->setFinalizedType(TT_ControlStatementLBrace); IfLeftBrace = FormatTok; CompoundStatementIndenter Indenter(this, Style, Line->Level); parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u, /*MunchSemi=*/true, KeepIfBraces, &IfBlockKind); setPreviousRBraceType(TT_ControlStatementRBrace); if (Style.BraceWrapping.BeforeElse) addUnwrappedLine(); else NeedsUnwrappedLine = true; } else if (IsVerilogAssert && FormatTok->is(tok::kw_else)) { addUnwrappedLine(); } else { parseUnbracedBody(); } if (Style.RemoveBracesLLVM) { assert(!NestedTooDeep.empty()); KeepIfBraces = KeepIfBraces || (IfLeftBrace && !IfLeftBrace->MatchingParen) || NestedTooDeep.back() || IfBlockKind == IfStmtKind::IfOnly || IfBlockKind == IfStmtKind::IfElseIf; } bool KeepElseBraces = KeepIfBraces; FormatToken *ElseLeftBrace = nullptr; IfStmtKind Kind = IfStmtKind::IfOnly; if (FormatTok->is(tok::kw_else)) { if (Style.RemoveBracesLLVM) { NestedTooDeep.back() = false; Kind = IfStmtKind::IfElse; } nextToken(); handleAttributes(); if (isBlockBegin(*FormatTok)) { const bool FollowedByIf = Tokens->peekNextToken()->is(tok::kw_if); FormatTok->setFinalizedType(TT_ElseLBrace); ElseLeftBrace = FormatTok; CompoundStatementIndenter Indenter(this, Style, Line->Level); IfStmtKind ElseBlockKind = IfStmtKind::NotIf; FormatToken *IfLBrace = parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u, /*MunchSemi=*/true, KeepElseBraces, &ElseBlockKind); setPreviousRBraceType(TT_ElseRBrace); if (FormatTok->is(tok::kw_else)) { KeepElseBraces = KeepElseBraces || ElseBlockKind == IfStmtKind::IfOnly || ElseBlockKind == IfStmtKind::IfElseIf; } else if (FollowedByIf && IfLBrace && !IfLBrace->Optional) { KeepElseBraces = true; assert(ElseLeftBrace->MatchingParen); markOptionalBraces(ElseLeftBrace); } addUnwrappedLine(); } else if (!IsVerilogAssert && FormatTok->is(tok::kw_if)) { const FormatToken *Previous = Tokens->getPreviousToken(); assert(Previous); const bool IsPrecededByComment = Previous->is(tok::comment); if (IsPrecededByComment) { addUnwrappedLine(); ++Line->Level; } bool TooDeep = true; if (Style.RemoveBracesLLVM) { Kind = IfStmtKind::IfElseIf; TooDeep = NestedTooDeep.pop_back_val(); } ElseLeftBrace = parseIfThenElse(/*IfKind=*/nullptr, KeepIfBraces); if (Style.RemoveBracesLLVM) NestedTooDeep.push_back(TooDeep); if (IsPrecededByComment) --Line->Level; } else { parseUnbracedBody(/*CheckEOF=*/true); } } else { KeepIfBraces = KeepIfBraces || IfBlockKind == IfStmtKind::IfElse; if (NeedsUnwrappedLine) addUnwrappedLine(); } if (!Style.RemoveBracesLLVM) return nullptr; assert(!NestedTooDeep.empty()); KeepElseBraces = KeepElseBraces || (ElseLeftBrace && !ElseLeftBrace->MatchingParen) || NestedTooDeep.back(); NestedTooDeep.pop_back(); if (!KeepIfBraces && !KeepElseBraces) 
{ markOptionalBraces(IfLeftBrace); markOptionalBraces(ElseLeftBrace); } else if (IfLeftBrace) { FormatToken *IfRightBrace = IfLeftBrace->MatchingParen; if (IfRightBrace) { assert(IfRightBrace->MatchingParen == IfLeftBrace); assert(!IfLeftBrace->Optional); assert(!IfRightBrace->Optional); IfLeftBrace->MatchingParen = nullptr; IfRightBrace->MatchingParen = nullptr; } } if (IfKind) *IfKind = Kind; return IfLeftBrace; } void UnwrappedLineParser::parseTryCatch() { assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected"); nextToken(); bool NeedsUnwrappedLine = false; bool HasCtorInitializer = false; if (FormatTok->is(tok::colon)) { auto *Colon = FormatTok; // We are in a function try block, what comes is an initializer list. nextToken(); if (FormatTok->is(tok::identifier)) { HasCtorInitializer = true; Colon->setFinalizedType(TT_CtorInitializerColon); } // In case identifiers were removed by clang-tidy, what might follow is // multiple commas in sequence - before the first identifier. while (FormatTok->is(tok::comma)) nextToken(); while (FormatTok->is(tok::identifier)) { nextToken(); if (FormatTok->is(tok::l_paren)) { parseParens(); } else if (FormatTok->is(tok::l_brace)) { nextToken(); parseBracedList(); } // In case identifiers were removed by clang-tidy, what might follow is // multiple commas in sequence - after the first identifier. while (FormatTok->is(tok::comma)) nextToken(); } } // Parse try with resource. if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) parseParens(); keepAncestorBraces(); if (FormatTok->is(tok::l_brace)) { if (HasCtorInitializer) FormatTok->setFinalizedType(TT_FunctionLBrace); CompoundStatementIndenter Indenter(this, Style, Line->Level); parseBlock(); if (Style.BraceWrapping.BeforeCatch) addUnwrappedLine(); else NeedsUnwrappedLine = true; } else if (FormatTok->isNot(tok::kw_catch)) { // The C++ standard requires a compound-statement after a try. // If there's none, we try to assume there's a structuralElement // and try to continue. 
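// Hypothetical ill-formed input, for illustration of this recovery path:
//   try bar();
//   catch (...) {}
// `bar();` is still emitted as its own indented unwrapped line.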
addUnwrappedLine(); ++Line->Level; parseStructuralElement(); --Line->Level; } while (true) { if (FormatTok->is(tok::at)) nextToken(); if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except, tok::kw___finally) || ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) && FormatTok->is(Keywords.kw_finally)) || (FormatTok->isObjCAtKeyword(tok::objc_catch) || FormatTok->isObjCAtKeyword(tok::objc_finally)))) { break; } nextToken(); while (FormatTok->isNot(tok::l_brace)) { if (FormatTok->is(tok::l_paren)) { parseParens(); continue; } if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof)) { if (Style.RemoveBracesLLVM) NestedTooDeep.pop_back(); return; } nextToken(); } NeedsUnwrappedLine = false; Line->MustBeDeclaration = false; CompoundStatementIndenter Indenter(this, Style, Line->Level); parseBlock(); if (Style.BraceWrapping.BeforeCatch) addUnwrappedLine(); else NeedsUnwrappedLine = true; } if (Style.RemoveBracesLLVM) NestedTooDeep.pop_back(); if (NeedsUnwrappedLine) addUnwrappedLine(); } void UnwrappedLineParser::parseNamespace() { assert(FormatTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) && "'namespace' expected"); const FormatToken &InitialToken = *FormatTok; nextToken(); if (InitialToken.is(TT_NamespaceMacro)) { parseParens(); } else { while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::kw_inline, tok::l_square, tok::period, tok::l_paren) || (Style.isCSharp() && FormatTok->is(tok::kw_union))) { if (FormatTok->is(tok::l_square)) parseSquare(); else if (FormatTok->is(tok::l_paren)) parseParens(); else nextToken(); } } if (FormatTok->is(tok::l_brace)) { FormatTok->setFinalizedType(TT_NamespaceLBrace); if (ShouldBreakBeforeBrace(Style, InitialToken)) addUnwrappedLine(); unsigned AddLevels = Style.NamespaceIndentation == FormatStyle::NI_All || (Style.NamespaceIndentation == FormatStyle::NI_Inner && DeclarationScopeStack.size() > 1) ? 1u : 0u; bool ManageWhitesmithsBraces = AddLevels == 0u && Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths; // If we're in Whitesmiths mode, indent the brace if we're not indenting // the whole block. if (ManageWhitesmithsBraces) ++Line->Level; // Munch the semicolon after a namespace. This is more common than one would // think. Putting the semicolon into its own line is very ugly. parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/true, /*KeepBraces=*/true, /*IfKind=*/nullptr, ManageWhitesmithsBraces); addUnwrappedLine(AddLevels > 0 ? LineLevel::Remove : LineLevel::Keep); if (ManageWhitesmithsBraces) --Line->Level; } // FIXME: Add error handling. } void UnwrappedLineParser::parseNew() { assert(FormatTok->is(tok::kw_new) && "'new' expected"); nextToken(); if (Style.isCSharp()) { do { // Handle constructor invocation, e.g. `new(field: value)`. if (FormatTok->is(tok::l_paren)) parseParens(); // Handle array initialization syntax, e.g. `new[] {10, 20, 30}`. if (FormatTok->is(tok::l_brace)) parseBracedList(); if (FormatTok->isOneOf(tok::semi, tok::comma)) return; nextToken(); } while (!eof()); } if (Style.Language != FormatStyle::LK_Java) return; // In Java, we can parse everything up to the parens, which aren't optional. do { // There should not be a ;, { or } before the new's open paren. if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace)) return; // Consume the parens. if (FormatTok->is(tok::l_paren)) { parseParens(); // If there is a class body of an anonymous class, consume that as child. 
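// Hypothetical Java input, for illustration:
//   new Runnable() { public void run() {} };
// The braces after the constructor arguments are an anonymous class body and
// are consumed as a child block here.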
if (FormatTok->is(tok::l_brace)) parseChildBlock(); return; } nextToken(); } while (!eof()); } void UnwrappedLineParser::parseLoopBody(bool KeepBraces, bool WrapRightBrace) { keepAncestorBraces(); if (isBlockBegin(*FormatTok)) { FormatTok->setFinalizedType(TT_ControlStatementLBrace); FormatToken *LeftBrace = FormatTok; CompoundStatementIndenter Indenter(this, Style, Line->Level); parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u, /*MunchSemi=*/true, KeepBraces); setPreviousRBraceType(TT_ControlStatementRBrace); if (!KeepBraces) { assert(!NestedTooDeep.empty()); if (!NestedTooDeep.back()) markOptionalBraces(LeftBrace); } if (WrapRightBrace) addUnwrappedLine(); } else { parseUnbracedBody(); } if (!KeepBraces) NestedTooDeep.pop_back(); } void UnwrappedLineParser::parseForOrWhileLoop(bool HasParens) { assert((FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) || (Style.isVerilog() && FormatTok->isOneOf(Keywords.kw_always, Keywords.kw_always_comb, Keywords.kw_always_ff, Keywords.kw_always_latch, Keywords.kw_final, Keywords.kw_initial, Keywords.kw_foreach, Keywords.kw_forever, Keywords.kw_repeat))) && "'for', 'while' or foreach macro expected"); const bool KeepBraces = !Style.RemoveBracesLLVM || !FormatTok->isOneOf(tok::kw_for, tok::kw_while); nextToken(); // JS' for await ( ... if (Style.isJavaScript() && FormatTok->is(Keywords.kw_await)) nextToken(); if (IsCpp && FormatTok->is(tok::kw_co_await)) nextToken(); if (HasParens && FormatTok->is(tok::l_paren)) { // The type is only set for Verilog basically because we were afraid to // change the existing behavior for loops. See the discussion on D121756 for // details. if (Style.isVerilog()) FormatTok->setFinalizedType(TT_ConditionLParen); parseParens(); } if (Style.isVerilog()) { // Event control. parseVerilogSensitivityList(); } else if (Style.AllowShortLoopsOnASingleLine && FormatTok->is(tok::semi) && Tokens->getPreviousToken()->is(tok::r_paren)) { nextToken(); addUnwrappedLine(); return; } handleAttributes(); parseLoopBody(KeepBraces, /*WrapRightBrace=*/true); } void UnwrappedLineParser::parseDoWhile() { assert(FormatTok->is(tok::kw_do) && "'do' expected"); nextToken(); parseLoopBody(/*KeepBraces=*/true, Style.BraceWrapping.BeforeWhile); // FIXME: Add error handling. if (FormatTok->isNot(tok::kw_while)) { addUnwrappedLine(); return; } FormatTok->setFinalizedType(TT_DoWhile); // If in Whitesmiths mode, the line with the while() needs to be indented // to the same level as the block. 
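// Hypothetical input, for illustration:
//   do { foo(); } while (cond);
// With BS_Whitesmiths the braces are indented one level, so the trailing
// `while (cond);` gets the same extra level to line up with them.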
if (Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) ++Line->Level; nextToken(); parseStructuralElement(); } void UnwrappedLineParser::parseLabel(bool LeftAlignLabel) { nextToken(); unsigned OldLineLevel = Line->Level; if (LeftAlignLabel) Line->Level = 0; else if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0)) --Line->Level; if (!Style.IndentCaseBlocks && CommentsBeforeNextToken.empty() && FormatTok->is(tok::l_brace)) { CompoundStatementIndenter Indenter(this, Line->Level, Style.BraceWrapping.AfterCaseLabel, Style.BraceWrapping.IndentBraces); parseBlock(); if (FormatTok->is(tok::kw_break)) { if (Style.BraceWrapping.AfterControlStatement == FormatStyle::BWACS_Always) { addUnwrappedLine(); if (!Style.IndentCaseBlocks && Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) { ++Line->Level; } } parseStructuralElement(); } addUnwrappedLine(); } else { if (FormatTok->is(tok::semi)) nextToken(); addUnwrappedLine(); } Line->Level = OldLineLevel; if (FormatTok->isNot(tok::l_brace)) { parseStructuralElement(); addUnwrappedLine(); } } void UnwrappedLineParser::parseCaseLabel() { assert(FormatTok->is(tok::kw_case) && "'case' expected"); auto *Case = FormatTok; // FIXME: fix handling of complex expressions here. do { nextToken(); if (FormatTok->is(tok::colon)) { FormatTok->setFinalizedType(TT_CaseLabelColon); break; } if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::arrow)) { FormatTok->setFinalizedType(TT_CaseLabelArrow); Case->setFinalizedType(TT_SwitchExpressionLabel); break; } } while (!eof()); parseLabel(); } void UnwrappedLineParser::parseSwitch(bool IsExpr) { assert(FormatTok->is(tok::kw_switch) && "'switch' expected"); nextToken(); if (FormatTok->is(tok::l_paren)) parseParens(); keepAncestorBraces(); if (FormatTok->is(tok::l_brace)) { CompoundStatementIndenter Indenter(this, Style, Line->Level); FormatTok->setFinalizedType(IsExpr ? TT_SwitchExpressionLBrace : TT_ControlStatementLBrace); if (IsExpr) parseChildBlock(); else parseBlock(); setPreviousRBraceType(TT_ControlStatementRBrace); if (!IsExpr) addUnwrappedLine(); } else { addUnwrappedLine(); ++Line->Level; parseStructuralElement(); --Line->Level; } if (Style.RemoveBracesLLVM) NestedTooDeep.pop_back(); } // Operators that can follow a C variable. static bool isCOperatorFollowingVar(tok::TokenKind Kind) { switch (Kind) { case tok::ampamp: case tok::ampequal: case tok::arrow: case tok::caret: case tok::caretequal: case tok::comma: case tok::ellipsis: case tok::equal: case tok::equalequal: case tok::exclaim: case tok::exclaimequal: case tok::greater: case tok::greaterequal: case tok::greatergreater: case tok::greatergreaterequal: case tok::l_paren: case tok::l_square: case tok::less: case tok::lessequal: case tok::lessless: case tok::lesslessequal: case tok::minus: case tok::minusequal: case tok::minusminus: case tok::percent: case tok::percentequal: case tok::period: case tok::pipe: case tok::pipeequal: case tok::pipepipe: case tok::plus: case tok::plusequal: case tok::plusplus: case tok::question: case tok::r_brace: case tok::r_paren: case tok::r_square: case tok::semi: case tok::slash: case tok::slashequal: case tok::star: case tok::starequal: return true; default: return false; } } void UnwrappedLineParser::parseAccessSpecifier() { FormatToken *AccessSpecifierCandidate = FormatTok; nextToken(); // Understand Qt's slots. if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots)) nextToken(); // Otherwise, we don't know what it is, and we'd better keep the next token. 
if (FormatTok->is(tok::colon)) { nextToken(); addUnwrappedLine(); } else if (FormatTok->isNot(tok::coloncolon) && !isCOperatorFollowingVar(FormatTok->Tok.getKind())) { // Not a variable name nor namespace name. addUnwrappedLine(); } else if (AccessSpecifierCandidate) { // Consider the access specifier to be a C identifier. AccessSpecifierCandidate->Tok.setKind(tok::identifier); } } /// \brief Parses a requires, decides if it is a clause or an expression. /// \pre The current token has to be the requires keyword. /// \returns true if it parsed a clause. bool UnwrappedLineParser::parseRequires() { assert(FormatTok->is(tok::kw_requires) && "'requires' expected"); auto RequiresToken = FormatTok; // We try to guess if it is a requires clause, or a requires expression. For // that we first consume the keyword and check the next token. nextToken(); switch (FormatTok->Tok.getKind()) { case tok::l_brace: // This can only be an expression, never a clause. parseRequiresExpression(RequiresToken); return false; case tok::l_paren: // Clauses and expression can start with a paren, it's unclear what we have. break; default: // All other tokens can only be a clause. parseRequiresClause(RequiresToken); return true; } // Looking forward we would have to decide if there are function declaration // like arguments to the requires expression: // requires (T t) { // Or there is a constraint expression for the requires clause: // requires (C && ... // But first let's look behind. auto *PreviousNonComment = RequiresToken->getPreviousNonComment(); if (!PreviousNonComment || PreviousNonComment->is(TT_RequiresExpressionLBrace)) { // If there is no token, or an expression left brace, we are a requires // clause within a requires expression. parseRequiresClause(RequiresToken); return true; } switch (PreviousNonComment->Tok.getKind()) { case tok::greater: case tok::r_paren: case tok::kw_noexcept: case tok::kw_const: // This is a requires clause. parseRequiresClause(RequiresToken); return true; case tok::amp: case tok::ampamp: { // This can be either: // if (... && requires (T t) ...) // Or // void member(...) && requires (C ... // We check the one token before that for a const: // void member(...) const && requires (C ... auto PrevPrev = PreviousNonComment->getPreviousNonComment(); if (PrevPrev && PrevPrev->is(tok::kw_const)) { parseRequiresClause(RequiresToken); return true; } break; } default: if (PreviousNonComment->isTypeOrIdentifier(LangOpts)) { // This is a requires clause. parseRequiresClause(RequiresToken); return true; } // It's an expression. parseRequiresExpression(RequiresToken); return false; } // Now we look forward and try to check if the paren content is a parameter // list. The parameters can be cv-qualified and contain references or // pointers. // So we want basically to check for TYPE NAME, but TYPE can contain all kinds // of stuff: typename, const, *, &, &&, ::, identifiers. unsigned StoredPosition = Tokens->getPosition(); FormatToken *NextToken = Tokens->getNextToken(); int Lookahead = 0; auto PeekNext = [&Lookahead, &NextToken, this] { ++Lookahead; NextToken = Tokens->getNextToken(); }; bool FoundType = false; bool LastWasColonColon = false; int OpenAngles = 0; for (; Lookahead < 50; PeekNext()) { switch (NextToken->Tok.getKind()) { case tok::kw_volatile: case tok::kw_const: case tok::comma: if (OpenAngles == 0) { FormatTok = Tokens->setPosition(StoredPosition); parseRequiresExpression(RequiresToken); return false; } break; case tok::eof: // Break out of the loop. 
Lookahead = 50; break; case tok::coloncolon: LastWasColonColon = true; break; case tok::kw_decltype: case tok::identifier: if (FoundType && !LastWasColonColon && OpenAngles == 0) { FormatTok = Tokens->setPosition(StoredPosition); parseRequiresExpression(RequiresToken); return false; } FoundType = true; LastWasColonColon = false; break; case tok::less: ++OpenAngles; break; case tok::greater: --OpenAngles; break; default: if (NextToken->isTypeName(LangOpts)) { FormatTok = Tokens->setPosition(StoredPosition); parseRequiresExpression(RequiresToken); return false; } break; } } // This seems to be a complicated expression, just assume it's a clause. FormatTok = Tokens->setPosition(StoredPosition); parseRequiresClause(RequiresToken); return true; } /// \brief Parses a requires clause. /// \param RequiresToken The requires keyword token, which starts this clause. /// \pre We need to be on the next token after the requires keyword. /// \sa parseRequiresExpression /// /// Returns if it either has finished parsing the clause, or it detects, that /// the clause is incorrect. void UnwrappedLineParser::parseRequiresClause(FormatToken *RequiresToken) { assert(FormatTok->getPreviousNonComment() == RequiresToken); assert(RequiresToken->is(tok::kw_requires) && "'requires' expected"); // If there is no previous token, we are within a requires expression, // otherwise we will always have the template or function declaration in front // of it. bool InRequiresExpression = !RequiresToken->Previous || RequiresToken->Previous->is(TT_RequiresExpressionLBrace); RequiresToken->setFinalizedType(InRequiresExpression ? TT_RequiresClauseInARequiresExpression : TT_RequiresClause); // NOTE: parseConstraintExpression is only ever called from this function. // It could be inlined into here. parseConstraintExpression(); if (!InRequiresExpression) FormatTok->Previous->ClosesRequiresClause = true; } /// \brief Parses a requires expression. /// \param RequiresToken The requires keyword token, which starts this clause. /// \pre We need to be on the next token after the requires keyword. /// \sa parseRequiresClause /// /// Returns if it either has finished parsing the expression, or it detects, /// that the expression is incorrect. void UnwrappedLineParser::parseRequiresExpression(FormatToken *RequiresToken) { assert(FormatTok->getPreviousNonComment() == RequiresToken); assert(RequiresToken->is(tok::kw_requires) && "'requires' expected"); RequiresToken->setFinalizedType(TT_RequiresExpression); if (FormatTok->is(tok::l_paren)) { FormatTok->setFinalizedType(TT_RequiresExpressionLParen); parseParens(); } if (FormatTok->is(tok::l_brace)) { FormatTok->setFinalizedType(TT_RequiresExpressionLBrace); parseChildBlock(); } } /// \brief Parses a constraint expression. /// /// This is the body of a requires clause. It returns, when the parsing is /// complete, or the expression is incorrect. void UnwrappedLineParser::parseConstraintExpression() { // The special handling for lambdas is needed since tryToParseLambda() eats a // token and if a requires expression is the last part of a requires clause // and followed by an attribute like [[nodiscard]] the ClosesRequiresClause is // not set on the correct token. Thus we need to be aware if we even expect a // lambda to be possible. // template requires requires { ... 
} [[nodiscard]] ...; bool LambdaNextTimeAllowed = true; // Within lambda declarations, it is permitted to put a requires clause after // its template parameter list, which would place the requires clause right // before the parentheses of the parameters of the lambda declaration. Thus, // we track if we expect to see grouping parentheses at all. // Without this check, `requires foo<T> (T t)` in the below example would be // seen as the whole requires clause, accidentally eating the parameters of // the lambda. // [&] <typename T> requires foo<T> (T t) { ... }; bool TopLevelParensAllowed = true; do { bool LambdaThisTimeAllowed = std::exchange(LambdaNextTimeAllowed, false); switch (FormatTok->Tok.getKind()) { case tok::kw_requires: { auto RequiresToken = FormatTok; nextToken(); parseRequiresExpression(RequiresToken); break; } case tok::l_paren: if (!TopLevelParensAllowed) return; parseParens(/*AmpAmpTokenType=*/TT_BinaryOperator); TopLevelParensAllowed = false; break; case tok::l_square: if (!LambdaThisTimeAllowed || !tryToParseLambda()) return; break; case tok::kw_const: case tok::semi: case tok::kw_class: case tok::kw_struct: case tok::kw_union: return; case tok::l_brace: // Potential function body. return; case tok::ampamp: case tok::pipepipe: FormatTok->setFinalizedType(TT_BinaryOperator); nextToken(); LambdaNextTimeAllowed = true; TopLevelParensAllowed = true; break; case tok::comma: case tok::comment: LambdaNextTimeAllowed = LambdaThisTimeAllowed; nextToken(); break; case tok::kw_sizeof: case tok::greater: case tok::greaterequal: case tok::greatergreater: case tok::less: case tok::lessequal: case tok::lessless: case tok::equalequal: case tok::exclaim: case tok::exclaimequal: case tok::plus: case tok::minus: case tok::star: case tok::slash: LambdaNextTimeAllowed = true; TopLevelParensAllowed = true; // Just eat them. nextToken(); break; case tok::numeric_constant: case tok::coloncolon: case tok::kw_true: case tok::kw_false: TopLevelParensAllowed = false; // Just eat them. nextToken(); break; case tok::kw_static_cast: case tok::kw_const_cast: case tok::kw_reinterpret_cast: case tok::kw_dynamic_cast: nextToken(); if (FormatTok->isNot(tok::less)) return; nextToken(); parseBracedList(/*IsAngleBracket=*/true); break; default: if (!FormatTok->Tok.getIdentifierInfo()) { // Identifiers are part of the default case, we check for more than // tok::identifier to handle builtin type traits. return; } // We need to differentiate identifiers for a template deduction guide, // variables, or function return types (the constraint expression has // ended before that), and basically all other cases. But it's easier to // check the other way around. assert(FormatTok->Previous); switch (FormatTok->Previous->Tok.getKind()) { case tok::coloncolon: // Nested identifier. case tok::ampamp: // Start of a function or variable for the case tok::pipepipe: // constraint expression. (binary) case tok::exclaim: // The same as above, but unary. case tok::kw_requires: // Initial identifier of a requires clause. case tok::equal: // Initial identifier of a concept declaration. break; default: return; } // Read identifier with optional template declaration. nextToken(); if (FormatTok->is(tok::less)) { nextToken(); parseBracedList(/*IsAngleBracket=*/true); } TopLevelParensAllowed = false; break; } } while (!eof()); } bool UnwrappedLineParser::parseEnum() { const FormatToken &InitialToken = *FormatTok; // Won't be 'enum' for NS_ENUMs. if (FormatTok->is(tok::kw_enum)) nextToken(); // In TypeScript, "enum" can also be used as property name, e.g.
in interface // declarations. An "enum" keyword followed by a colon would be a syntax // error and thus assume it is just an identifier. if (Style.isJavaScript() && FormatTok->isOneOf(tok::colon, tok::question)) return false; // In protobuf, "enum" can be used as a field name. if (Style.Language == FormatStyle::LK_Proto && FormatTok->is(tok::equal)) return false; if (IsCpp) { // Eat up enum class ... if (FormatTok->isOneOf(tok::kw_class, tok::kw_struct)) nextToken(); while (FormatTok->is(tok::l_square)) if (!handleCppAttributes()) return false; } while (FormatTok->Tok.getIdentifierInfo() || FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less, tok::greater, tok::comma, tok::question, tok::l_square)) { if (Style.isVerilog()) { FormatTok->setFinalizedType(TT_VerilogDimensionedTypeName); nextToken(); // In Verilog the base type can have dimensions. while (FormatTok->is(tok::l_square)) parseSquare(); } else { nextToken(); } // We can have macros or attributes in between 'enum' and the enum name. if (FormatTok->is(tok::l_paren)) parseParens(); if (FormatTok->is(tok::identifier)) { nextToken(); // If there are two identifiers in a row, this is likely an elaborate // return type. In Java, this can be "implements", etc. if (IsCpp && FormatTok->is(tok::identifier)) return false; } } // Just a declaration or something is wrong. if (FormatTok->isNot(tok::l_brace)) return true; FormatTok->setFinalizedType(TT_EnumLBrace); FormatTok->setBlockKind(BK_Block); if (Style.Language == FormatStyle::LK_Java) { // Java enums are different. parseJavaEnumBody(); return true; } if (Style.Language == FormatStyle::LK_Proto) { parseBlock(/*MustBeDeclaration=*/true); return true; } if (!Style.AllowShortEnumsOnASingleLine && ShouldBreakBeforeBrace(Style, InitialToken)) { addUnwrappedLine(); } // Parse enum body. nextToken(); if (!Style.AllowShortEnumsOnASingleLine) { addUnwrappedLine(); Line->Level += 1; } bool HasError = !parseBracedList(/*IsAngleBracket=*/false, /*IsEnum=*/true); if (!Style.AllowShortEnumsOnASingleLine) Line->Level -= 1; if (HasError) { if (FormatTok->is(tok::semi)) nextToken(); addUnwrappedLine(); } setPreviousRBraceType(TT_EnumRBrace); return true; // There is no addUnwrappedLine() here so that we fall through to parsing a // structural element afterwards. Thus, in "enum A {} n, m;", // "} n, m;" will end up in one unwrapped line. } bool UnwrappedLineParser::parseStructLike() { // parseRecord falls through and does not yet add an unwrapped line as a // record declaration or definition can start a structural element. parseRecord(); // This does not apply to Java, JavaScript and C#. if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() || Style.isCSharp()) { if (FormatTok->is(tok::semi)) nextToken(); addUnwrappedLine(); return true; } return false; } namespace { // A class used to set and restore the Token position when peeking // ahead in the token source. class ScopedTokenPosition { unsigned StoredPosition; FormatTokenSource *Tokens; public: ScopedTokenPosition(FormatTokenSource *Tokens) : Tokens(Tokens) { assert(Tokens && "Tokens expected to not be null"); StoredPosition = Tokens->getPosition(); } ~ScopedTokenPosition() { Tokens->setPosition(StoredPosition); } }; } // namespace // Look to see if we have [[ by looking ahead, if // its not then rewind to the original position. bool UnwrappedLineParser::tryToParseSimpleAttribute() { ScopedTokenPosition AutoPosition(Tokens); FormatToken *Tok = Tokens->getNextToken(); // We already read the first [ check for the second. 
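// For illustration: attribute-only sequences such as `[[likely]]` or
// `[[nodiscard]]` pass this check, while a `]]` immediately followed by `;`
// is rejected below (assumed reading of the checks that follow).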
if (Tok->isNot(tok::l_square)) return false; // Double check that the attribute is just something // fairly simple. while (Tok->isNot(tok::eof)) { if (Tok->is(tok::r_square)) break; Tok = Tokens->getNextToken(); } if (Tok->is(tok::eof)) return false; Tok = Tokens->getNextToken(); if (Tok->isNot(tok::r_square)) return false; Tok = Tokens->getNextToken(); if (Tok->is(tok::semi)) return false; return true; } void UnwrappedLineParser::parseJavaEnumBody() { assert(FormatTok->is(tok::l_brace)); const FormatToken *OpeningBrace = FormatTok; // Determine whether the enum is simple, i.e. does not have a semicolon or // constants with class bodies. Simple enums can be formatted like braced // lists, contracted to a single line, etc. unsigned StoredPosition = Tokens->getPosition(); bool IsSimple = true; FormatToken *Tok = Tokens->getNextToken(); while (Tok->isNot(tok::eof)) { if (Tok->is(tok::r_brace)) break; if (Tok->isOneOf(tok::l_brace, tok::semi)) { IsSimple = false; break; } // FIXME: This will also mark enums with braces in the arguments to enum // constants as "not simple". This is probably fine in practice, though. Tok = Tokens->getNextToken(); } FormatTok = Tokens->setPosition(StoredPosition); if (IsSimple) { nextToken(); parseBracedList(); addUnwrappedLine(); return; } // Parse the body of a more complex enum. // First add a line for everything up to the "{". nextToken(); addUnwrappedLine(); ++Line->Level; // Parse the enum constants. while (!eof()) { if (FormatTok->is(tok::l_brace)) { // Parse the constant's class body. parseBlock(/*MustBeDeclaration=*/true, /*AddLevels=*/1u, /*MunchSemi=*/false); } else if (FormatTok->is(tok::l_paren)) { parseParens(); } else if (FormatTok->is(tok::comma)) { nextToken(); addUnwrappedLine(); } else if (FormatTok->is(tok::semi)) { nextToken(); addUnwrappedLine(); break; } else if (FormatTok->is(tok::r_brace)) { addUnwrappedLine(); break; } else { nextToken(); } } // Parse the class body after the enum's ";" if any. parseLevel(OpeningBrace); nextToken(); --Line->Level; addUnwrappedLine(); } void UnwrappedLineParser::parseRecord(bool ParseAsExpr) { const FormatToken &InitialToken = *FormatTok; nextToken(); const FormatToken *ClassName = nullptr; bool IsDerived = false; auto IsNonMacroIdentifier = [](const FormatToken *Tok) { return Tok->is(tok::identifier) && Tok->TokenText != Tok->TokenText.upper(); }; // JavaScript/TypeScript supports anonymous classes like: // a = class extends foo { } bool JSPastExtendsOrImplements = false; // The actual identifier can be a nested name specifier, and in macros // it is often token-pasted. // An [[attribute]] can be before the identifier. while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash, tok::kw_alignas, tok::l_square) || FormatTok->isAttribute() || ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) && FormatTok->isOneOf(tok::period, tok::comma))) { if (Style.isJavaScript() && FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) { JSPastExtendsOrImplements = true; // JavaScript/TypeScript supports inline object types in // extends/implements positions: // class Foo implements {bar: number} { } nextToken(); if (FormatTok->is(tok::l_brace)) { tryToParseBracedList(); continue; } } if (FormatTok->is(tok::l_square) && handleCppAttributes()) continue; const auto *Previous = FormatTok; nextToken(); switch (FormatTok->Tok.getKind()) { case tok::l_paren: // We can have macros in between 'class' and the class name. if (!IsNonMacroIdentifier(Previous) || // e.g. 
`struct macro(a) S { int i; };` Previous->Previous == &InitialToken) { parseParens(); } break; case tok::coloncolon: case tok::hashhash: break; default: if (!JSPastExtendsOrImplements && !ClassName && Previous->is(tok::identifier) && Previous->isNot(TT_AttributeMacro)) { ClassName = Previous; } } } auto IsListInitialization = [&] { if (!ClassName || IsDerived) return false; assert(FormatTok->is(tok::l_brace)); const auto *Prev = FormatTok->getPreviousNonComment(); assert(Prev); return Prev != ClassName && Prev->is(tok::identifier) && Prev->isNot(Keywords.kw_final) && tryToParseBracedList(); }; if (FormatTok->isOneOf(tok::colon, tok::less)) { int AngleNestingLevel = 0; do { if (FormatTok->is(tok::less)) ++AngleNestingLevel; else if (FormatTok->is(tok::greater)) --AngleNestingLevel; if (AngleNestingLevel == 0) { if (FormatTok->is(tok::colon)) { IsDerived = true; } else if (FormatTok->is(tok::identifier) && FormatTok->Previous->is(tok::coloncolon)) { ClassName = FormatTok; } else if (FormatTok->is(tok::l_paren) && IsNonMacroIdentifier(FormatTok->Previous)) { break; } } if (FormatTok->is(tok::l_brace)) { if (AngleNestingLevel == 0 && IsListInitialization()) return; calculateBraceTypes(/*ExpectClassBody=*/true); if (!tryToParseBracedList()) break; } if (FormatTok->is(tok::l_square)) { FormatToken *Previous = FormatTok->Previous; if (!Previous || (Previous->isNot(tok::r_paren) && !Previous->isTypeOrIdentifier(LangOpts))) { // Don't try parsing a lambda if we had a closing parenthesis before, // it was probably a pointer to an array: int (*)[]. if (!tryToParseLambda()) continue; } else { parseSquare(); continue; } } if (FormatTok->is(tok::semi)) return; if (Style.isCSharp() && FormatTok->is(Keywords.kw_where)) { addUnwrappedLine(); nextToken(); parseCSharpGenericTypeConstraint(); break; } nextToken(); } while (!eof()); } auto GetBraceTypes = [](const FormatToken &RecordTok) -> std::pair<TokenType, TokenType> { switch (RecordTok.Tok.getKind()) { case tok::kw_class: return {TT_ClassLBrace, TT_ClassRBrace}; case tok::kw_struct: return {TT_StructLBrace, TT_StructRBrace}; case tok::kw_union: return {TT_UnionLBrace, TT_UnionRBrace}; default: // Useful for e.g. interface. return {TT_RecordLBrace, TT_RecordRBrace}; } }; if (FormatTok->is(tok::l_brace)) { if (IsListInitialization()) return; auto [OpenBraceType, ClosingBraceType] = GetBraceTypes(InitialToken); FormatTok->setFinalizedType(OpenBraceType); if (ParseAsExpr) { parseChildBlock(); } else { if (ShouldBreakBeforeBrace(Style, InitialToken)) addUnwrappedLine(); unsigned AddLevels = Style.IndentAccessModifiers ? 2u : 1u; parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/false); } setPreviousRBraceType(ClosingBraceType); } // There is no addUnwrappedLine() here so that we fall through to parsing a // structural element afterwards. Thus, in "class A {} n, m;", // "} n, m;" will end up in one unwrapped line. } void UnwrappedLineParser::parseObjCMethod() { assert(FormatTok->isOneOf(tok::l_paren, tok::identifier) && "'(' or identifier expected."); do { if (FormatTok->is(tok::semi)) { nextToken(); addUnwrappedLine(); return; } else if (FormatTok->is(tok::l_brace)) { if (Style.BraceWrapping.AfterFunction) addUnwrappedLine(); parseBlock(); addUnwrappedLine(); return; } else { nextToken(); } } while (!eof()); } void UnwrappedLineParser::parseObjCProtocolList() { assert(FormatTok->is(tok::less) && "'<' expected."); do { nextToken(); // Early exit in case someone forgot a close angle.
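// Hypothetical Objective-C input, for illustration:
//   @interface Foo : NSObject <NSCopying, NSCoding>
// If the closing '>' of the protocol list is missing, we bail out at ';', '{'
// or '@end' instead of running past the declaration.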
if (FormatTok->isOneOf(tok::semi, tok::l_brace) || FormatTok->isObjCAtKeyword(tok::objc_end)) { return; } } while (!eof() && FormatTok->isNot(tok::greater)); nextToken(); // Skip '>'. } void UnwrappedLineParser::parseObjCUntilAtEnd() { do { if (FormatTok->isObjCAtKeyword(tok::objc_end)) { nextToken(); addUnwrappedLine(); break; } if (FormatTok->is(tok::l_brace)) { parseBlock(); // In ObjC interfaces, nothing should be following the "}". addUnwrappedLine(); } else if (FormatTok->is(tok::r_brace)) { // Ignore stray "}". parseStructuralElement doesn't consume them. nextToken(); addUnwrappedLine(); } else if (FormatTok->isOneOf(tok::minus, tok::plus)) { nextToken(); parseObjCMethod(); } else { parseStructuralElement(); } } while (!eof()); } void UnwrappedLineParser::parseObjCInterfaceOrImplementation() { assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface || FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation); nextToken(); nextToken(); // interface name // @interface can be followed by a lightweight generic // specialization list, then either a base class or a category. if (FormatTok->is(tok::less)) parseObjCLightweightGenerics(); if (FormatTok->is(tok::colon)) { nextToken(); nextToken(); // base class name // The base class can also have lightweight generics applied to it. if (FormatTok->is(tok::less)) parseObjCLightweightGenerics(); } else if (FormatTok->is(tok::l_paren)) { // Skip category, if present. parseParens(); } if (FormatTok->is(tok::less)) parseObjCProtocolList(); if (FormatTok->is(tok::l_brace)) { if (Style.BraceWrapping.AfterObjCDeclaration) addUnwrappedLine(); parseBlock(/*MustBeDeclaration=*/true); } // With instance variables, this puts '}' on its own line. Without instance // variables, this ends the @interface line. addUnwrappedLine(); parseObjCUntilAtEnd(); } void UnwrappedLineParser::parseObjCLightweightGenerics() { assert(FormatTok->is(tok::less)); // Unlike protocol lists, generic parameterizations support // nested angles: // // @interface Foo> : // NSObject // // so we need to count how many open angles we have left. unsigned NumOpenAngles = 1; do { nextToken(); // Early exit in case someone forgot a close angle. if (FormatTok->isOneOf(tok::semi, tok::l_brace) || FormatTok->isObjCAtKeyword(tok::objc_end)) { break; } if (FormatTok->is(tok::less)) { ++NumOpenAngles; } else if (FormatTok->is(tok::greater)) { assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative"); --NumOpenAngles; } } while (!eof() && NumOpenAngles != 0); nextToken(); // Skip '>'. } // Returns true for the declaration/definition form of @protocol, // false for the expression form. bool UnwrappedLineParser::parseObjCProtocol() { assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol); nextToken(); if (FormatTok->is(tok::l_paren)) { // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);". return false; } // The definition/declaration form, // @protocol Foo // - (int)someMethod; // @end nextToken(); // protocol name if (FormatTok->is(tok::less)) parseObjCProtocolList(); // Check for protocol declaration. if (FormatTok->is(tok::semi)) { nextToken(); addUnwrappedLine(); return true; } addUnwrappedLine(); parseObjCUntilAtEnd(); return true; } void UnwrappedLineParser::parseJavaScriptEs6ImportExport() { bool IsImport = FormatTok->is(Keywords.kw_import); assert(IsImport || FormatTok->is(tok::kw_export)); nextToken(); // Consume the "default" in "export default class/function". 
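// Hypothetical JavaScript/TypeScript inputs, for illustration:
//   export default class Foo {}
//   export default async function bar() {}
//   import {a, b} from 'mod';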
if (FormatTok->is(tok::kw_default)) nextToken(); // Consume "async function", "function" and "default function", so that these // get parsed as free-standing JS functions, i.e. do not require a trailing // semicolon. if (FormatTok->is(Keywords.kw_async)) nextToken(); if (FormatTok->is(Keywords.kw_function)) { nextToken(); return; } // For imports, `export *`, `export {...}`, consume the rest of the line up // to the terminating `;`. For everything else, just return and continue // parsing the structural element, i.e. the declaration or expression for // `export default`. if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) && !FormatTok->isStringLiteral() && !(FormatTok->is(Keywords.kw_type) && Tokens->peekNextToken()->isOneOf(tok::l_brace, tok::star))) { return; } while (!eof()) { if (FormatTok->is(tok::semi)) return; if (Line->Tokens.empty()) { // Common issue: Automatic Semicolon Insertion wrapped the line, so the // import statement should terminate. return; } if (FormatTok->is(tok::l_brace)) { FormatTok->setBlockKind(BK_Block); nextToken(); parseBracedList(); } else { nextToken(); } } } void UnwrappedLineParser::parseStatementMacro() { nextToken(); if (FormatTok->is(tok::l_paren)) parseParens(); if (FormatTok->is(tok::semi)) nextToken(); addUnwrappedLine(); } void UnwrappedLineParser::parseVerilogHierarchyIdentifier() { // consume things like a::`b.c[d:e] or a::* while (true) { if (FormatTok->isOneOf(tok::star, tok::period, tok::periodstar, tok::coloncolon, tok::hash) || Keywords.isVerilogIdentifier(*FormatTok)) { nextToken(); } else if (FormatTok->is(tok::l_square)) { parseSquare(); } else { break; } } } void UnwrappedLineParser::parseVerilogSensitivityList() { if (FormatTok->isNot(tok::at)) return; nextToken(); // A block event expression has 2 at signs. if (FormatTok->is(tok::at)) nextToken(); switch (FormatTok->Tok.getKind()) { case tok::star: nextToken(); break; case tok::l_paren: parseParens(); break; default: parseVerilogHierarchyIdentifier(); break; } } unsigned UnwrappedLineParser::parseVerilogHierarchyHeader() { unsigned AddLevels = 0; if (FormatTok->is(Keywords.kw_clocking)) { nextToken(); if (Keywords.isVerilogIdentifier(*FormatTok)) nextToken(); parseVerilogSensitivityList(); if (FormatTok->is(tok::semi)) nextToken(); } else if (FormatTok->isOneOf(tok::kw_case, Keywords.kw_casex, Keywords.kw_casez, Keywords.kw_randcase, Keywords.kw_randsequence)) { if (Style.IndentCaseLabels) AddLevels++; nextToken(); if (FormatTok->is(tok::l_paren)) { FormatTok->setFinalizedType(TT_ConditionLParen); parseParens(); } if (FormatTok->isOneOf(Keywords.kw_inside, Keywords.kw_matches)) nextToken(); // The case header has no semicolon. } else { // "module" etc. 
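// Hypothetical SystemVerilog input, for illustration:
//   module m #(parameter W = 8) (input logic [W-1:0] a, output logic y);
// The `#(...)` parameter list and the port list are each placed on their own
// continuation line by the NewLine() calls below.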
nextToken(); // all the words like the name of the module and specifiers like // "automatic" and the width of function return type while (true) { if (FormatTok->is(tok::l_square)) { auto Prev = FormatTok->getPreviousNonComment(); if (Prev && Keywords.isVerilogIdentifier(*Prev)) Prev->setFinalizedType(TT_VerilogDimensionedTypeName); parseSquare(); } else if (Keywords.isVerilogIdentifier(*FormatTok) || FormatTok->isOneOf(Keywords.kw_automatic, tok::kw_static)) { nextToken(); } else { break; } } auto NewLine = [this]() { addUnwrappedLine(); Line->IsContinuation = true; }; // package imports while (FormatTok->is(Keywords.kw_import)) { NewLine(); nextToken(); parseVerilogHierarchyIdentifier(); if (FormatTok->is(tok::semi)) nextToken(); } // parameters and ports if (FormatTok->is(Keywords.kw_verilogHash)) { NewLine(); nextToken(); if (FormatTok->is(tok::l_paren)) { FormatTok->setFinalizedType(TT_VerilogMultiLineListLParen); parseParens(); } } if (FormatTok->is(tok::l_paren)) { NewLine(); FormatTok->setFinalizedType(TT_VerilogMultiLineListLParen); parseParens(); } // extends and implements if (FormatTok->is(Keywords.kw_extends)) { NewLine(); nextToken(); parseVerilogHierarchyIdentifier(); if (FormatTok->is(tok::l_paren)) parseParens(); } if (FormatTok->is(Keywords.kw_implements)) { NewLine(); do { nextToken(); parseVerilogHierarchyIdentifier(); } while (FormatTok->is(tok::comma)); } // Coverage event for cover groups. if (FormatTok->is(tok::at)) { NewLine(); parseVerilogSensitivityList(); } if (FormatTok->is(tok::semi)) nextToken(/*LevelDifference=*/1); addUnwrappedLine(); } return AddLevels; } void UnwrappedLineParser::parseVerilogTable() { assert(FormatTok->is(Keywords.kw_table)); nextToken(/*LevelDifference=*/1); addUnwrappedLine(); auto InitialLevel = Line->Level++; while (!eof() && !Keywords.isVerilogEnd(*FormatTok)) { FormatToken *Tok = FormatTok; nextToken(); if (Tok->is(tok::semi)) addUnwrappedLine(); else if (Tok->isOneOf(tok::star, tok::colon, tok::question, tok::minus)) Tok->setFinalizedType(TT_VerilogTableItem); } Line->Level = InitialLevel; nextToken(/*LevelDifference=*/-1); addUnwrappedLine(); } void UnwrappedLineParser::parseVerilogCaseLabel() { // The label will get unindented in AnnotatingParser. If there are no leading // spaces, indent the rest here so that things inside the block will be // indented relative to things outside. We don't use parseLabel because we // don't know whether this colon is a label or a ternary expression at this // point. auto OrigLevel = Line->Level; auto FirstLine = CurrentLines->size(); if (Line->Level == 0 || (Line->InPPDirective && Line->Level <= 1)) ++Line->Level; else if (!Style.IndentCaseBlocks && Keywords.isVerilogBegin(*FormatTok)) --Line->Level; parseStructuralElement(); // Restore the indentation in both the new line and the line that has the // label. 
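// Illustrative sketch (not part of this patch): the Verilog hierarchy-header
// parsing above (package imports, the '#(...)' parameter list, the port list,
// 'extends'/'implements', coverage events) is reached when the style's
// language is Verilog. The module text is a made-up example of those
// constructs.
#include "clang/Format/Format.h"

static void formatVerilogExample() {
  using namespace clang;
  format::FormatStyle Style =
      format::getLLVMStyle(format::FormatStyle::LK_Verilog);
  Style.IndentCaseLabels = true; // adds a level inside case/casex/casez bodies

  llvm::StringRef Code =
      "module m import pkg::*; #(parameter W = 8) (input logic [W-1:0] a);\n"
      "endmodule\n";
  (void)format::reformat(Style, Code, {tooling::Range(0, Code.size())});
}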
if (CurrentLines->size() > FirstLine) (*CurrentLines)[FirstLine].Level = OrigLevel; Line->Level = OrigLevel; } bool UnwrappedLineParser::containsExpansion(const UnwrappedLine &Line) const { for (const auto &N : Line.Tokens) { if (N.Tok->MacroCtx) return true; for (const UnwrappedLine &Child : N.Children) if (containsExpansion(Child)) return true; } return false; } void UnwrappedLineParser::addUnwrappedLine(LineLevel AdjustLevel) { if (Line->Tokens.empty()) return; LLVM_DEBUG({ if (!parsingPPDirective()) { llvm::dbgs() << "Adding unwrapped line:\n"; printDebugInfo(*Line); } }); // If this line closes a block when in Whitesmiths mode, remember that // information so that the level can be decreased after the line is added. // This has to happen after the addition of the line since the line itself // needs to be indented. bool ClosesWhitesmithsBlock = Line->MatchingOpeningBlockLineIndex != UnwrappedLine::kInvalidIndex && Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths; // If the current line was expanded from a macro call, we use it to // reconstruct an unwrapped line from the structure of the expanded unwrapped // line and the unexpanded token stream. if (!parsingPPDirective() && !InExpansion && containsExpansion(*Line)) { if (!Reconstruct) Reconstruct.emplace(Line->Level, Unexpanded); Reconstruct->addLine(*Line); // While the reconstructed unexpanded lines are stored in the normal // flow of lines, the expanded lines are stored on the side to be analyzed // in an extra step. CurrentExpandedLines.push_back(std::move(*Line)); if (Reconstruct->finished()) { UnwrappedLine Reconstructed = std::move(*Reconstruct).takeResult(); assert(!Reconstructed.Tokens.empty() && "Reconstructed must at least contain the macro identifier."); assert(!parsingPPDirective()); LLVM_DEBUG({ llvm::dbgs() << "Adding unexpanded line:\n"; printDebugInfo(Reconstructed); }); ExpandedLines[Reconstructed.Tokens.begin()->Tok] = CurrentExpandedLines; Lines.push_back(std::move(Reconstructed)); CurrentExpandedLines.clear(); Reconstruct.reset(); } } else { // At the top level we only get here when no unexpansion is going on, or // when conditional formatting led to unfinished macro reconstructions. assert(!Reconstruct || (CurrentLines != &Lines) || PPStack.size() > 0); CurrentLines->push_back(std::move(*Line)); } Line->Tokens.clear(); Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex; Line->FirstStartColumn = 0; Line->IsContinuation = false; Line->SeenDecltypeAuto = false; if (ClosesWhitesmithsBlock && AdjustLevel == LineLevel::Remove) --Line->Level; if (!parsingPPDirective() && !PreprocessorDirectives.empty()) { CurrentLines->append( std::make_move_iterator(PreprocessorDirectives.begin()), std::make_move_iterator(PreprocessorDirectives.end())); PreprocessorDirectives.clear(); } // Disconnect the current token from the last token on the previous line. FormatTok->Previous = nullptr; } bool UnwrappedLineParser::eof() const { return FormatTok->is(tok::eof); } bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) { return (Line->InPPDirective || FormatTok.HasUnescapedNewline) && FormatTok.NewlinesBefore > 0; } // Checks if \p FormatTok is a line comment that continues the line comment // section on \p Line. 
static bool continuesLineCommentSection(const FormatToken &FormatTok, const UnwrappedLine &Line, const llvm::Regex &CommentPragmasRegex) { if (Line.Tokens.empty()) return false; StringRef IndentContent = FormatTok.TokenText; if (FormatTok.TokenText.starts_with("//") || FormatTok.TokenText.starts_with("/*")) { IndentContent = FormatTok.TokenText.substr(2); } if (CommentPragmasRegex.match(IndentContent)) return false; // If Line starts with a line comment, then FormatTok continues the comment // section if its original column is greater or equal to the original start // column of the line. // // Define the min column token of a line as follows: if a line ends in '{' or // contains a '{' followed by a line comment, then the min column token is // that '{'. Otherwise, the min column token of the line is the first token of // the line. // // If Line starts with a token other than a line comment, then FormatTok // continues the comment section if its original column is greater than the // original start column of the min column token of the line. // // For example, the second line comment continues the first in these cases: // // // first line // // second line // // and: // // // first line // // second line // // and: // // int i; // first line // // second line // // and: // // do { // first line // // second line // int i; // } while (true); // // and: // // enum { // a, // first line // // second line // b // }; // // The second line comment doesn't continue the first in these cases: // // // first line // // second line // // and: // // int i; // first line // // second line // // and: // // do { // first line // // second line // int i; // } while (true); // // and: // // enum { // a, // first line // // second line // }; const FormatToken *MinColumnToken = Line.Tokens.front().Tok; // Scan for '{//'. If found, use the column of '{' as a min column for line // comment section continuation. const FormatToken *PreviousToken = nullptr; for (const UnwrappedLineNode &Node : Line.Tokens) { if (PreviousToken && PreviousToken->is(tok::l_brace) && isLineComment(*Node.Tok)) { MinColumnToken = PreviousToken; break; } PreviousToken = Node.Tok; // Grab the last newline preceding a token in this unwrapped line. if (Node.Tok->NewlinesBefore > 0) MinColumnToken = Node.Tok; } if (PreviousToken && PreviousToken->is(tok::l_brace)) MinColumnToken = PreviousToken; return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok, MinColumnToken); } void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) { bool JustComments = Line->Tokens.empty(); for (FormatToken *Tok : CommentsBeforeNextToken) { // Line comments that belong to the same line comment section are put on the // same line since later we might want to reflow content between them. // Additional fine-grained breaking of line comment sections is controlled // by the class BreakableLineCommentSection in case it is desirable to keep // several line comment sections in the same unwrapped line. // // FIXME: Consider putting separate line comment sections as children to the // unwrapped line instead. 
Tok->ContinuesLineCommentSection = continuesLineCommentSection(*Tok, *Line, CommentPragmasRegex); if (isOnNewLine(*Tok) && JustComments && !Tok->ContinuesLineCommentSection) addUnwrappedLine(); pushToken(Tok); } if (NewlineBeforeNext && JustComments) addUnwrappedLine(); CommentsBeforeNextToken.clear(); } void UnwrappedLineParser::nextToken(int LevelDifference) { if (eof()) return; flushComments(isOnNewLine(*FormatTok)); pushToken(FormatTok); FormatToken *Previous = FormatTok; if (!Style.isJavaScript()) readToken(LevelDifference); else readTokenWithJavaScriptASI(); FormatTok->Previous = Previous; if (Style.isVerilog()) { // Blocks in Verilog can have `begin` and `end` instead of braces. For // keywords like `begin`, we can't treat them the same as left braces // because some contexts require one of them. For example structs use // braces and if blocks use keywords, and a left brace can occur in an if // statement, but it is not a block. For keywords like `end`, we simply // treat them the same as right braces. if (Keywords.isVerilogEnd(*FormatTok)) FormatTok->Tok.setKind(tok::r_brace); } } void UnwrappedLineParser::distributeComments( const SmallVectorImpl &Comments, const FormatToken *NextTok) { // Whether or not a line comment token continues a line is controlled by // the method continuesLineCommentSection, with the following caveat: // // Define a trail of Comments to be a nonempty proper postfix of Comments such // that each comment line from the trail is aligned with the next token, if // the next token exists. If a trail exists, the beginning of the maximal // trail is marked as a start of a new comment section. // // For example in this code: // // int a; // line about a // // line 1 about b // // line 2 about b // int b; // // the two lines about b form a maximal trail, so there are two sections, the // first one consisting of the single comment "// line about a" and the // second one consisting of the next two comments. if (Comments.empty()) return; bool ShouldPushCommentsInCurrentLine = true; bool HasTrailAlignedWithNextToken = false; unsigned StartOfTrailAlignedWithNextToken = 0; if (NextTok) { // We are skipping the first element intentionally. 
for (unsigned i = Comments.size() - 1; i > 0; --i) { if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) { HasTrailAlignedWithNextToken = true; StartOfTrailAlignedWithNextToken = i; } } } for (unsigned i = 0, e = Comments.size(); i < e; ++i) { FormatToken *FormatTok = Comments[i]; if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) { FormatTok->ContinuesLineCommentSection = false; } else { FormatTok->ContinuesLineCommentSection = continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex); } if (!FormatTok->ContinuesLineCommentSection && (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) { ShouldPushCommentsInCurrentLine = false; } if (ShouldPushCommentsInCurrentLine) pushToken(FormatTok); else CommentsBeforeNextToken.push_back(FormatTok); } } void UnwrappedLineParser::readToken(int LevelDifference) { SmallVector Comments; bool PreviousWasComment = false; bool FirstNonCommentOnLine = false; do { FormatTok = Tokens->getNextToken(); assert(FormatTok); while (FormatTok->isOneOf(TT_ConflictStart, TT_ConflictEnd, TT_ConflictAlternative)) { if (FormatTok->is(TT_ConflictStart)) conditionalCompilationStart(/*Unreachable=*/false); else if (FormatTok->is(TT_ConflictAlternative)) conditionalCompilationAlternative(); else if (FormatTok->is(TT_ConflictEnd)) conditionalCompilationEnd(); FormatTok = Tokens->getNextToken(); FormatTok->MustBreakBefore = true; FormatTok->MustBreakBeforeFinalized = true; } auto IsFirstNonCommentOnLine = [](bool FirstNonCommentOnLine, const FormatToken &Tok, bool PreviousWasComment) { auto IsFirstOnLine = [](const FormatToken &Tok) { return Tok.HasUnescapedNewline || Tok.IsFirst; }; // Consider preprocessor directives preceded by block comments as first // on line. if (PreviousWasComment) return FirstNonCommentOnLine || IsFirstOnLine(Tok); return IsFirstOnLine(Tok); }; FirstNonCommentOnLine = IsFirstNonCommentOnLine( FirstNonCommentOnLine, *FormatTok, PreviousWasComment); PreviousWasComment = FormatTok->is(tok::comment); while (!Line->InPPDirective && FormatTok->is(tok::hash) && (!Style.isVerilog() || Keywords.isVerilogPPDirective(*Tokens->peekNextToken())) && FirstNonCommentOnLine) { distributeComments(Comments, FormatTok); Comments.clear(); // If there is an unfinished unwrapped line, we flush the preprocessor // directives only after that unwrapped line was finished later. bool SwitchToPreprocessorLines = !Line->Tokens.empty(); ScopedLineState BlockState(*this, SwitchToPreprocessorLines); assert((LevelDifference >= 0 || static_cast(-LevelDifference) <= Line->Level) && "LevelDifference makes Line->Level negative"); Line->Level += LevelDifference; // Comments stored before the preprocessor directive need to be output // before the preprocessor directive, at the same level as the // preprocessor directive, as we consider them to apply to the directive. if (Style.IndentPPDirectives == FormatStyle::PPDIS_BeforeHash && PPBranchLevel > 0) { Line->Level += PPBranchLevel; } assert(Line->Level >= Line->UnbracedBodyLevel); Line->Level -= Line->UnbracedBodyLevel; flushComments(isOnNewLine(*FormatTok)); parsePPDirective(); PreviousWasComment = FormatTok->is(tok::comment); FirstNonCommentOnLine = IsFirstNonCommentOnLine( FirstNonCommentOnLine, *FormatTok, PreviousWasComment); } if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) && !Line->InPPDirective) { continue; } if (FormatTok->is(tok::identifier) && Macros.defined(FormatTok->TokenText) && // FIXME: Allow expanding macros in preprocessor directives. 
!Line->InPPDirective) { FormatToken *ID = FormatTok; unsigned Position = Tokens->getPosition(); // To correctly parse the code, we need to replace the tokens of the macro // call with its expansion. auto PreCall = std::move(Line); Line.reset(new UnwrappedLine); bool OldInExpansion = InExpansion; InExpansion = true; // We parse the macro call into a new line. auto Args = parseMacroCall(); InExpansion = OldInExpansion; assert(Line->Tokens.front().Tok == ID); // And remember the unexpanded macro call tokens. auto UnexpandedLine = std::move(Line); // Reset to the old line. Line = std::move(PreCall); LLVM_DEBUG({ llvm::dbgs() << "Macro call: " << ID->TokenText << "("; if (Args) { llvm::dbgs() << "("; for (const auto &Arg : Args.value()) for (const auto &T : Arg) llvm::dbgs() << T->TokenText << " "; llvm::dbgs() << ")"; } llvm::dbgs() << "\n"; }); if (Macros.objectLike(ID->TokenText) && Args && !Macros.hasArity(ID->TokenText, Args->size())) { // The macro is either // - object-like, but we got argumnets, or // - overloaded to be both object-like and function-like, but none of // the function-like arities match the number of arguments. // Thus, expand as object-like macro. LLVM_DEBUG(llvm::dbgs() << "Macro \"" << ID->TokenText << "\" not overloaded for arity " << Args->size() << "or not function-like, using object-like overload."); Args.reset(); UnexpandedLine->Tokens.resize(1); Tokens->setPosition(Position); nextToken(); assert(!Args && Macros.objectLike(ID->TokenText)); } if ((!Args && Macros.objectLike(ID->TokenText)) || (Args && Macros.hasArity(ID->TokenText, Args->size()))) { // Next, we insert the expanded tokens in the token stream at the // current position, and continue parsing. Unexpanded[ID] = std::move(UnexpandedLine); SmallVector Expansion = Macros.expand(ID, std::move(Args)); if (!Expansion.empty()) FormatTok = Tokens->insertTokens(Expansion); LLVM_DEBUG({ llvm::dbgs() << "Expanded: "; for (const auto &T : Expansion) llvm::dbgs() << T->TokenText << " "; llvm::dbgs() << "\n"; }); } else { LLVM_DEBUG({ llvm::dbgs() << "Did not expand macro \"" << ID->TokenText << "\", because it was used "; if (Args) llvm::dbgs() << "with " << Args->size(); else llvm::dbgs() << "without"; llvm::dbgs() << " arguments, which doesn't match any definition.\n"; }); Tokens->setPosition(Position); FormatTok = ID; } } if (FormatTok->isNot(tok::comment)) { distributeComments(Comments, FormatTok); Comments.clear(); return; } Comments.push_back(FormatTok); } while (!eof()); distributeComments(Comments, nullptr); Comments.clear(); } namespace { template void pushTokens(Iterator Begin, Iterator End, llvm::SmallVectorImpl &Into) { for (auto I = Begin; I != End; ++I) { Into.push_back(I->Tok); for (const auto &Child : I->Children) pushTokens(Child.Tokens.begin(), Child.Tokens.end(), Into); } } } // namespace std::optional, 1>> UnwrappedLineParser::parseMacroCall() { std::optional, 1>> Args; assert(Line->Tokens.empty()); nextToken(); if (FormatTok->isNot(tok::l_paren)) return Args; unsigned Position = Tokens->getPosition(); FormatToken *Tok = FormatTok; nextToken(); Args.emplace(); auto ArgStart = std::prev(Line->Tokens.end()); int Parens = 0; do { switch (FormatTok->Tok.getKind()) { case tok::l_paren: ++Parens; nextToken(); break; case tok::r_paren: { if (Parens > 0) { --Parens; nextToken(); break; } Args->push_back({}); pushTokens(std::next(ArgStart), Line->Tokens.end(), Args->back()); nextToken(); return Args; } case tok::comma: { if (Parens > 0) { nextToken(); break; } Args->push_back({}); 
pushTokens(std::next(ArgStart), Line->Tokens.end(), Args->back()); nextToken(); ArgStart = std::prev(Line->Tokens.end()); break; } default: nextToken(); break; } } while (!eof()); Line->Tokens.resize(1); Tokens->setPosition(Position); FormatTok = Tok; return {}; } void UnwrappedLineParser::pushToken(FormatToken *Tok) { Line->Tokens.push_back(UnwrappedLineNode(Tok)); if (MustBreakBeforeNextToken) { Line->Tokens.back().Tok->MustBreakBefore = true; Line->Tokens.back().Tok->MustBreakBeforeFinalized = true; MustBreakBeforeNextToken = false; } } } // end namespace format } // end namespace clang diff --git a/clang/lib/Sema/SemaConcept.cpp b/clang/lib/Sema/SemaConcept.cpp index 244f6ef2f53f..c45443d76e6b 100644 --- a/clang/lib/Sema/SemaConcept.cpp +++ b/clang/lib/Sema/SemaConcept.cpp @@ -1,1943 +1,1943 @@ //===-- SemaConcept.cpp - Semantic Analysis for Constraints and Concepts --===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file implements semantic analysis for C++ constraints and concepts. // //===----------------------------------------------------------------------===// #include "clang/Sema/SemaConcept.h" #include "TreeTransform.h" #include "clang/AST/ASTLambda.h" #include "clang/AST/DeclCXX.h" #include "clang/AST/ExprConcepts.h" #include "clang/AST/RecursiveASTVisitor.h" #include "clang/Basic/OperatorPrecedence.h" #include "clang/Sema/EnterExpressionEvaluationContext.h" #include "clang/Sema/Initialization.h" #include "clang/Sema/Overload.h" #include "clang/Sema/ScopeInfo.h" #include "clang/Sema/Sema.h" #include "clang/Sema/SemaDiagnostic.h" #include "clang/Sema/SemaInternal.h" #include "clang/Sema/Template.h" #include "clang/Sema/TemplateDeduction.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/PointerUnion.h" #include "llvm/ADT/StringExtras.h" #include using namespace clang; using namespace sema; namespace { class LogicalBinOp { SourceLocation Loc; OverloadedOperatorKind Op = OO_None; const Expr *LHS = nullptr; const Expr *RHS = nullptr; public: LogicalBinOp(const Expr *E) { if (auto *BO = dyn_cast(E)) { Op = BinaryOperator::getOverloadedOperator(BO->getOpcode()); LHS = BO->getLHS(); RHS = BO->getRHS(); Loc = BO->getExprLoc(); } else if (auto *OO = dyn_cast(E)) { // If OO is not || or && it might not have exactly 2 arguments. if (OO->getNumArgs() == 2) { Op = OO->getOperator(); LHS = OO->getArg(0); RHS = OO->getArg(1); Loc = OO->getOperatorLoc(); } } } bool isAnd() const { return Op == OO_AmpAmp; } bool isOr() const { return Op == OO_PipePipe; } explicit operator bool() const { return isAnd() || isOr(); } const Expr *getLHS() const { return LHS; } const Expr *getRHS() const { return RHS; } OverloadedOperatorKind getOp() const { return Op; } ExprResult recreateBinOp(Sema &SemaRef, ExprResult LHS) const { return recreateBinOp(SemaRef, LHS, const_cast(getRHS())); } ExprResult recreateBinOp(Sema &SemaRef, ExprResult LHS, ExprResult RHS) const { assert((isAnd() || isOr()) && "Not the right kind of op?"); assert((!LHS.isInvalid() && !RHS.isInvalid()) && "not good expressions?"); if (!LHS.isUsable() || !RHS.isUsable()) return ExprEmpty(); // We should just be able to 'normalize' these to the builtin Binary // Operator, since that is how they are evaluated in constriant checks. 
return BinaryOperator::Create(SemaRef.Context, LHS.get(), RHS.get(), BinaryOperator::getOverloadedOpcode(Op), SemaRef.Context.BoolTy, VK_PRValue, OK_Ordinary, Loc, FPOptionsOverride{}); } }; } bool Sema::CheckConstraintExpression(const Expr *ConstraintExpression, Token NextToken, bool *PossibleNonPrimary, bool IsTrailingRequiresClause) { // C++2a [temp.constr.atomic]p1 // ..E shall be a constant expression of type bool. ConstraintExpression = ConstraintExpression->IgnoreParenImpCasts(); if (LogicalBinOp BO = ConstraintExpression) { return CheckConstraintExpression(BO.getLHS(), NextToken, PossibleNonPrimary) && CheckConstraintExpression(BO.getRHS(), NextToken, PossibleNonPrimary); } else if (auto *C = dyn_cast(ConstraintExpression)) return CheckConstraintExpression(C->getSubExpr(), NextToken, PossibleNonPrimary); QualType Type = ConstraintExpression->getType(); auto CheckForNonPrimary = [&] { if (!PossibleNonPrimary) return; *PossibleNonPrimary = // We have the following case: // template requires func(0) struct S { }; // The user probably isn't aware of the parentheses required around // the function call, and we're only going to parse 'func' as the // primary-expression, and complain that it is of non-bool type. // // However, if we're in a lambda, this might also be: // [] requires var () {}; // Which also looks like a function call due to the lambda parentheses, // but unlike the first case, isn't an error, so this check is skipped. (NextToken.is(tok::l_paren) && (IsTrailingRequiresClause || (Type->isDependentType() && isa(ConstraintExpression) && !dyn_cast_if_present(getCurFunction())) || Type->isFunctionType() || Type->isSpecificBuiltinType(BuiltinType::Overload))) || // We have the following case: // template requires size_ == 0 struct S { }; // The user probably isn't aware of the parentheses required around // the binary operator, and we're only going to parse 'func' as the // first operand, and complain that it is of non-bool type. getBinOpPrecedence(NextToken.getKind(), /*GreaterThanIsOperator=*/true, getLangOpts().CPlusPlus11) > prec::LogicalAnd; }; // An atomic constraint! 
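// Illustrative sketch of the two pitfalls described in CheckForNonPrimary
// above; 'pred' is a hypothetical constexpr helper, not part of this patch.
constexpr bool pred(int) { return true; }

// template <int N> requires pred(N) struct S1 {};  // rejected: only 'pred' is
//                                                  // parsed as the constraint
// template <int N> requires N == 0  struct S2 {};  // rejected: only 'N' is
//                                                  // parsed as the constraint
template <int N> requires (pred(N)) struct S1 {};   // OK: parenthesized call
template <int N> requires (N == 0)  struct S2 {};   // OK: parenthesized compare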
if (ConstraintExpression->isTypeDependent()) { CheckForNonPrimary(); return true; } if (!Context.hasSameUnqualifiedType(Type, Context.BoolTy)) { Diag(ConstraintExpression->getExprLoc(), diag::err_non_bool_atomic_constraint) << Type << ConstraintExpression->getSourceRange(); CheckForNonPrimary(); return false; } if (PossibleNonPrimary) *PossibleNonPrimary = false; return true; } namespace { struct SatisfactionStackRAII { Sema &SemaRef; bool Inserted = false; SatisfactionStackRAII(Sema &SemaRef, const NamedDecl *ND, const llvm::FoldingSetNodeID &FSNID) : SemaRef(SemaRef) { if (ND) { SemaRef.PushSatisfactionStackEntry(ND, FSNID); Inserted = true; } } ~SatisfactionStackRAII() { if (Inserted) SemaRef.PopSatisfactionStackEntry(); } }; } // namespace template static ExprResult calculateConstraintSatisfaction(Sema &S, const Expr *ConstraintExpr, ConstraintSatisfaction &Satisfaction, const ConstraintEvaluator &Evaluator); template static ExprResult calculateConstraintSatisfaction(Sema &S, const Expr *LHS, OverloadedOperatorKind Op, const Expr *RHS, ConstraintSatisfaction &Satisfaction, const ConstraintEvaluator &Evaluator) { size_t EffectiveDetailEndIndex = Satisfaction.Details.size(); ExprResult LHSRes = calculateConstraintSatisfaction(S, LHS, Satisfaction, Evaluator); if (LHSRes.isInvalid()) return ExprError(); bool IsLHSSatisfied = Satisfaction.IsSatisfied; if (Op == clang::OO_PipePipe && IsLHSSatisfied) // [temp.constr.op] p3 // A disjunction is a constraint taking two operands. To determine if // a disjunction is satisfied, the satisfaction of the first operand // is checked. If that is satisfied, the disjunction is satisfied. // Otherwise, the disjunction is satisfied if and only if the second // operand is satisfied. // LHS is instantiated while RHS is not. Skip creating invalid BinaryOp. return LHSRes; if (Op == clang::OO_AmpAmp && !IsLHSSatisfied) // [temp.constr.op] p2 // A conjunction is a constraint taking two operands. To determine if // a conjunction is satisfied, the satisfaction of the first operand // is checked. If that is not satisfied, the conjunction is not // satisfied. Otherwise, the conjunction is satisfied if and only if // the second operand is satisfied. // LHS is instantiated while RHS is not. Skip creating invalid BinaryOp. return LHSRes; ExprResult RHSRes = calculateConstraintSatisfaction(S, RHS, Satisfaction, Evaluator); if (RHSRes.isInvalid()) return ExprError(); bool IsRHSSatisfied = Satisfaction.IsSatisfied; // Current implementation adds diagnostic information about the falsity // of each false atomic constraint expression when it evaluates them. // When the evaluation results to `false || true`, the information // generated during the evaluation of left-hand side is meaningless // because the whole expression evaluates to true. // The following code removes the irrelevant diagnostic information. // FIXME: We should probably delay the addition of diagnostic information // until we know the entire expression is false. 
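// Illustrative sketch of the [temp.constr.op] behavior implemented above; the
// concept names are made up for the example.
template <class T>
concept Small = sizeof(T) <= sizeof(void *);

template <class T>
concept HasNested = requires { typename T::nested; };

template <class T>
  requires Small<T> || HasNested<T>   // disjunction: once Small<T> holds,
void use(T) {}                        // HasNested<T> is never examined and no
                                      // diagnostic details are kept for it

template <class T>
  requires Small<T> && HasNested<T>   // conjunction: if Small<T> fails, the
void useBoth(T) {}                    // second operand is not examined

void demo() {
  use(42);        // OK: Small<int> holds; the second operand is skipped
  // useBoth(42); // error: 'int' has no member 'nested'
}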
if (Op == clang::OO_PipePipe && IsRHSSatisfied) { auto EffectiveDetailEnd = Satisfaction.Details.begin(); std::advance(EffectiveDetailEnd, EffectiveDetailEndIndex); Satisfaction.Details.erase(EffectiveDetailEnd, Satisfaction.Details.end()); } if (!LHSRes.isUsable() || !RHSRes.isUsable()) return ExprEmpty(); return BinaryOperator::Create(S.Context, LHSRes.get(), RHSRes.get(), BinaryOperator::getOverloadedOpcode(Op), S.Context.BoolTy, VK_PRValue, OK_Ordinary, LHS->getBeginLoc(), FPOptionsOverride{}); } template static ExprResult calculateConstraintSatisfaction(Sema &S, const CXXFoldExpr *FE, ConstraintSatisfaction &Satisfaction, const ConstraintEvaluator &Evaluator) { bool Conjunction = FE->getOperator() == BinaryOperatorKind::BO_LAnd; size_t EffectiveDetailEndIndex = Satisfaction.Details.size(); ExprResult Out; if (FE->isLeftFold() && FE->getInit()) { Out = calculateConstraintSatisfaction(S, FE->getInit(), Satisfaction, Evaluator); if (Out.isInvalid()) return ExprError(); // If the first clause of a conjunction is not satisfied, // or if the first clause of a disjection is satisfied, // we have established satisfaction of the whole constraint // and we should not continue further. if (Conjunction != Satisfaction.IsSatisfied) return Out; } std::optional NumExpansions = Evaluator.EvaluateFoldExpandedConstraintSize(FE); if (!NumExpansions) return ExprError(); for (unsigned I = 0; I < *NumExpansions; I++) { Sema::ArgumentPackSubstitutionIndexRAII SubstIndex(S, I); ExprResult Res = calculateConstraintSatisfaction(S, FE->getPattern(), Satisfaction, Evaluator); if (Res.isInvalid()) return ExprError(); bool IsRHSSatisfied = Satisfaction.IsSatisfied; if (!Conjunction && IsRHSSatisfied) { auto EffectiveDetailEnd = Satisfaction.Details.begin(); std::advance(EffectiveDetailEnd, EffectiveDetailEndIndex); Satisfaction.Details.erase(EffectiveDetailEnd, Satisfaction.Details.end()); } if (Out.isUnset()) Out = Res; else if (!Res.isUnset()) { Out = BinaryOperator::Create( S.Context, Out.get(), Res.get(), FE->getOperator(), S.Context.BoolTy, VK_PRValue, OK_Ordinary, FE->getBeginLoc(), FPOptionsOverride{}); } if (Conjunction != IsRHSSatisfied) return Out; } if (FE->isRightFold() && FE->getInit()) { ExprResult Res = calculateConstraintSatisfaction(S, FE->getInit(), Satisfaction, Evaluator); if (Out.isInvalid()) return ExprError(); if (Out.isUnset()) Out = Res; else if (!Res.isUnset()) { Out = BinaryOperator::Create( S.Context, Out.get(), Res.get(), FE->getOperator(), S.Context.BoolTy, VK_PRValue, OK_Ordinary, FE->getBeginLoc(), FPOptionsOverride{}); } } if (Out.isUnset()) { Satisfaction.IsSatisfied = Conjunction; Out = S.BuildEmptyCXXFoldExpr(FE->getBeginLoc(), FE->getOperator()); } return Out; } template static ExprResult calculateConstraintSatisfaction(Sema &S, const Expr *ConstraintExpr, ConstraintSatisfaction &Satisfaction, const ConstraintEvaluator &Evaluator) { ConstraintExpr = ConstraintExpr->IgnoreParenImpCasts(); if (LogicalBinOp BO = ConstraintExpr) return calculateConstraintSatisfaction( S, BO.getLHS(), BO.getOp(), BO.getRHS(), Satisfaction, Evaluator); if (auto *C = dyn_cast(ConstraintExpr)) { // These aren't evaluated, so we don't care about cleanups, so we can just // evaluate these as if the cleanups didn't exist. 
return calculateConstraintSatisfaction(S, C->getSubExpr(), Satisfaction, Evaluator); } if (auto *FE = dyn_cast(ConstraintExpr); FE && S.getLangOpts().CPlusPlus26 && (FE->getOperator() == BinaryOperatorKind::BO_LAnd || FE->getOperator() == BinaryOperatorKind::BO_LOr)) { return calculateConstraintSatisfaction(S, FE, Satisfaction, Evaluator); } // An atomic constraint expression ExprResult SubstitutedAtomicExpr = Evaluator.EvaluateAtomicConstraint(ConstraintExpr); if (SubstitutedAtomicExpr.isInvalid()) return ExprError(); if (!SubstitutedAtomicExpr.isUsable()) // Evaluator has decided satisfaction without yielding an expression. return ExprEmpty(); // We don't have the ability to evaluate this, since it contains a // RecoveryExpr, so we want to fail overload resolution. Otherwise, // we'd potentially pick up a different overload, and cause confusing // diagnostics. SO, add a failure detail that will cause us to make this // overload set not viable. if (SubstitutedAtomicExpr.get()->containsErrors()) { Satisfaction.IsSatisfied = false; Satisfaction.ContainsErrors = true; PartialDiagnostic Msg = S.PDiag(diag::note_constraint_references_error); SmallString<128> DiagString; DiagString = ": "; Msg.EmitToString(S.getDiagnostics(), DiagString); unsigned MessageSize = DiagString.size(); char *Mem = new (S.Context) char[MessageSize]; memcpy(Mem, DiagString.c_str(), MessageSize); Satisfaction.Details.emplace_back( new (S.Context) ConstraintSatisfaction::SubstitutionDiagnostic{ SubstitutedAtomicExpr.get()->getBeginLoc(), StringRef(Mem, MessageSize)}); return SubstitutedAtomicExpr; } EnterExpressionEvaluationContext ConstantEvaluated( S, Sema::ExpressionEvaluationContext::ConstantEvaluated); SmallVector EvaluationDiags; Expr::EvalResult EvalResult; EvalResult.Diag = &EvaluationDiags; if (!SubstitutedAtomicExpr.get()->EvaluateAsConstantExpr(EvalResult, S.Context) || !EvaluationDiags.empty()) { // C++2a [temp.constr.atomic]p1 // ...E shall be a constant expression of type bool. S.Diag(SubstitutedAtomicExpr.get()->getBeginLoc(), diag::err_non_constant_constraint_expression) << SubstitutedAtomicExpr.get()->getSourceRange(); for (const PartialDiagnosticAt &PDiag : EvaluationDiags) S.Diag(PDiag.first, PDiag.second); return ExprError(); } assert(EvalResult.Val.isInt() && "evaluating bool expression didn't produce int"); Satisfaction.IsSatisfied = EvalResult.Val.getInt().getBoolValue(); if (!Satisfaction.IsSatisfied) Satisfaction.Details.emplace_back(SubstitutedAtomicExpr.get()); return SubstitutedAtomicExpr; } static bool DiagRecursiveConstraintEval(Sema &S, llvm::FoldingSetNodeID &ID, const NamedDecl *Templ, const Expr *E, const MultiLevelTemplateArgumentList &MLTAL) { E->Profile(ID, S.Context, /*Canonical=*/true); for (const auto &List : MLTAL) for (const auto &TemplateArg : List.Args) TemplateArg.Profile(ID, S.Context); // Note that we have to do this with our own collection, because there are // times where a constraint-expression check can cause us to need to evaluate // other constriants that are unrelated, such as when evaluating a recovery // expression, or when trying to determine the constexpr-ness of special // members. Otherwise we could just use the // Sema::InstantiatingTemplate::isAlreadyBeingInstantiated function. 
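// Illustrative sketch of the [temp.constr.atomic] requirement enforced above:
// after substitution, an atomic constraint must be a constant expression of
// type bool. 'runtimeFlag' is a hypothetical non-constexpr function.
template <class T>
  requires (sizeof(T) > 1)     // OK: constant expression of type bool
void ok(T);

// template <class T>
//   requires (sizeof(T))      // rejected: the atomic constraint has type
// void bad1(T);               // 'std::size_t', not 'bool'

// bool runtimeFlag();
// template <class T>
//   requires (runtimeFlag())  // rejected: not a constant expression
// void bad2(T);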
if (S.SatisfactionStackContains(Templ, ID)) { S.Diag(E->getExprLoc(), diag::err_constraint_depends_on_self) << const_cast(E) << E->getSourceRange(); return true; } return false; } static ExprResult calculateConstraintSatisfaction( Sema &S, const NamedDecl *Template, SourceLocation TemplateNameLoc, const MultiLevelTemplateArgumentList &MLTAL, const Expr *ConstraintExpr, ConstraintSatisfaction &Satisfaction) { struct ConstraintEvaluator { Sema &S; const NamedDecl *Template; SourceLocation TemplateNameLoc; const MultiLevelTemplateArgumentList &MLTAL; ConstraintSatisfaction &Satisfaction; ExprResult EvaluateAtomicConstraint(const Expr *AtomicExpr) const { EnterExpressionEvaluationContext ConstantEvaluated( S, Sema::ExpressionEvaluationContext::ConstantEvaluated, Sema::ReuseLambdaContextDecl); // Atomic constraint - substitute arguments and check satisfaction. ExprResult SubstitutedExpression; { TemplateDeductionInfo Info(TemplateNameLoc); Sema::InstantiatingTemplate Inst( S, AtomicExpr->getBeginLoc(), Sema::InstantiatingTemplate::ConstraintSubstitution{}, const_cast(Template), Info, AtomicExpr->getSourceRange()); if (Inst.isInvalid()) return ExprError(); llvm::FoldingSetNodeID ID; if (Template && DiagRecursiveConstraintEval(S, ID, Template, AtomicExpr, MLTAL)) { Satisfaction.IsSatisfied = false; Satisfaction.ContainsErrors = true; return ExprEmpty(); } SatisfactionStackRAII StackRAII(S, Template, ID); // We do not want error diagnostics escaping here. Sema::SFINAETrap Trap(S); SubstitutedExpression = S.SubstConstraintExpr(const_cast(AtomicExpr), MLTAL); if (SubstitutedExpression.isInvalid() || Trap.hasErrorOccurred()) { // C++2a [temp.constr.atomic]p1 // ...If substitution results in an invalid type or expression, the // constraint is not satisfied. if (!Trap.hasErrorOccurred()) // A non-SFINAE error has occurred as a result of this // substitution. return ExprError(); PartialDiagnosticAt SubstDiag{SourceLocation(), PartialDiagnostic::NullDiagnostic()}; Info.takeSFINAEDiagnostic(SubstDiag); // FIXME: Concepts: This is an unfortunate consequence of there // being no serialization code for PartialDiagnostics and the fact // that serializing them would likely take a lot more storage than // just storing them as strings. We would still like, in the // future, to serialize the proper PartialDiagnostic as serializing // it as a string defeats the purpose of the diagnostic mechanism. SmallString<128> DiagString; DiagString = ": "; SubstDiag.second.EmitToString(S.getDiagnostics(), DiagString); unsigned MessageSize = DiagString.size(); char *Mem = new (S.Context) char[MessageSize]; memcpy(Mem, DiagString.c_str(), MessageSize); Satisfaction.Details.emplace_back( new (S.Context) ConstraintSatisfaction::SubstitutionDiagnostic{ SubstDiag.first, StringRef(Mem, MessageSize)}); Satisfaction.IsSatisfied = false; return ExprEmpty(); } } if (!S.CheckConstraintExpression(SubstitutedExpression.get())) return ExprError(); // [temp.constr.atomic]p3: To determine if an atomic constraint is // satisfied, the parameter mapping and template arguments are first // substituted into its expression. If substitution results in an // invalid type or expression, the constraint is not satisfied. // Otherwise, the lvalue-to-rvalue conversion is performed if necessary, // and E shall be a constant expression of type bool. // // Perform the L to R Value conversion if necessary. We do so for all // non-PRValue categories, else we fail to extend the lifetime of // temporaries, and that fails the constant expression check. 
if (!SubstitutedExpression.get()->isPRValue()) SubstitutedExpression = ImplicitCastExpr::Create( S.Context, SubstitutedExpression.get()->getType(), CK_LValueToRValue, SubstitutedExpression.get(), /*BasePath=*/nullptr, VK_PRValue, FPOptionsOverride()); return SubstitutedExpression; } std::optional EvaluateFoldExpandedConstraintSize(const CXXFoldExpr *FE) const { // We should ignore errors in the presence of packs of different size. Sema::SFINAETrap Trap(S); Expr *Pattern = FE->getPattern(); SmallVector Unexpanded; S.collectUnexpandedParameterPacks(Pattern, Unexpanded); assert(!Unexpanded.empty() && "Pack expansion without parameter packs?"); bool Expand = true; bool RetainExpansion = false; std::optional OrigNumExpansions = FE->getNumExpansions(), NumExpansions = OrigNumExpansions; if (S.CheckParameterPacksForExpansion( FE->getEllipsisLoc(), Pattern->getSourceRange(), Unexpanded, MLTAL, Expand, RetainExpansion, NumExpansions) || !Expand || RetainExpansion) return std::nullopt; if (NumExpansions && S.getLangOpts().BracketDepth < NumExpansions) { S.Diag(FE->getEllipsisLoc(), clang::diag::err_fold_expression_limit_exceeded) << *NumExpansions << S.getLangOpts().BracketDepth << FE->getSourceRange(); S.Diag(FE->getEllipsisLoc(), diag::note_bracket_depth); return std::nullopt; } return NumExpansions; } }; return calculateConstraintSatisfaction( S, ConstraintExpr, Satisfaction, ConstraintEvaluator{S, Template, TemplateNameLoc, MLTAL, Satisfaction}); } static bool CheckConstraintSatisfaction( Sema &S, const NamedDecl *Template, ArrayRef ConstraintExprs, llvm::SmallVectorImpl &Converted, const MultiLevelTemplateArgumentList &TemplateArgsLists, SourceRange TemplateIDRange, ConstraintSatisfaction &Satisfaction) { if (ConstraintExprs.empty()) { Satisfaction.IsSatisfied = true; return false; } if (TemplateArgsLists.isAnyArgInstantiationDependent()) { // No need to check satisfaction for dependent constraint expressions. Satisfaction.IsSatisfied = true; return false; } ArrayRef TemplateArgs = TemplateArgsLists.getNumSubstitutedLevels() > 0 ? TemplateArgsLists.getOutermost() : ArrayRef {}; Sema::InstantiatingTemplate Inst(S, TemplateIDRange.getBegin(), Sema::InstantiatingTemplate::ConstraintsCheck{}, const_cast(Template), TemplateArgs, TemplateIDRange); if (Inst.isInvalid()) return true; for (const Expr *ConstraintExpr : ConstraintExprs) { ExprResult Res = calculateConstraintSatisfaction( S, Template, TemplateIDRange.getBegin(), TemplateArgsLists, ConstraintExpr, Satisfaction); if (Res.isInvalid()) return true; Converted.push_back(Res.get()); if (!Satisfaction.IsSatisfied) { // Backfill the 'converted' list with nulls so we can keep the Converted // and unconverted lists in sync. Converted.append(ConstraintExprs.size() - Converted.size(), nullptr); // [temp.constr.op] p2 // [...] To determine if a conjunction is satisfied, the satisfaction // of the first operand is checked. If that is not satisfied, the // conjunction is not satisfied. [...] 
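// Illustrative sketch of a fold expression over a constraint pack, the shape
// handled by EvaluateFoldExpandedConstraintSize above; each expansion is
// checked per pack element, and very large packs can hit the bracket-depth
// limit reported by the diagnostic above.
#include <concepts>

template <class... Ts>
  requires (std::integral<Ts> && ...)
void sumAll(Ts...) {}

void demoFold() {
  sumAll(1, 2, 3);    // OK: std::integral<int> holds for every element
  // sumAll(1, 2.5);  // error: std::integral<double> is not satisfied
}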
return false; } } return false; } bool Sema::CheckConstraintSatisfaction( const NamedDecl *Template, ArrayRef ConstraintExprs, llvm::SmallVectorImpl &ConvertedConstraints, const MultiLevelTemplateArgumentList &TemplateArgsLists, SourceRange TemplateIDRange, ConstraintSatisfaction &OutSatisfaction) { if (ConstraintExprs.empty()) { OutSatisfaction.IsSatisfied = true; return false; } if (!Template) { return ::CheckConstraintSatisfaction( *this, nullptr, ConstraintExprs, ConvertedConstraints, TemplateArgsLists, TemplateIDRange, OutSatisfaction); } // Invalid templates could make their way here. Substituting them could result // in dependent expressions. if (Template->isInvalidDecl()) { OutSatisfaction.IsSatisfied = false; return true; } // A list of the template argument list flattened in a predictible manner for // the purposes of caching. The ConstraintSatisfaction type is in AST so it // has no access to the MultiLevelTemplateArgumentList, so this has to happen // here. llvm::SmallVector FlattenedArgs; for (auto List : TemplateArgsLists) FlattenedArgs.insert(FlattenedArgs.end(), List.Args.begin(), List.Args.end()); llvm::FoldingSetNodeID ID; ConstraintSatisfaction::Profile(ID, Context, Template, FlattenedArgs); void *InsertPos; if (auto *Cached = SatisfactionCache.FindNodeOrInsertPos(ID, InsertPos)) { OutSatisfaction = *Cached; return false; } auto Satisfaction = std::make_unique(Template, FlattenedArgs); if (::CheckConstraintSatisfaction(*this, Template, ConstraintExprs, ConvertedConstraints, TemplateArgsLists, TemplateIDRange, *Satisfaction)) { OutSatisfaction = *Satisfaction; return true; } if (auto *Cached = SatisfactionCache.FindNodeOrInsertPos(ID, InsertPos)) { // The evaluation of this constraint resulted in us trying to re-evaluate it // recursively. This isn't really possible, except we try to form a // RecoveryExpr as a part of the evaluation. If this is the case, just // return the 'cached' version (which will have the same result), and save // ourselves the extra-insert. If it ever becomes possible to legitimately // recursively check a constraint, we should skip checking the 'inner' one // above, and replace the cached version with this one, as it would be more // specific. OutSatisfaction = *Cached; return false; } // Else we can simply add this satisfaction to the list. OutSatisfaction = *Satisfaction; // We cannot use InsertPos here because CheckConstraintSatisfaction might have // invalidated it. // Note that entries of SatisfactionCache are deleted in Sema's destructor. 
SatisfactionCache.InsertNode(Satisfaction.release()); return false; } bool Sema::CheckConstraintSatisfaction(const Expr *ConstraintExpr, ConstraintSatisfaction &Satisfaction) { struct ConstraintEvaluator { Sema &S; ExprResult EvaluateAtomicConstraint(const Expr *AtomicExpr) const { return S.PerformContextuallyConvertToBool(const_cast(AtomicExpr)); } std::optional EvaluateFoldExpandedConstraintSize(const CXXFoldExpr *FE) const { return 0; } }; return calculateConstraintSatisfaction(*this, ConstraintExpr, Satisfaction, ConstraintEvaluator{*this}) .isInvalid(); } bool Sema::addInstantiatedCapturesToScope( FunctionDecl *Function, const FunctionDecl *PatternDecl, LocalInstantiationScope &Scope, const MultiLevelTemplateArgumentList &TemplateArgs) { const auto *LambdaClass = cast(Function)->getParent(); const auto *LambdaPattern = cast(PatternDecl)->getParent(); unsigned Instantiated = 0; auto AddSingleCapture = [&](const ValueDecl *CapturedPattern, unsigned Index) { ValueDecl *CapturedVar = LambdaClass->getCapture(Index)->getCapturedVar(); if (CapturedVar->isInitCapture()) Scope.InstantiatedLocal(CapturedPattern, CapturedVar); }; for (const LambdaCapture &CapturePattern : LambdaPattern->captures()) { if (!CapturePattern.capturesVariable()) { Instantiated++; continue; } const ValueDecl *CapturedPattern = CapturePattern.getCapturedVar(); if (!CapturedPattern->isParameterPack()) { AddSingleCapture(CapturedPattern, Instantiated++); } else { Scope.MakeInstantiatedLocalArgPack(CapturedPattern); std::optional NumArgumentsInExpansion = getNumArgumentsInExpansion(CapturedPattern->getType(), TemplateArgs); if (!NumArgumentsInExpansion) continue; for (unsigned Arg = 0; Arg < *NumArgumentsInExpansion; ++Arg) AddSingleCapture(CapturedPattern, Instantiated++); } } return false; } bool Sema::SetupConstraintScope( FunctionDecl *FD, std::optional> TemplateArgs, const MultiLevelTemplateArgumentList &MLTAL, LocalInstantiationScope &Scope) { if (FD->isTemplateInstantiation() && FD->getPrimaryTemplate()) { FunctionTemplateDecl *PrimaryTemplate = FD->getPrimaryTemplate(); InstantiatingTemplate Inst( *this, FD->getPointOfInstantiation(), Sema::InstantiatingTemplate::ConstraintsCheck{}, PrimaryTemplate, TemplateArgs ? *TemplateArgs : ArrayRef{}, SourceRange()); if (Inst.isInvalid()) return true; // addInstantiatedParametersToScope creates a map of 'uninstantiated' to // 'instantiated' parameters and adds it to the context. For the case where // this function is a template being instantiated NOW, we also need to add // the list of current template arguments to the list so that they also can // be picked out of the map. if (auto *SpecArgs = FD->getTemplateSpecializationArgs()) { MultiLevelTemplateArgumentList JustTemplArgs(FD, SpecArgs->asArray(), /*Final=*/false); if (addInstantiatedParametersToScope( FD, PrimaryTemplate->getTemplatedDecl(), Scope, JustTemplArgs)) return true; } // If this is a member function, make sure we get the parameters that // reference the original primary template. // We walk up the instantiated template chain so that nested lambdas get // handled properly. // We should only collect instantiated parameters from the primary template. // Otherwise, we may have mismatched template parameter depth! 
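// Illustrative sketch of the situation SetupConstraintScope handles above: a
// constrained member function template of a class template, whose constraint
// refers to template parameters of both the member and the enclosing class.
#include <concepts>

template <class T>
struct Box {
  template <class U>
    requires std::convertible_to<U, T>
  void assign(U) {}
};

void demoBox() {
  Box<int>().assign(3.14);      // checks std::convertible_to<double, int>: OK
  // Box<int *>().assign(3.14); // error: 'double' is not convertible to 'int *'
}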
if (FunctionTemplateDecl *FromMemTempl = PrimaryTemplate->getInstantiatedFromMemberTemplate()) { while (FromMemTempl->getInstantiatedFromMemberTemplate()) FromMemTempl = FromMemTempl->getInstantiatedFromMemberTemplate(); if (addInstantiatedParametersToScope(FD, FromMemTempl->getTemplatedDecl(), Scope, MLTAL)) return true; } return false; } if (FD->getTemplatedKind() == FunctionDecl::TK_MemberSpecialization || FD->getTemplatedKind() == FunctionDecl::TK_DependentNonTemplate) { FunctionDecl *InstantiatedFrom = FD->getTemplatedKind() == FunctionDecl::TK_MemberSpecialization ? FD->getInstantiatedFromMemberFunction() : FD->getInstantiatedFromDecl(); InstantiatingTemplate Inst( *this, FD->getPointOfInstantiation(), Sema::InstantiatingTemplate::ConstraintsCheck{}, InstantiatedFrom, TemplateArgs ? *TemplateArgs : ArrayRef{}, SourceRange()); if (Inst.isInvalid()) return true; // Case where this was not a template, but instantiated as a // child-function. if (addInstantiatedParametersToScope(FD, InstantiatedFrom, Scope, MLTAL)) return true; } return false; } // This function collects all of the template arguments for the purposes of // constraint-instantiation and checking. std::optional Sema::SetupConstraintCheckingTemplateArgumentsAndScope( FunctionDecl *FD, std::optional> TemplateArgs, LocalInstantiationScope &Scope) { MultiLevelTemplateArgumentList MLTAL; // Collect the list of template arguments relative to the 'primary' template. // We need the entire list, since the constraint is completely uninstantiated // at this point. MLTAL = getTemplateInstantiationArgs(FD, FD->getLexicalDeclContext(), /*Final=*/false, /*Innermost=*/std::nullopt, /*RelativeToPrimary=*/true, /*Pattern=*/nullptr, /*ForConstraintInstantiation=*/true); if (SetupConstraintScope(FD, TemplateArgs, MLTAL, Scope)) return std::nullopt; return MLTAL; } bool Sema::CheckFunctionConstraints(const FunctionDecl *FD, ConstraintSatisfaction &Satisfaction, SourceLocation UsageLoc, bool ForOverloadResolution) { // Don't check constraints if the function is dependent. Also don't check if // this is a function template specialization, as the call to // CheckinstantiatedFunctionTemplateConstraints after this will check it // better. if (FD->isDependentContext() || FD->getTemplatedKind() == FunctionDecl::TK_FunctionTemplateSpecialization) { Satisfaction.IsSatisfied = true; return false; } // A lambda conversion operator has the same constraints as the call operator // and constraints checking relies on whether we are in a lambda call operator // (and may refer to its parameters), so check the call operator instead. // Note that the declarations outside of the lambda should also be // considered. Turning on the 'ForOverloadResolution' flag results in the // LocalInstantiationScope not looking into its parents, but we can still // access Decls from the parents while building a lambda RAII scope later. 
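// Illustrative sketch of the lambda case described above: the conversion
// operator of a captureless generic lambda shares the call operator's
// constraints, so converting it to a function pointer re-checks them.
#include <concepts>

inline auto Lam = []<class T>(T t) requires std::integral<T> { return t; };

inline int (*IntFn)(int) = Lam;   // OK: std::integral<int> is satisfied
// double (*DblFn)(double) = Lam; // error: std::integral<double> is not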
if (const auto *MD = dyn_cast(FD); MD && isLambdaConversionOperator(const_cast(MD))) return CheckFunctionConstraints(MD->getParent()->getLambdaCallOperator(), Satisfaction, UsageLoc, /*ShouldAddDeclsFromParentScope=*/true); DeclContext *CtxToSave = const_cast(FD); while (isLambdaCallOperator(CtxToSave) || FD->isTransparentContext()) { if (isLambdaCallOperator(CtxToSave)) CtxToSave = CtxToSave->getParent()->getParent(); else CtxToSave = CtxToSave->getNonTransparentContext(); } ContextRAII SavedContext{*this, CtxToSave}; LocalInstantiationScope Scope(*this, !ForOverloadResolution); std::optional MLTAL = SetupConstraintCheckingTemplateArgumentsAndScope( const_cast(FD), {}, Scope); if (!MLTAL) return true; Qualifiers ThisQuals; CXXRecordDecl *Record = nullptr; if (auto *Method = dyn_cast(FD)) { ThisQuals = Method->getMethodQualifiers(); Record = const_cast(Method->getParent()); } CXXThisScopeRAII ThisScope(*this, Record, ThisQuals, Record != nullptr); LambdaScopeForCallOperatorInstantiationRAII LambdaScope( *this, const_cast(FD), *MLTAL, Scope, ForOverloadResolution); return CheckConstraintSatisfaction( FD, {FD->getTrailingRequiresClause()}, *MLTAL, SourceRange(UsageLoc.isValid() ? UsageLoc : FD->getLocation()), Satisfaction); } // Figure out the to-translation-unit depth for this function declaration for // the purpose of seeing if they differ by constraints. This isn't the same as // getTemplateDepth, because it includes already instantiated parents. static unsigned CalculateTemplateDepthForConstraints(Sema &S, const NamedDecl *ND, bool SkipForSpecialization = false) { MultiLevelTemplateArgumentList MLTAL = S.getTemplateInstantiationArgs( ND, ND->getLexicalDeclContext(), /*Final=*/false, /*Innermost=*/std::nullopt, /*RelativeToPrimary=*/true, /*Pattern=*/nullptr, /*ForConstraintInstantiation=*/true, SkipForSpecialization); return MLTAL.getNumLevels(); } namespace { class AdjustConstraintDepth : public TreeTransform { unsigned TemplateDepth = 0; public: using inherited = TreeTransform; AdjustConstraintDepth(Sema &SemaRef, unsigned TemplateDepth) : inherited(SemaRef), TemplateDepth(TemplateDepth) {} using inherited::TransformTemplateTypeParmType; QualType TransformTemplateTypeParmType(TypeLocBuilder &TLB, TemplateTypeParmTypeLoc TL, bool) { const TemplateTypeParmType *T = TL.getTypePtr(); TemplateTypeParmDecl *NewTTPDecl = nullptr; if (TemplateTypeParmDecl *OldTTPDecl = T->getDecl()) NewTTPDecl = cast_or_null( TransformDecl(TL.getNameLoc(), OldTTPDecl)); QualType Result = getSema().Context.getTemplateTypeParmType( T->getDepth() + TemplateDepth, T->getIndex(), T->isParameterPack(), NewTTPDecl); TemplateTypeParmTypeLoc NewTL = TLB.push(Result); NewTL.setNameLoc(TL.getNameLoc()); return Result; } }; } // namespace static const Expr *SubstituteConstraintExpressionWithoutSatisfaction( Sema &S, const Sema::TemplateCompareNewDeclInfo &DeclInfo, const Expr *ConstrExpr) { MultiLevelTemplateArgumentList MLTAL = S.getTemplateInstantiationArgs( DeclInfo.getDecl(), DeclInfo.getLexicalDeclContext(), /*Final=*/false, /*Innermost=*/std::nullopt, /*RelativeToPrimary=*/true, /*Pattern=*/nullptr, /*ForConstraintInstantiation=*/true, /*SkipForSpecialization*/ false); if (MLTAL.getNumSubstitutedLevels() == 0) return ConstrExpr; Sema::SFINAETrap SFINAE(S, /*AccessCheckingSFINAE=*/false); Sema::InstantiatingTemplate Inst( S, DeclInfo.getLocation(), Sema::InstantiatingTemplate::ConstraintNormalization{}, const_cast(DeclInfo.getDecl()), SourceRange{}); if (Inst.isInvalid()) return nullptr; // Set up a dummy 
'instantiation' scope in the case of reference to function // parameters that the surrounding function hasn't been instantiated yet. Note // this may happen while we're comparing two templates' constraint // equivalence. - LocalInstantiationScope ScopeForParameters(S); + LocalInstantiationScope ScopeForParameters(S, /*CombineWithOuterScope=*/true); if (auto *FD = DeclInfo.getDecl()->getAsFunction()) for (auto *PVD : FD->parameters()) { if (!PVD->isParameterPack()) { ScopeForParameters.InstantiatedLocal(PVD, PVD); continue; } // This is hacky: we're mapping the parameter pack to a size-of-1 argument // to avoid building SubstTemplateTypeParmPackTypes for // PackExpansionTypes. The SubstTemplateTypeParmPackType node would // otherwise reference the AssociatedDecl of the template arguments, which // is, in this case, the template declaration. // // However, as we are in the process of comparing potential // re-declarations, the canonical declaration is the declaration itself at // this point. So if we didn't expand these packs, we would end up with an // incorrect profile difference because we will be profiling the // canonical types! // // FIXME: Improve the "no-transform" machinery in FindInstantiatedDecl so // that we can eliminate the Scope in the cases where the declarations are // not necessarily instantiated. It would also benefit the noexcept // specifier comparison. ScopeForParameters.MakeInstantiatedLocalArgPack(PVD); ScopeForParameters.InstantiatedLocalPackArg(PVD, PVD); } std::optional ThisScope; // See TreeTransform::RebuildTemplateSpecializationType. A context scope is // essential for having an injected class as the canonical type for a template // specialization type at the rebuilding stage. This guarantees that, for // out-of-line definitions, injected class name types and their equivalent // template specializations can be profiled to the same value, which makes it // possible that e.g. constraints involving C> and C are // perceived identical. std::optional ContextScope; if (auto *RD = dyn_cast(DeclInfo.getDeclContext())) { ThisScope.emplace(S, const_cast(RD), Qualifiers()); ContextScope.emplace(S, const_cast(cast(RD)), /*NewThisContext=*/false); } ExprResult SubstConstr = S.SubstConstraintExprWithoutSatisfaction( const_cast(ConstrExpr), MLTAL); if (SFINAE.hasErrorOccurred() || !SubstConstr.isUsable()) return nullptr; return SubstConstr.get(); } bool Sema::AreConstraintExpressionsEqual(const NamedDecl *Old, const Expr *OldConstr, const TemplateCompareNewDeclInfo &New, const Expr *NewConstr) { if (OldConstr == NewConstr) return true; // C++ [temp.constr.decl]p4 if (Old && !New.isInvalid() && !New.ContainsDecl(Old) && Old->getLexicalDeclContext() != New.getLexicalDeclContext()) { if (const Expr *SubstConstr = SubstituteConstraintExpressionWithoutSatisfaction(*this, Old, OldConstr)) OldConstr = SubstConstr; else return false; if (const Expr *SubstConstr = SubstituteConstraintExpressionWithoutSatisfaction(*this, New, NewConstr)) NewConstr = SubstConstr; else return false; } llvm::FoldingSetNodeID ID1, ID2; OldConstr->Profile(ID1, Context, /*Canonical=*/true); NewConstr->Profile(ID2, Context, /*Canonical=*/true); return ID1 == ID2; } bool Sema::FriendConstraintsDependOnEnclosingTemplate(const FunctionDecl *FD) { assert(FD->getFriendObjectKind() && "Must be a friend!"); // The logic for non-templates is handled in ASTContext::isSameEntity, so we // don't have to bother checking 'DependsOnEnclosingTemplate' for a // non-function-template. 
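// Illustrative sketch of the redeclaration comparison above: an out-of-line
// definition has to spell an equivalent requires-clause, which is what the
// substitute-and-profile check establishes.
template <class T>
struct Wrap {
  void reset() requires (sizeof(T) > 1);
};

template <class T>
void Wrap<T>::reset() requires (sizeof(T) > 1) {}      // OK: equivalent

// template <class T>
// void Wrap<T>::reset() requires (sizeof(T) >= 2) {}  // rejected: a different
//                                                     // expression, even though
//                                                     // it is logically the same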
assert(FD->getDescribedFunctionTemplate() && "Non-function templates don't need to be checked"); SmallVector ACs; FD->getDescribedFunctionTemplate()->getAssociatedConstraints(ACs); unsigned OldTemplateDepth = CalculateTemplateDepthForConstraints(*this, FD); for (const Expr *Constraint : ACs) if (ConstraintExpressionDependsOnEnclosingTemplate(FD, OldTemplateDepth, Constraint)) return true; return false; } bool Sema::EnsureTemplateArgumentListConstraints( TemplateDecl *TD, const MultiLevelTemplateArgumentList &TemplateArgsLists, SourceRange TemplateIDRange) { ConstraintSatisfaction Satisfaction; llvm::SmallVector AssociatedConstraints; TD->getAssociatedConstraints(AssociatedConstraints); if (CheckConstraintSatisfaction(TD, AssociatedConstraints, TemplateArgsLists, TemplateIDRange, Satisfaction)) return true; if (!Satisfaction.IsSatisfied) { SmallString<128> TemplateArgString; TemplateArgString = " "; TemplateArgString += getTemplateArgumentBindingsText( TD->getTemplateParameters(), TemplateArgsLists.getInnermost().data(), TemplateArgsLists.getInnermost().size()); Diag(TemplateIDRange.getBegin(), diag::err_template_arg_list_constraints_not_satisfied) << (int)getTemplateNameKindForDiagnostics(TemplateName(TD)) << TD << TemplateArgString << TemplateIDRange; DiagnoseUnsatisfiedConstraint(Satisfaction); return true; } return false; } bool Sema::CheckInstantiatedFunctionTemplateConstraints( SourceLocation PointOfInstantiation, FunctionDecl *Decl, ArrayRef TemplateArgs, ConstraintSatisfaction &Satisfaction) { // In most cases we're not going to have constraints, so check for that first. FunctionTemplateDecl *Template = Decl->getPrimaryTemplate(); // Note - code synthesis context for the constraints check is created // inside CheckConstraintsSatisfaction. SmallVector TemplateAC; Template->getAssociatedConstraints(TemplateAC); if (TemplateAC.empty()) { Satisfaction.IsSatisfied = true; return false; } // Enter the scope of this instantiation. We don't use // PushDeclContext because we don't have a scope. 
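// Illustrative sketch of the check EnsureTemplateArgumentListConstraints
// performs above: explicitly supplied template arguments must satisfy the
// template's associated constraints.
#include <concepts>

template <std::integral T>
struct Counter {};

Counter<int> GoodCounter;      // OK: std::integral<int> is satisfied
// Counter<double> BadCounter; // error: constraints not satisfied for
//                             // 'Counter<double>'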
Sema::ContextRAII savedContext(*this, Decl); LocalInstantiationScope Scope(*this); std::optional MLTAL = SetupConstraintCheckingTemplateArgumentsAndScope(Decl, TemplateArgs, Scope); if (!MLTAL) return true; Qualifiers ThisQuals; CXXRecordDecl *Record = nullptr; if (auto *Method = dyn_cast(Decl)) { ThisQuals = Method->getMethodQualifiers(); Record = Method->getParent(); } CXXThisScopeRAII ThisScope(*this, Record, ThisQuals, Record != nullptr); LambdaScopeForCallOperatorInstantiationRAII LambdaScope( *this, const_cast(Decl), *MLTAL, Scope); llvm::SmallVector Converted; return CheckConstraintSatisfaction(Template, TemplateAC, Converted, *MLTAL, PointOfInstantiation, Satisfaction); } static void diagnoseUnsatisfiedRequirement(Sema &S, concepts::ExprRequirement *Req, bool First) { assert(!Req->isSatisfied() && "Diagnose() can only be used on an unsatisfied requirement"); switch (Req->getSatisfactionStatus()) { case concepts::ExprRequirement::SS_Dependent: llvm_unreachable("Diagnosing a dependent requirement"); break; case concepts::ExprRequirement::SS_ExprSubstitutionFailure: { auto *SubstDiag = Req->getExprSubstitutionDiagnostic(); if (!SubstDiag->DiagMessage.empty()) S.Diag(SubstDiag->DiagLoc, diag::note_expr_requirement_expr_substitution_error) << (int)First << SubstDiag->SubstitutedEntity << SubstDiag->DiagMessage; else S.Diag(SubstDiag->DiagLoc, diag::note_expr_requirement_expr_unknown_substitution_error) << (int)First << SubstDiag->SubstitutedEntity; break; } case concepts::ExprRequirement::SS_NoexceptNotMet: S.Diag(Req->getNoexceptLoc(), diag::note_expr_requirement_noexcept_not_met) << (int)First << Req->getExpr(); break; case concepts::ExprRequirement::SS_TypeRequirementSubstitutionFailure: { auto *SubstDiag = Req->getReturnTypeRequirement().getSubstitutionDiagnostic(); if (!SubstDiag->DiagMessage.empty()) S.Diag(SubstDiag->DiagLoc, diag::note_expr_requirement_type_requirement_substitution_error) << (int)First << SubstDiag->SubstitutedEntity << SubstDiag->DiagMessage; else S.Diag(SubstDiag->DiagLoc, diag::note_expr_requirement_type_requirement_unknown_substitution_error) << (int)First << SubstDiag->SubstitutedEntity; break; } case concepts::ExprRequirement::SS_ConstraintsNotSatisfied: { ConceptSpecializationExpr *ConstraintExpr = Req->getReturnTypeRequirementSubstitutedConstraintExpr(); if (ConstraintExpr->getTemplateArgsAsWritten()->NumTemplateArgs == 1) { // A simple case - expr type is the type being constrained and the concept // was not provided arguments. 
Expr *e = Req->getExpr(); S.Diag(e->getBeginLoc(), diag::note_expr_requirement_constraints_not_satisfied_simple) << (int)First << S.Context.getReferenceQualifiedType(e) << ConstraintExpr->getNamedConcept(); } else { S.Diag(ConstraintExpr->getBeginLoc(), diag::note_expr_requirement_constraints_not_satisfied) << (int)First << ConstraintExpr; } S.DiagnoseUnsatisfiedConstraint(ConstraintExpr->getSatisfaction()); break; } case concepts::ExprRequirement::SS_Satisfied: llvm_unreachable("We checked this above"); } } static void diagnoseUnsatisfiedRequirement(Sema &S, concepts::TypeRequirement *Req, bool First) { assert(!Req->isSatisfied() && "Diagnose() can only be used on an unsatisfied requirement"); switch (Req->getSatisfactionStatus()) { case concepts::TypeRequirement::SS_Dependent: llvm_unreachable("Diagnosing a dependent requirement"); return; case concepts::TypeRequirement::SS_SubstitutionFailure: { auto *SubstDiag = Req->getSubstitutionDiagnostic(); if (!SubstDiag->DiagMessage.empty()) S.Diag(SubstDiag->DiagLoc, diag::note_type_requirement_substitution_error) << (int)First << SubstDiag->SubstitutedEntity << SubstDiag->DiagMessage; else S.Diag(SubstDiag->DiagLoc, diag::note_type_requirement_unknown_substitution_error) << (int)First << SubstDiag->SubstitutedEntity; return; } default: llvm_unreachable("Unknown satisfaction status"); return; } } static void diagnoseWellFormedUnsatisfiedConstraintExpr(Sema &S, Expr *SubstExpr, bool First = true); static void diagnoseUnsatisfiedRequirement(Sema &S, concepts::NestedRequirement *Req, bool First) { using SubstitutionDiagnostic = std::pair; for (auto &Record : Req->getConstraintSatisfaction()) { if (auto *SubstDiag = Record.dyn_cast()) S.Diag(SubstDiag->first, diag::note_nested_requirement_substitution_error) << (int)First << Req->getInvalidConstraintEntity() << SubstDiag->second; else diagnoseWellFormedUnsatisfiedConstraintExpr(S, Record.dyn_cast(), First); First = false; } } static void diagnoseWellFormedUnsatisfiedConstraintExpr(Sema &S, Expr *SubstExpr, bool First) { SubstExpr = SubstExpr->IgnoreParenImpCasts(); if (BinaryOperator *BO = dyn_cast(SubstExpr)) { switch (BO->getOpcode()) { // These two cases will in practice only be reached when using fold // expressions with || and &&, since otherwise the || and && will have been // broken down into atomic constraints during satisfaction checking. case BO_LOr: // Or evaluated to false - meaning both RHS and LHS evaluated to false. diagnoseWellFormedUnsatisfiedConstraintExpr(S, BO->getLHS(), First); diagnoseWellFormedUnsatisfiedConstraintExpr(S, BO->getRHS(), /*First=*/false); return; case BO_LAnd: { bool LHSSatisfied = BO->getLHS()->EvaluateKnownConstInt(S.Context).getBoolValue(); if (LHSSatisfied) { // LHS is true, so RHS must be false. 
diagnoseWellFormedUnsatisfiedConstraintExpr(S, BO->getRHS(), First); return; } // LHS is false diagnoseWellFormedUnsatisfiedConstraintExpr(S, BO->getLHS(), First); // RHS might also be false bool RHSSatisfied = BO->getRHS()->EvaluateKnownConstInt(S.Context).getBoolValue(); if (!RHSSatisfied) diagnoseWellFormedUnsatisfiedConstraintExpr(S, BO->getRHS(), /*First=*/false); return; } case BO_GE: case BO_LE: case BO_GT: case BO_LT: case BO_EQ: case BO_NE: if (BO->getLHS()->getType()->isIntegerType() && BO->getRHS()->getType()->isIntegerType()) { Expr::EvalResult SimplifiedLHS; Expr::EvalResult SimplifiedRHS; BO->getLHS()->EvaluateAsInt(SimplifiedLHS, S.Context, Expr::SE_NoSideEffects, /*InConstantContext=*/true); BO->getRHS()->EvaluateAsInt(SimplifiedRHS, S.Context, Expr::SE_NoSideEffects, /*InConstantContext=*/true); if (!SimplifiedLHS.Diag && ! SimplifiedRHS.Diag) { S.Diag(SubstExpr->getBeginLoc(), diag::note_atomic_constraint_evaluated_to_false_elaborated) << (int)First << SubstExpr << toString(SimplifiedLHS.Val.getInt(), 10) << BinaryOperator::getOpcodeStr(BO->getOpcode()) << toString(SimplifiedRHS.Val.getInt(), 10); return; } } break; default: break; } } else if (auto *CSE = dyn_cast(SubstExpr)) { if (CSE->getTemplateArgsAsWritten()->NumTemplateArgs == 1) { S.Diag( CSE->getSourceRange().getBegin(), diag:: note_single_arg_concept_specialization_constraint_evaluated_to_false) << (int)First << CSE->getTemplateArgsAsWritten()->arguments()[0].getArgument() << CSE->getNamedConcept(); } else { S.Diag(SubstExpr->getSourceRange().getBegin(), diag::note_concept_specialization_constraint_evaluated_to_false) << (int)First << CSE; } S.DiagnoseUnsatisfiedConstraint(CSE->getSatisfaction()); return; } else if (auto *RE = dyn_cast(SubstExpr)) { // FIXME: RequiresExpr should store dependent diagnostics. 
for (concepts::Requirement *Req : RE->getRequirements()) if (!Req->isDependent() && !Req->isSatisfied()) { if (auto *E = dyn_cast(Req)) diagnoseUnsatisfiedRequirement(S, E, First); else if (auto *T = dyn_cast(Req)) diagnoseUnsatisfiedRequirement(S, T, First); else diagnoseUnsatisfiedRequirement( S, cast(Req), First); break; } return; } else if (auto *TTE = dyn_cast(SubstExpr); TTE && TTE->getTrait() == clang::TypeTrait::BTT_IsDeducible) { assert(TTE->getNumArgs() == 2); S.Diag(SubstExpr->getSourceRange().getBegin(), diag::note_is_deducible_constraint_evaluated_to_false) << TTE->getArg(0)->getType() << TTE->getArg(1)->getType(); return; } S.Diag(SubstExpr->getSourceRange().getBegin(), diag::note_atomic_constraint_evaluated_to_false) << (int)First << SubstExpr; } template static void diagnoseUnsatisfiedConstraintExpr( Sema &S, const llvm::PointerUnion &Record, bool First = true) { if (auto *Diag = Record.template dyn_cast()) { S.Diag(Diag->first, diag::note_substituted_constraint_expr_is_ill_formed) << Diag->second; return; } diagnoseWellFormedUnsatisfiedConstraintExpr(S, Record.template get(), First); } void Sema::DiagnoseUnsatisfiedConstraint(const ConstraintSatisfaction& Satisfaction, bool First) { assert(!Satisfaction.IsSatisfied && "Attempted to diagnose a satisfied constraint"); for (auto &Record : Satisfaction.Details) { diagnoseUnsatisfiedConstraintExpr(*this, Record, First); First = false; } } void Sema::DiagnoseUnsatisfiedConstraint( const ASTConstraintSatisfaction &Satisfaction, bool First) { assert(!Satisfaction.IsSatisfied && "Attempted to diagnose a satisfied constraint"); for (auto &Record : Satisfaction) { diagnoseUnsatisfiedConstraintExpr(*this, Record, First); First = false; } } const NormalizedConstraint * Sema::getNormalizedAssociatedConstraints( NamedDecl *ConstrainedDecl, ArrayRef AssociatedConstraints) { // In case the ConstrainedDecl comes from modules, it is necessary to use // the canonical decl to avoid different atomic constraints with the 'same' // declarations. ConstrainedDecl = cast(ConstrainedDecl->getCanonicalDecl()); auto CacheEntry = NormalizationCache.find(ConstrainedDecl); if (CacheEntry == NormalizationCache.end()) { auto Normalized = NormalizedConstraint::fromConstraintExprs(*this, ConstrainedDecl, AssociatedConstraints); CacheEntry = NormalizationCache .try_emplace(ConstrainedDecl, Normalized ? 
new (Context) NormalizedConstraint( std::move(*Normalized)) : nullptr) .first; } return CacheEntry->second; } const NormalizedConstraint *clang::getNormalizedAssociatedConstraints( Sema &S, NamedDecl *ConstrainedDecl, ArrayRef AssociatedConstraints) { return S.getNormalizedAssociatedConstraints(ConstrainedDecl, AssociatedConstraints); } static bool substituteParameterMappings(Sema &S, NormalizedConstraint &N, ConceptDecl *Concept, const MultiLevelTemplateArgumentList &MLTAL, const ASTTemplateArgumentListInfo *ArgsAsWritten) { if (N.isCompound()) { if (substituteParameterMappings(S, N.getLHS(), Concept, MLTAL, ArgsAsWritten)) return true; return substituteParameterMappings(S, N.getRHS(), Concept, MLTAL, ArgsAsWritten); } if (N.isFoldExpanded()) { Sema::ArgumentPackSubstitutionIndexRAII _(S, -1); return substituteParameterMappings( S, N.getFoldExpandedConstraint()->Constraint, Concept, MLTAL, ArgsAsWritten); } TemplateParameterList *TemplateParams = Concept->getTemplateParameters(); AtomicConstraint &Atomic = *N.getAtomicConstraint(); TemplateArgumentListInfo SubstArgs; if (!Atomic.ParameterMapping) { llvm::SmallBitVector OccurringIndices(TemplateParams->size()); S.MarkUsedTemplateParameters(Atomic.ConstraintExpr, /*OnlyDeduced=*/false, /*Depth=*/0, OccurringIndices); TemplateArgumentLoc *TempArgs = new (S.Context) TemplateArgumentLoc[OccurringIndices.count()]; for (unsigned I = 0, J = 0, C = TemplateParams->size(); I != C; ++I) if (OccurringIndices[I]) new (&(TempArgs)[J++]) TemplateArgumentLoc(S.getIdentityTemplateArgumentLoc( TemplateParams->begin()[I], // Here we assume we do not support things like // template // concept C = ...; // // template requires C // struct S { }; // The above currently yields a diagnostic. // We still might have default arguments for concept parameters. ArgsAsWritten->NumTemplateArgs > I ? ArgsAsWritten->arguments()[I].getLocation() : SourceLocation())); Atomic.ParameterMapping.emplace(TempArgs, OccurringIndices.count()); } SourceLocation InstLocBegin = ArgsAsWritten->arguments().empty() ? ArgsAsWritten->getLAngleLoc() : ArgsAsWritten->arguments().front().getSourceRange().getBegin(); SourceLocation InstLocEnd = ArgsAsWritten->arguments().empty() ? 
ArgsAsWritten->getRAngleLoc() : ArgsAsWritten->arguments().front().getSourceRange().getEnd(); Sema::InstantiatingTemplate Inst( S, InstLocBegin, Sema::InstantiatingTemplate::ParameterMappingSubstitution{}, Concept, {InstLocBegin, InstLocEnd}); if (Inst.isInvalid()) return true; if (S.SubstTemplateArguments(*Atomic.ParameterMapping, MLTAL, SubstArgs)) return true; TemplateArgumentLoc *TempArgs = new (S.Context) TemplateArgumentLoc[SubstArgs.size()]; std::copy(SubstArgs.arguments().begin(), SubstArgs.arguments().end(), TempArgs); Atomic.ParameterMapping.emplace(TempArgs, SubstArgs.size()); return false; } static bool substituteParameterMappings(Sema &S, NormalizedConstraint &N, const ConceptSpecializationExpr *CSE) { MultiLevelTemplateArgumentList MLTAL = S.getTemplateInstantiationArgs( CSE->getNamedConcept(), CSE->getNamedConcept()->getLexicalDeclContext(), /*Final=*/false, CSE->getTemplateArguments(), /*RelativeToPrimary=*/true, /*Pattern=*/nullptr, /*ForConstraintInstantiation=*/true); return substituteParameterMappings(S, N, CSE->getNamedConcept(), MLTAL, CSE->getTemplateArgsAsWritten()); } NormalizedConstraint::NormalizedConstraint(ASTContext &C, NormalizedConstraint LHS, NormalizedConstraint RHS, CompoundConstraintKind Kind) : Constraint{CompoundConstraint{ new(C) NormalizedConstraintPair{std::move(LHS), std::move(RHS)}, Kind}} {} NormalizedConstraint::NormalizedConstraint(ASTContext &C, const NormalizedConstraint &Other) { if (Other.isAtomic()) { Constraint = new (C) AtomicConstraint(*Other.getAtomicConstraint()); } else if (Other.isFoldExpanded()) { Constraint = new (C) FoldExpandedConstraint( Other.getFoldExpandedConstraint()->Kind, NormalizedConstraint(C, Other.getFoldExpandedConstraint()->Constraint), Other.getFoldExpandedConstraint()->Pattern); } else { Constraint = CompoundConstraint( new (C) NormalizedConstraintPair{NormalizedConstraint(C, Other.getLHS()), NormalizedConstraint(C, Other.getRHS())}, Other.getCompoundKind()); } } NormalizedConstraint &NormalizedConstraint::getLHS() const { assert(isCompound() && "getLHS called on a non-compound constraint."); return Constraint.get().getPointer()->LHS; } NormalizedConstraint &NormalizedConstraint::getRHS() const { assert(isCompound() && "getRHS called on a non-compound constraint."); return Constraint.get().getPointer()->RHS; } std::optional NormalizedConstraint::fromConstraintExprs(Sema &S, NamedDecl *D, ArrayRef E) { assert(E.size() != 0); auto Conjunction = fromConstraintExpr(S, D, E[0]); if (!Conjunction) return std::nullopt; for (unsigned I = 1; I < E.size(); ++I) { auto Next = fromConstraintExpr(S, D, E[I]); if (!Next) return std::nullopt; *Conjunction = NormalizedConstraint(S.Context, std::move(*Conjunction), std::move(*Next), CCK_Conjunction); } return Conjunction; } std::optional NormalizedConstraint::fromConstraintExpr(Sema &S, NamedDecl *D, const Expr *E) { assert(E != nullptr); // C++ [temp.constr.normal]p1.1 // [...] // - The normal form of an expression (E) is the normal form of E. // [...] E = E->IgnoreParenImpCasts(); // C++2a [temp.param]p4: // [...] If T is not a pack, then E is E', otherwise E is (E' && ...). // Fold expression is considered atomic constraints per current wording. // See http://cplusplus.github.io/concepts-ts/ts-active.html#28 if (LogicalBinOp BO = E) { auto LHS = fromConstraintExpr(S, D, BO.getLHS()); if (!LHS) return std::nullopt; auto RHS = fromConstraintExpr(S, D, BO.getRHS()); if (!RHS) return std::nullopt; return NormalizedConstraint(S.Context, std::move(*LHS), std::move(*RHS), BO.isAnd() ? 
CCK_Conjunction : CCK_Disjunction); } else if (auto *CSE = dyn_cast(E)) { const NormalizedConstraint *SubNF; { Sema::InstantiatingTemplate Inst( S, CSE->getExprLoc(), Sema::InstantiatingTemplate::ConstraintNormalization{}, D, CSE->getSourceRange()); if (Inst.isInvalid()) return std::nullopt; // C++ [temp.constr.normal]p1.1 // [...] // The normal form of an id-expression of the form C, // where C names a concept, is the normal form of the // constraint-expression of C, after substituting A1, A2, ..., AN for C’s // respective template parameters in the parameter mappings in each atomic // constraint. If any such substitution results in an invalid type or // expression, the program is ill-formed; no diagnostic is required. // [...] ConceptDecl *CD = CSE->getNamedConcept(); SubNF = S.getNormalizedAssociatedConstraints(CD, {CD->getConstraintExpr()}); if (!SubNF) return std::nullopt; } std::optional New; New.emplace(S.Context, *SubNF); if (substituteParameterMappings(S, *New, CSE)) return std::nullopt; return New; } else if (auto *FE = dyn_cast(E); FE && S.getLangOpts().CPlusPlus26 && (FE->getOperator() == BinaryOperatorKind::BO_LAnd || FE->getOperator() == BinaryOperatorKind::BO_LOr)) { // Normalize fold expressions in C++26. FoldExpandedConstraint::FoldOperatorKind Kind = FE->getOperator() == BinaryOperatorKind::BO_LAnd ? FoldExpandedConstraint::FoldOperatorKind::And : FoldExpandedConstraint::FoldOperatorKind::Or; if (FE->getInit()) { auto LHS = fromConstraintExpr(S, D, FE->getLHS()); auto RHS = fromConstraintExpr(S, D, FE->getRHS()); if (!LHS || !RHS) return std::nullopt; if (FE->isRightFold()) RHS = NormalizedConstraint{new (S.Context) FoldExpandedConstraint{ Kind, std::move(*RHS), FE->getPattern()}}; else LHS = NormalizedConstraint{new (S.Context) FoldExpandedConstraint{ Kind, std::move(*LHS), FE->getPattern()}}; return NormalizedConstraint( S.Context, std::move(*LHS), std::move(*RHS), FE->getOperator() == BinaryOperatorKind::BO_LAnd ? CCK_Conjunction : CCK_Disjunction); } auto Sub = fromConstraintExpr(S, D, FE->getPattern()); if (!Sub) return std::nullopt; return NormalizedConstraint{new (S.Context) FoldExpandedConstraint{ Kind, std::move(*Sub), FE->getPattern()}}; } return NormalizedConstraint{new (S.Context) AtomicConstraint(S, E)}; } bool FoldExpandedConstraint::AreCompatibleForSubsumption( const FoldExpandedConstraint &A, const FoldExpandedConstraint &B) { // [C++26] [temp.constr.fold] // Two fold expanded constraints are compatible for subsumption // if their respective constraints both contain an equivalent unexpanded pack. 
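A hypothetical pair of overloads illustrating the C++26 rule restated in the comment above: both requires-clauses fold over the same pack Ts, so their fold-expanded constraints contain an equivalent unexpanded pack and are compatible for subsumption. pack_fn and the sizeof bounds are invented for illustration; folds over packs at different template parameter positions would not be comparable this way.

template <class... Ts>
  requires ((sizeof(Ts) >= 1) && ...)
void pack_fn(Ts...);                    // #1

template <class... Ts>
  requires ((sizeof(Ts) >= 1) && ...) && ((sizeof(Ts) <= 8) && ...)
void pack_fn(Ts...);                    // #2 folds over the same pack Ts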
llvm::SmallVector APacks, BPacks; Sema::collectUnexpandedParameterPacks(const_cast(A.Pattern), APacks); Sema::collectUnexpandedParameterPacks(const_cast(B.Pattern), BPacks); for (const UnexpandedParameterPack &APack : APacks) { std::pair DepthAndIndex = getDepthAndIndex(APack); auto it = llvm::find_if(BPacks, [&](const UnexpandedParameterPack &BPack) { return getDepthAndIndex(BPack) == DepthAndIndex; }); if (it != BPacks.end()) return true; } return false; } NormalForm clang::makeCNF(const NormalizedConstraint &Normalized) { if (Normalized.isAtomic()) return {{Normalized.getAtomicConstraint()}}; else if (Normalized.isFoldExpanded()) return {{Normalized.getFoldExpandedConstraint()}}; NormalForm LCNF = makeCNF(Normalized.getLHS()); NormalForm RCNF = makeCNF(Normalized.getRHS()); if (Normalized.getCompoundKind() == NormalizedConstraint::CCK_Conjunction) { LCNF.reserve(LCNF.size() + RCNF.size()); while (!RCNF.empty()) LCNF.push_back(RCNF.pop_back_val()); return LCNF; } // Disjunction NormalForm Res; Res.reserve(LCNF.size() * RCNF.size()); for (auto &LDisjunction : LCNF) for (auto &RDisjunction : RCNF) { NormalForm::value_type Combined; Combined.reserve(LDisjunction.size() + RDisjunction.size()); std::copy(LDisjunction.begin(), LDisjunction.end(), std::back_inserter(Combined)); std::copy(RDisjunction.begin(), RDisjunction.end(), std::back_inserter(Combined)); Res.emplace_back(Combined); } return Res; } NormalForm clang::makeDNF(const NormalizedConstraint &Normalized) { if (Normalized.isAtomic()) return {{Normalized.getAtomicConstraint()}}; else if (Normalized.isFoldExpanded()) return {{Normalized.getFoldExpandedConstraint()}}; NormalForm LDNF = makeDNF(Normalized.getLHS()); NormalForm RDNF = makeDNF(Normalized.getRHS()); if (Normalized.getCompoundKind() == NormalizedConstraint::CCK_Disjunction) { LDNF.reserve(LDNF.size() + RDNF.size()); while (!RDNF.empty()) LDNF.push_back(RDNF.pop_back_val()); return LDNF; } // Conjunction NormalForm Res; Res.reserve(LDNF.size() * RDNF.size()); for (auto &LConjunction : LDNF) { for (auto &RConjunction : RDNF) { NormalForm::value_type Combined; Combined.reserve(LConjunction.size() + RConjunction.size()); std::copy(LConjunction.begin(), LConjunction.end(), std::back_inserter(Combined)); std::copy(RConjunction.begin(), RConjunction.end(), std::back_inserter(Combined)); Res.emplace_back(Combined); } } return Res; } bool Sema::IsAtLeastAsConstrained(NamedDecl *D1, MutableArrayRef AC1, NamedDecl *D2, MutableArrayRef AC2, bool &Result) { if (const auto *FD1 = dyn_cast(D1)) { auto IsExpectedEntity = [](const FunctionDecl *FD) { FunctionDecl::TemplatedKind Kind = FD->getTemplatedKind(); return Kind == FunctionDecl::TK_NonTemplate || Kind == FunctionDecl::TK_FunctionTemplate; }; const auto *FD2 = dyn_cast(D2); (void)IsExpectedEntity; (void)FD1; (void)FD2; assert(IsExpectedEntity(FD1) && FD2 && IsExpectedEntity(FD2) && "use non-instantiated function declaration for constraints partial " "ordering"); } if (AC1.empty()) { Result = AC2.empty(); return false; } if (AC2.empty()) { // TD1 has associated constraints and TD2 does not. 
Result = true; return false; } std::pair Key{D1, D2}; auto CacheEntry = SubsumptionCache.find(Key); if (CacheEntry != SubsumptionCache.end()) { Result = CacheEntry->second; return false; } unsigned Depth1 = CalculateTemplateDepthForConstraints(*this, D1, true); unsigned Depth2 = CalculateTemplateDepthForConstraints(*this, D2, true); for (size_t I = 0; I != AC1.size() && I != AC2.size(); ++I) { if (Depth2 > Depth1) { AC1[I] = AdjustConstraintDepth(*this, Depth2 - Depth1) .TransformExpr(const_cast(AC1[I])) .get(); } else if (Depth1 > Depth2) { AC2[I] = AdjustConstraintDepth(*this, Depth1 - Depth2) .TransformExpr(const_cast(AC2[I])) .get(); } } if (clang::subsumes( *this, D1, AC1, D2, AC2, Result, [this](const AtomicConstraint &A, const AtomicConstraint &B) { return A.subsumes(Context, B); })) return true; SubsumptionCache.try_emplace(Key, Result); return false; } bool Sema::MaybeEmitAmbiguousAtomicConstraintsDiagnostic(NamedDecl *D1, ArrayRef AC1, NamedDecl *D2, ArrayRef AC2) { if (isSFINAEContext()) // No need to work here because our notes would be discarded. return false; if (AC1.empty() || AC2.empty()) return false; auto NormalExprEvaluator = [this] (const AtomicConstraint &A, const AtomicConstraint &B) { return A.subsumes(Context, B); }; const Expr *AmbiguousAtomic1 = nullptr, *AmbiguousAtomic2 = nullptr; auto IdenticalExprEvaluator = [&] (const AtomicConstraint &A, const AtomicConstraint &B) { if (!A.hasMatchingParameterMapping(Context, B)) return false; const Expr *EA = A.ConstraintExpr, *EB = B.ConstraintExpr; if (EA == EB) return true; // Not the same source level expression - are the expressions // identical? llvm::FoldingSetNodeID IDA, IDB; EA->Profile(IDA, Context, /*Canonical=*/true); EB->Profile(IDB, Context, /*Canonical=*/true); if (IDA != IDB) return false; AmbiguousAtomic1 = EA; AmbiguousAtomic2 = EB; return true; }; { // The subsumption checks might cause diagnostics SFINAETrap Trap(*this); auto *Normalized1 = getNormalizedAssociatedConstraints(D1, AC1); if (!Normalized1) return false; const NormalForm DNF1 = makeDNF(*Normalized1); const NormalForm CNF1 = makeCNF(*Normalized1); auto *Normalized2 = getNormalizedAssociatedConstraints(D2, AC2); if (!Normalized2) return false; const NormalForm DNF2 = makeDNF(*Normalized2); const NormalForm CNF2 = makeCNF(*Normalized2); bool Is1AtLeastAs2Normally = clang::subsumes(DNF1, CNF2, NormalExprEvaluator); bool Is2AtLeastAs1Normally = clang::subsumes(DNF2, CNF1, NormalExprEvaluator); bool Is1AtLeastAs2 = clang::subsumes(DNF1, CNF2, IdenticalExprEvaluator); bool Is2AtLeastAs1 = clang::subsumes(DNF2, CNF1, IdenticalExprEvaluator); if (Is1AtLeastAs2 == Is1AtLeastAs2Normally && Is2AtLeastAs1 == Is2AtLeastAs1Normally) // Same result - no ambiguity was caused by identical atomic expressions. return false; } // A different result! Some ambiguous atomic constraint(s) caused a difference assert(AmbiguousAtomic1 && AmbiguousAtomic2); Diag(AmbiguousAtomic1->getBeginLoc(), diag::note_ambiguous_atomic_constraints) << AmbiguousAtomic1->getSourceRange(); Diag(AmbiguousAtomic2->getBeginLoc(), diag::note_ambiguous_atomic_constraints_similar_expression) << AmbiguousAtomic2->getSourceRange(); return true; } concepts::ExprRequirement::ExprRequirement( Expr *E, bool IsSimple, SourceLocation NoexceptLoc, ReturnTypeRequirement Req, SatisfactionStatus Status, ConceptSpecializationExpr *SubstitutedConstraintExpr) : Requirement(IsSimple ? 
RK_Simple : RK_Compound, Status == SS_Dependent, Status == SS_Dependent && (E->containsUnexpandedParameterPack() || Req.containsUnexpandedParameterPack()), Status == SS_Satisfied), Value(E), NoexceptLoc(NoexceptLoc), TypeReq(Req), SubstitutedConstraintExpr(SubstitutedConstraintExpr), Status(Status) { assert((!IsSimple || (Req.isEmpty() && NoexceptLoc.isInvalid())) && "Simple requirement must not have a return type requirement or a " "noexcept specification"); assert((Status > SS_TypeRequirementSubstitutionFailure && Req.isTypeConstraint()) == (SubstitutedConstraintExpr != nullptr)); } concepts::ExprRequirement::ExprRequirement( SubstitutionDiagnostic *ExprSubstDiag, bool IsSimple, SourceLocation NoexceptLoc, ReturnTypeRequirement Req) : Requirement(IsSimple ? RK_Simple : RK_Compound, Req.isDependent(), Req.containsUnexpandedParameterPack(), /*IsSatisfied=*/false), Value(ExprSubstDiag), NoexceptLoc(NoexceptLoc), TypeReq(Req), Status(SS_ExprSubstitutionFailure) { assert((!IsSimple || (Req.isEmpty() && NoexceptLoc.isInvalid())) && "Simple requirement must not have a return type requirement or a " "noexcept specification"); } concepts::ExprRequirement::ReturnTypeRequirement:: ReturnTypeRequirement(TemplateParameterList *TPL) : TypeConstraintInfo(TPL, false) { assert(TPL->size() == 1); const TypeConstraint *TC = cast(TPL->getParam(0))->getTypeConstraint(); assert(TC && "TPL must have a template type parameter with a type constraint"); auto *Constraint = cast(TC->getImmediatelyDeclaredConstraint()); bool Dependent = Constraint->getTemplateArgsAsWritten() && TemplateSpecializationType::anyInstantiationDependentTemplateArguments( Constraint->getTemplateArgsAsWritten()->arguments().drop_front(1)); TypeConstraintInfo.setInt(Dependent ? true : false); } concepts::TypeRequirement::TypeRequirement(TypeSourceInfo *T) : Requirement(RK_Type, T->getType()->isInstantiationDependentType(), T->getType()->containsUnexpandedParameterPack(), // We reach this ctor with either dependent types (in which // IsSatisfied doesn't matter) or with non-dependent type in // which the existence of the type indicates satisfaction. /*IsSatisfied=*/true), Value(T), Status(T->getType()->isInstantiationDependentType() ? SS_Dependent : SS_Satisfied) {} diff --git a/compiler-rt/lib/builtins/int_math.h b/compiler-rt/lib/builtins/int_math.h index 74d3e311db5e..08bfe922ffa1 100644 --- a/compiler-rt/lib/builtins/int_math.h +++ b/compiler-rt/lib/builtins/int_math.h @@ -1,108 +1,113 @@ //===-- int_math.h - internal math inlines --------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file is not part of the interface of this library. // // This file defines substitutes for the libm functions used in some of the // compiler-rt implementations, defined in such a way that there is not a direct // dependency on libm or math.h. Instead, we use the compiler builtin versions // where available. This reduces our dependencies on the system SDK by foisting // the responsibility onto the compiler. 
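A hypothetical usage sketch, not part of the patch, of how compiler-rt code consumes these wrappers: libm functionality is reached only through the crt_* macros, so nothing needs math.h at build time. clamp_to_finite and its behavior are invented for illustration.

#include "int_math.h"

// Return x unchanged when it is finite; otherwise collapse it to a zero that
// preserves x's sign.
static double clamp_to_finite(double x) {
  if (!crt_isfinite(x))
    return crt_copysign(0.0, x);
  return x;
}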
// //===----------------------------------------------------------------------===// #ifndef INT_MATH_H #define INT_MATH_H #ifndef __has_builtin #define __has_builtin(x) 0 #endif #if defined(_MSC_VER) && !defined(__clang__) #include #include #endif #if defined(_MSC_VER) && !defined(__clang__) #define CRT_INFINITY INFINITY #else #define CRT_INFINITY __builtin_huge_valf() #endif #if defined(_MSC_VER) && !defined(__clang__) #define crt_isfinite(x) _finite((x)) #define crt_isinf(x) !_finite((x)) #define crt_isnan(x) _isnan((x)) #else // Define crt_isfinite in terms of the builtin if available, otherwise provide // an alternate version in terms of our other functions. This supports some // versions of GCC which didn't have __builtin_isfinite. #if __has_builtin(__builtin_isfinite) #define crt_isfinite(x) __builtin_isfinite((x)) #elif defined(__GNUC__) #define crt_isfinite(x) \ __extension__(({ \ __typeof((x)) x_ = (x); \ !crt_isinf(x_) && !crt_isnan(x_); \ })) #else #error "Do not know how to check for infinity" #endif // __has_builtin(__builtin_isfinite) #define crt_isinf(x) __builtin_isinf((x)) #define crt_isnan(x) __builtin_isnan((x)) #endif // _MSC_VER #if defined(_MSC_VER) && !defined(__clang__) #define crt_copysign(x, y) copysign((x), (y)) #define crt_copysignf(x, y) copysignf((x), (y)) #define crt_copysignl(x, y) copysignl((x), (y)) #else #define crt_copysign(x, y) __builtin_copysign((x), (y)) #define crt_copysignf(x, y) __builtin_copysignf((x), (y)) #define crt_copysignl(x, y) __builtin_copysignl((x), (y)) -#if __has_builtin(__builtin_copysignf128) +// We define __has_builtin to always return 0 for GCC versions below 10, +// but __builtin_copysignf128 is available since version 7. +#if __has_builtin(__builtin_copysignf128) || \ + (defined(__GNUC__) && __GNUC__ >= 7) #define crt_copysignf128(x, y) __builtin_copysignf128((x), (y)) -#elif __has_builtin(__builtin_copysignq) || (defined(__GNUC__) && __GNUC__ >= 7) +#elif __has_builtin(__builtin_copysignq) #define crt_copysignf128(x, y) __builtin_copysignq((x), (y)) #endif #endif #if defined(_MSC_VER) && !defined(__clang__) #define crt_fabs(x) fabs((x)) #define crt_fabsf(x) fabsf((x)) #define crt_fabsl(x) fabs((x)) #else #define crt_fabs(x) __builtin_fabs((x)) #define crt_fabsf(x) __builtin_fabsf((x)) #define crt_fabsl(x) __builtin_fabsl((x)) -#if __has_builtin(__builtin_fabsf128) +// We define __has_builtin to always return 0 for GCC versions below 10, +// but __builtin_fabsf128 is available since version 7. 
+#if __has_builtin(__builtin_fabsf128) || (defined(__GNUC__) && __GNUC__ >= 7) #define crt_fabsf128(x) __builtin_fabsf128((x)) -#elif __has_builtin(__builtin_fabsq) || (defined(__GNUC__) && __GNUC__ >= 7) +#elif __has_builtin(__builtin_fabsq) #define crt_fabsf128(x) __builtin_fabsq((x)) #endif #endif #if defined(_MSC_VER) && !defined(__clang__) #define crt_fmaxl(x, y) __max((x), (y)) #else #define crt_fmaxl(x, y) __builtin_fmaxl((x), (y)) #endif #if defined(_MSC_VER) && !defined(__clang__) #define crt_logbl(x) logbl((x)) #else #define crt_logbl(x) __builtin_logbl((x)) #endif #if defined(_MSC_VER) && !defined(__clang__) #define crt_scalbnl(x, y) scalbnl((x), (y)) #else #define crt_scalbnl(x, y) __builtin_scalbnl((x), (y)) #endif #endif // INT_MATH_H diff --git a/compiler-rt/lib/fuzzer/FuzzerUtilWindows.cpp b/compiler-rt/lib/fuzzer/FuzzerUtilWindows.cpp index 73eea07cf869..da3eb3cfb340 100644 --- a/compiler-rt/lib/fuzzer/FuzzerUtilWindows.cpp +++ b/compiler-rt/lib/fuzzer/FuzzerUtilWindows.cpp @@ -1,265 +1,266 @@ //===- FuzzerUtilWindows.cpp - Misc utils for Windows. --------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // Misc utils implementation for Windows. //===----------------------------------------------------------------------===// #include "FuzzerPlatform.h" #if LIBFUZZER_WINDOWS #include "FuzzerCommand.h" #include "FuzzerIO.h" #include "FuzzerInternal.h" #include #include #include #include #include #include #include #include #include // clang-format off #include // These must be included after windows.h. // archicture need to be set before including // libloaderapi #include #include #include // clang-format on namespace fuzzer { static const FuzzingOptions* HandlerOpt = nullptr; static LONG CALLBACK ExceptionHandler(PEXCEPTION_POINTERS ExceptionInfo) { switch (ExceptionInfo->ExceptionRecord->ExceptionCode) { case EXCEPTION_ACCESS_VIOLATION: case EXCEPTION_ARRAY_BOUNDS_EXCEEDED: case EXCEPTION_STACK_OVERFLOW: if (HandlerOpt->HandleSegv) Fuzzer::StaticCrashSignalCallback(); break; case EXCEPTION_DATATYPE_MISALIGNMENT: case EXCEPTION_IN_PAGE_ERROR: if (HandlerOpt->HandleBus) Fuzzer::StaticCrashSignalCallback(); break; case EXCEPTION_ILLEGAL_INSTRUCTION: case EXCEPTION_PRIV_INSTRUCTION: if (HandlerOpt->HandleIll) Fuzzer::StaticCrashSignalCallback(); break; case EXCEPTION_FLT_DENORMAL_OPERAND: case EXCEPTION_FLT_DIVIDE_BY_ZERO: case EXCEPTION_FLT_INEXACT_RESULT: case EXCEPTION_FLT_INVALID_OPERATION: case EXCEPTION_FLT_OVERFLOW: case EXCEPTION_FLT_STACK_CHECK: case EXCEPTION_FLT_UNDERFLOW: case EXCEPTION_INT_DIVIDE_BY_ZERO: case EXCEPTION_INT_OVERFLOW: if (HandlerOpt->HandleFpe) Fuzzer::StaticCrashSignalCallback(); break; // This is an undocumented exception code corresponding to a Visual C++ // Exception. 
// // See: https://devblogs.microsoft.com/oldnewthing/20100730-00/?p=13273 case 0xE06D7363: if (HandlerOpt->HandleWinExcept) Fuzzer::StaticCrashSignalCallback(); break; // TODO: Handle (Options.HandleXfsz) } return EXCEPTION_CONTINUE_SEARCH; } BOOL WINAPI CtrlHandler(DWORD dwCtrlType) { switch (dwCtrlType) { case CTRL_C_EVENT: if (HandlerOpt->HandleInt) Fuzzer::StaticInterruptCallback(); return TRUE; case CTRL_BREAK_EVENT: if (HandlerOpt->HandleTerm) Fuzzer::StaticInterruptCallback(); return TRUE; } return FALSE; } void CALLBACK AlarmHandler(PVOID, BOOLEAN) { Fuzzer::StaticAlarmCallback(); } class TimerQ { HANDLE TimerQueue; public: TimerQ() : TimerQueue(NULL) {} ~TimerQ() { if (TimerQueue) DeleteTimerQueueEx(TimerQueue, NULL); } void SetTimer(int Seconds) { if (!TimerQueue) { TimerQueue = CreateTimerQueue(); if (!TimerQueue) { Printf("libFuzzer: CreateTimerQueue failed.\n"); exit(1); } } HANDLE Timer; if (!CreateTimerQueueTimer(&Timer, TimerQueue, AlarmHandler, NULL, Seconds*1000, Seconds*1000, 0)) { Printf("libFuzzer: CreateTimerQueueTimer failed.\n"); exit(1); } } }; static TimerQ Timer; static void CrashHandler(int) { Fuzzer::StaticCrashSignalCallback(); } void SetSignalHandler(const FuzzingOptions& Options) { HandlerOpt = &Options; if (Options.HandleAlrm && Options.UnitTimeoutSec > 0) Timer.SetTimer(Options.UnitTimeoutSec / 2 + 1); if (Options.HandleInt || Options.HandleTerm) if (!SetConsoleCtrlHandler(CtrlHandler, TRUE)) { DWORD LastError = GetLastError(); Printf("libFuzzer: SetConsoleCtrlHandler failed (Error code: %lu).\n", LastError); exit(1); } if (Options.HandleSegv || Options.HandleBus || Options.HandleIll || Options.HandleFpe || Options.HandleWinExcept) SetUnhandledExceptionFilter(ExceptionHandler); if (Options.HandleAbrt) if (SIG_ERR == signal(SIGABRT, CrashHandler)) { Printf("libFuzzer: signal failed with %d\n", errno); exit(1); } } void SleepSeconds(int Seconds) { Sleep(Seconds * 1000); } unsigned long GetPid() { return GetCurrentProcessId(); } size_t GetPeakRSSMb() { PROCESS_MEMORY_COUNTERS info; if (!GetProcessMemoryInfo(GetCurrentProcess(), &info, sizeof(info))) return 0; return info.PeakWorkingSetSize >> 20; } FILE *OpenProcessPipe(const char *Command, const char *Mode) { return _popen(Command, Mode); } int CloseProcessPipe(FILE *F) { return _pclose(F); } int ExecuteCommand(const Command &Cmd) { std::string CmdLine = Cmd.toString(); return system(CmdLine.c_str()); } bool ExecuteCommand(const Command &Cmd, std::string *CmdOutput) { FILE *Pipe = _popen(Cmd.toString().c_str(), "r"); if (!Pipe) return false; if (CmdOutput) { char TmpBuffer[128]; while (fgets(TmpBuffer, sizeof(TmpBuffer), Pipe)) CmdOutput->append(TmpBuffer); } return _pclose(Pipe) == 0; } const void *SearchMemory(const void *Data, size_t DataLen, const void *Patt, size_t PattLen) { // TODO: make this implementation more efficient. 
const char *Cdata = (const char *)Data; const char *Cpatt = (const char *)Patt; if (!Data || !Patt || DataLen == 0 || PattLen == 0 || DataLen < PattLen) return NULL; if (PattLen == 1) return memchr(Data, *Cpatt, DataLen); const char *End = Cdata + DataLen - PattLen + 1; for (const char *It = Cdata; It < End; ++It) if (It[0] == Cpatt[0] && memcmp(It, Cpatt, PattLen) == 0) return It; return NULL; } std::string DisassembleCmd(const std::string &FileName) { std::vector command_vector; command_vector.push_back("dumpbin /summary > nul"); if (ExecuteCommand(Command(command_vector)) == 0) return "dumpbin /disasm " + FileName; Printf("libFuzzer: couldn't find tool to disassemble (dumpbin)\n"); exit(1); } std::string SearchRegexCmd(const std::string &Regex) { return "findstr /r \"" + Regex + "\""; } void DiscardOutput(int Fd) { FILE* Temp = fopen("nul", "w"); if (!Temp) return; _dup2(_fileno(Temp), Fd); fclose(Temp); } size_t PageSize() { static size_t PageSizeCached = []() -> size_t { SYSTEM_INFO si; GetSystemInfo(&si); return si.dwPageSize; }(); return PageSizeCached; } void SetThreadName(std::thread &thread, const std::string &name) { -#if defined(_LIBCPP_HAS_THREAD_API_PTHREAD) || \ - defined(_GLIBCXX_GCC_GTHR_POSIX_H) - (void)pthread_setname_np(thread.native_handle(), name.c_str()); -#else +#ifndef __MINGW32__ + // Not setting the thread name in MinGW environments. MinGW C++ standard + // libraries can either use native Windows threads or pthreads, so we + // don't know with certainty what kind of thread handle we're getting + // from thread.native_handle() here. typedef HRESULT(WINAPI * proc)(HANDLE, PCWSTR); HMODULE kbase = GetModuleHandleA("KernelBase.dll"); proc ThreadNameProc = reinterpret_cast(GetProcAddress(kbase, "SetThreadDescription")); if (ThreadNameProc) { std::wstring buf; auto sz = MultiByteToWideChar(CP_UTF8, 0, name.data(), -1, nullptr, 0); if (sz > 0) { buf.resize(sz); if (MultiByteToWideChar(CP_UTF8, 0, name.data(), -1, &buf[0], sz) > 0) { (void)ThreadNameProc(thread.native_handle(), buf.c_str()); } } } #endif } } // namespace fuzzer #endif // LIBFUZZER_WINDOWS diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp index b9b1f496df7c..be3b3bd94e2a 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp @@ -1,2620 +1,2643 @@ //===-- sanitizer_linux.cpp -----------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file is shared between AddressSanitizer and ThreadSanitizer // run-time libraries and implements linux-specific functions from // sanitizer_libc.h. 
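Stepping back to the SetThreadName change in FuzzerUtilWindows.cpp above, a minimal standalone sketch of the same run-time lookup pattern; set_thread_name is an invented wrapper name, and the real code additionally converts the UTF-8 name to UTF-16 first.

#include <windows.h>
#include <string>

// Resolve SetThreadDescription at run time so the binary still loads on
// Windows versions where the API is not exported.
static void set_thread_name(HANDLE thread, const std::wstring &name) {
  typedef HRESULT(WINAPI *SetThreadDescriptionFn)(HANDLE, PCWSTR);
  if (HMODULE kbase = GetModuleHandleA("KernelBase.dll"))
    if (auto fn = reinterpret_cast<SetThreadDescriptionFn>(
            GetProcAddress(kbase, "SetThreadDescription")))
      (void)fn(thread, name.c_str());
}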
//===----------------------------------------------------------------------===// #include "sanitizer_platform.h" #if SANITIZER_FREEBSD || SANITIZER_LINUX || SANITIZER_NETBSD || \ SANITIZER_SOLARIS # include "sanitizer_common.h" # include "sanitizer_flags.h" # include "sanitizer_getauxval.h" # include "sanitizer_internal_defs.h" # include "sanitizer_libc.h" # include "sanitizer_linux.h" # include "sanitizer_mutex.h" # include "sanitizer_placement_new.h" # include "sanitizer_procmaps.h" # if SANITIZER_LINUX && !SANITIZER_GO # include # endif // For mips64, syscall(__NR_stat) fills the buffer in the 'struct kernel_stat' // format. Struct kernel_stat is defined as 'struct stat' in asm/stat.h. To // access stat from asm/stat.h, without conflicting with definition in // sys/stat.h, we use this trick. sparc64 is similar, using // syscall(__NR_stat64) and struct kernel_stat64. # if SANITIZER_LINUX && (SANITIZER_MIPS64 || SANITIZER_SPARC64) # include # include # define stat kernel_stat # if SANITIZER_SPARC64 # define stat64 kernel_stat64 # endif # if SANITIZER_GO # undef st_atime # undef st_mtime # undef st_ctime # define st_atime st_atim # define st_mtime st_mtim # define st_ctime st_ctim # endif # include # undef stat # undef stat64 # endif # include # include # include # include # include # include # include # include # if !SANITIZER_SOLARIS # include # endif # include # include # include # include # include # include # include # if SANITIZER_LINUX # include # endif # if SANITIZER_LINUX && !SANITIZER_ANDROID # include # endif # if SANITIZER_LINUX && defined(__loongarch__) # include # endif # if SANITIZER_FREEBSD # include # include # include # include extern "C" { // must be included after and on // FreeBSD 9.2 and 10.0. # include } # include # endif // SANITIZER_FREEBSD # if SANITIZER_NETBSD # include // For NAME_MAX # include # include extern struct ps_strings *__ps_strings; # endif // SANITIZER_NETBSD # if SANITIZER_SOLARIS # include # include # define environ _environ # endif extern char **environ; # if SANITIZER_LINUX // struct kernel_timeval { long tv_sec; long tv_usec; }; // is broken on some linux distributions. const int FUTEX_WAIT = 0; const int FUTEX_WAKE = 1; const int FUTEX_PRIVATE_FLAG = 128; const int FUTEX_WAIT_PRIVATE = FUTEX_WAIT | FUTEX_PRIVATE_FLAG; const int FUTEX_WAKE_PRIVATE = FUTEX_WAKE | FUTEX_PRIVATE_FLAG; # endif // SANITIZER_LINUX // Are we using 32-bit or 64-bit Linux syscalls? // x32 (which defines __x86_64__) has SANITIZER_WORDSIZE == 32 // but it still needs to use 64-bit syscalls. # if SANITIZER_LINUX && (defined(__x86_64__) || defined(__powerpc64__) || \ SANITIZER_WORDSIZE == 64 || \ (defined(__mips__) && _MIPS_SIM == _ABIN32)) # define SANITIZER_LINUX_USES_64BIT_SYSCALLS 1 # else # define SANITIZER_LINUX_USES_64BIT_SYSCALLS 0 # endif // Note : FreeBSD implemented both Linux and OpenBSD apis. 
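The BlockSignals rework in the hunk that follows replaces unconditional sigdelset calls with KeepUnblocked, so a signal is re-opened only if the caller had not already blocked it. A standalone, hypothetical sketch of that masking rule in plain POSIX terms, with block_async_signals and keep_unblocked as invented names:

#include <initializer_list>
#include <signal.h>

// Drop sig from newset only if it was not already blocked in oldset,
// i.e. newset[sig] &= oldset[sig].
static void keep_unblocked(sigset_t &newset, const sigset_t &oldset, int sig) {
  if (!sigismember(&oldset, sig))
    sigdelset(&newset, sig);
}

static void block_async_signals(sigset_t *saved) {
  sigset_t newset, current;
  sigfillset(&newset);                          // start from "block everything"
  sigprocmask(SIG_SETMASK, nullptr, &current);  // read the caller's current mask
  for (int sig : {SIGSEGV, SIGBUS, SIGILL, SIGTRAP, SIGABRT, SIGFPE, SIGPIPE})
    keep_unblocked(newset, current, sig);       // keep user-blocked signals blocked
  sigprocmask(SIG_SETMASK, &newset, saved);     // install, returning the old mask
}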
# if SANITIZER_LINUX && defined(__NR_getrandom) # if !defined(GRND_NONBLOCK) # define GRND_NONBLOCK 1 # endif # define SANITIZER_USE_GETRANDOM 1 # else # define SANITIZER_USE_GETRANDOM 0 # endif // SANITIZER_LINUX && defined(__NR_getrandom) # if SANITIZER_FREEBSD # define SANITIZER_USE_GETENTROPY 1 # endif namespace __sanitizer { void SetSigProcMask(__sanitizer_sigset_t *set, __sanitizer_sigset_t *oldset) { CHECK_EQ(0, internal_sigprocmask(SIG_SETMASK, set, oldset)); } +# if SANITIZER_LINUX +// Deletes the specified signal from newset, if it is not present in oldset +// Equivalently: newset[signum] = newset[signum] & oldset[signum] +static void KeepUnblocked(__sanitizer_sigset_t &newset, + __sanitizer_sigset_t &oldset, int signum) { + // FIXME: https://github.com/google/sanitizers/issues/1816 + if (SANITIZER_ANDROID || !internal_sigismember(&oldset, signum)) + internal_sigdelset(&newset, signum); +} +# endif + // Block asynchronous signals void BlockSignals(__sanitizer_sigset_t *oldset) { - __sanitizer_sigset_t set; - internal_sigfillset(&set); -# if SANITIZER_LINUX && !SANITIZER_ANDROID + __sanitizer_sigset_t newset; + internal_sigfillset(&newset); + +# if SANITIZER_LINUX + __sanitizer_sigset_t currentset; + +# if !SANITIZER_ANDROID + // FIXME: https://github.com/google/sanitizers/issues/1816 + SetSigProcMask(NULL, ¤tset); + // Glibc uses SIGSETXID signal during setuid call. If this signal is blocked // on any thread, setuid call hangs. // See test/sanitizer_common/TestCases/Linux/setuid.c. - internal_sigdelset(&set, 33); -# endif -# if SANITIZER_LINUX + KeepUnblocked(newset, currentset, 33); +# endif // !SANITIZER_ANDROID + // Seccomp-BPF-sandboxed processes rely on SIGSYS to handle trapped syscalls. // If this signal is blocked, such calls cannot be handled and the process may // hang. - internal_sigdelset(&set, 31); + KeepUnblocked(newset, currentset, 31); +# if !SANITIZER_ANDROID // Don't block synchronous signals - internal_sigdelset(&set, SIGSEGV); - internal_sigdelset(&set, SIGBUS); - internal_sigdelset(&set, SIGILL); - internal_sigdelset(&set, SIGTRAP); - internal_sigdelset(&set, SIGABRT); - internal_sigdelset(&set, SIGFPE); - internal_sigdelset(&set, SIGPIPE); -# endif + // but also don't unblock signals that the user had deliberately blocked. + // FIXME: https://github.com/google/sanitizers/issues/1816 + KeepUnblocked(newset, currentset, SIGSEGV); + KeepUnblocked(newset, currentset, SIGBUS); + KeepUnblocked(newset, currentset, SIGILL); + KeepUnblocked(newset, currentset, SIGTRAP); + KeepUnblocked(newset, currentset, SIGABRT); + KeepUnblocked(newset, currentset, SIGFPE); + KeepUnblocked(newset, currentset, SIGPIPE); +# endif //! 
SANITIZER_ANDROID + +# endif // SANITIZER_LINUX - SetSigProcMask(&set, oldset); + SetSigProcMask(&newset, oldset); } ScopedBlockSignals::ScopedBlockSignals(__sanitizer_sigset_t *copy) { BlockSignals(&saved_); if (copy) internal_memcpy(copy, &saved_, sizeof(saved_)); } ScopedBlockSignals::~ScopedBlockSignals() { SetSigProcMask(&saved_, nullptr); } # if SANITIZER_LINUX && defined(__x86_64__) # include "sanitizer_syscall_linux_x86_64.inc" # elif SANITIZER_LINUX && SANITIZER_RISCV64 # include "sanitizer_syscall_linux_riscv64.inc" # elif SANITIZER_LINUX && defined(__aarch64__) # include "sanitizer_syscall_linux_aarch64.inc" # elif SANITIZER_LINUX && defined(__arm__) # include "sanitizer_syscall_linux_arm.inc" # elif SANITIZER_LINUX && defined(__hexagon__) # include "sanitizer_syscall_linux_hexagon.inc" # elif SANITIZER_LINUX && SANITIZER_LOONGARCH64 # include "sanitizer_syscall_linux_loongarch64.inc" # else # include "sanitizer_syscall_generic.inc" # endif // --------------- sanitizer_libc.h # if !SANITIZER_SOLARIS && !SANITIZER_NETBSD # if !SANITIZER_S390 uptr internal_mmap(void *addr, uptr length, int prot, int flags, int fd, u64 offset) { # if SANITIZER_FREEBSD || SANITIZER_LINUX_USES_64BIT_SYSCALLS return internal_syscall(SYSCALL(mmap), (uptr)addr, length, prot, flags, fd, offset); # else // mmap2 specifies file offset in 4096-byte units. CHECK(IsAligned(offset, 4096)); return internal_syscall(SYSCALL(mmap2), addr, length, prot, flags, fd, (OFF_T)(offset / 4096)); # endif } # endif // !SANITIZER_S390 uptr internal_munmap(void *addr, uptr length) { return internal_syscall(SYSCALL(munmap), (uptr)addr, length); } # if SANITIZER_LINUX uptr internal_mremap(void *old_address, uptr old_size, uptr new_size, int flags, void *new_address) { return internal_syscall(SYSCALL(mremap), (uptr)old_address, old_size, new_size, flags, (uptr)new_address); } # endif int internal_mprotect(void *addr, uptr length, int prot) { return internal_syscall(SYSCALL(mprotect), (uptr)addr, length, prot); } int internal_madvise(uptr addr, uptr length, int advice) { return internal_syscall(SYSCALL(madvise), addr, length, advice); } uptr internal_close(fd_t fd) { return internal_syscall(SYSCALL(close), fd); } uptr internal_open(const char *filename, int flags) { # if SANITIZER_LINUX return internal_syscall(SYSCALL(openat), AT_FDCWD, (uptr)filename, flags); # else return internal_syscall(SYSCALL(open), (uptr)filename, flags); # endif } uptr internal_open(const char *filename, int flags, u32 mode) { # if SANITIZER_LINUX return internal_syscall(SYSCALL(openat), AT_FDCWD, (uptr)filename, flags, mode); # else return internal_syscall(SYSCALL(open), (uptr)filename, flags, mode); # endif } uptr internal_read(fd_t fd, void *buf, uptr count) { sptr res; HANDLE_EINTR(res, (sptr)internal_syscall(SYSCALL(read), fd, (uptr)buf, count)); return res; } uptr internal_write(fd_t fd, const void *buf, uptr count) { sptr res; HANDLE_EINTR(res, (sptr)internal_syscall(SYSCALL(write), fd, (uptr)buf, count)); return res; } uptr internal_ftruncate(fd_t fd, uptr size) { sptr res; HANDLE_EINTR(res, (sptr)internal_syscall(SYSCALL(ftruncate), fd, (OFF_T)size)); return res; } # if !SANITIZER_LINUX_USES_64BIT_SYSCALLS && SANITIZER_LINUX static void stat64_to_stat(struct stat64 *in, struct stat *out) { internal_memset(out, 0, sizeof(*out)); out->st_dev = in->st_dev; out->st_ino = in->st_ino; out->st_mode = in->st_mode; out->st_nlink = in->st_nlink; out->st_uid = in->st_uid; out->st_gid = in->st_gid; out->st_rdev = in->st_rdev; out->st_size = in->st_size; 
out->st_blksize = in->st_blksize; out->st_blocks = in->st_blocks; out->st_atime = in->st_atime; out->st_mtime = in->st_mtime; out->st_ctime = in->st_ctime; } # endif # if SANITIZER_LINUX && defined(__loongarch__) static void statx_to_stat(struct statx *in, struct stat *out) { internal_memset(out, 0, sizeof(*out)); out->st_dev = makedev(in->stx_dev_major, in->stx_dev_minor); out->st_ino = in->stx_ino; out->st_mode = in->stx_mode; out->st_nlink = in->stx_nlink; out->st_uid = in->stx_uid; out->st_gid = in->stx_gid; out->st_rdev = makedev(in->stx_rdev_major, in->stx_rdev_minor); out->st_size = in->stx_size; out->st_blksize = in->stx_blksize; out->st_blocks = in->stx_blocks; out->st_atime = in->stx_atime.tv_sec; out->st_atim.tv_nsec = in->stx_atime.tv_nsec; out->st_mtime = in->stx_mtime.tv_sec; out->st_mtim.tv_nsec = in->stx_mtime.tv_nsec; out->st_ctime = in->stx_ctime.tv_sec; out->st_ctim.tv_nsec = in->stx_ctime.tv_nsec; } # endif # if SANITIZER_MIPS64 || SANITIZER_SPARC64 # if SANITIZER_MIPS64 typedef struct kernel_stat kstat_t; # else typedef struct kernel_stat64 kstat_t; # endif // Undefine compatibility macros from // so that they would not clash with the kernel_stat // st_[a|m|c]time fields # if !SANITIZER_GO # undef st_atime # undef st_mtime # undef st_ctime # endif # if defined(SANITIZER_ANDROID) // Bionic sys/stat.h defines additional macros // for compatibility with the old NDKs and // they clash with the kernel_stat structure // st_[a|m|c]time_nsec fields. # undef st_atime_nsec # undef st_mtime_nsec # undef st_ctime_nsec # endif static void kernel_stat_to_stat(kstat_t *in, struct stat *out) { internal_memset(out, 0, sizeof(*out)); out->st_dev = in->st_dev; out->st_ino = in->st_ino; out->st_mode = in->st_mode; out->st_nlink = in->st_nlink; out->st_uid = in->st_uid; out->st_gid = in->st_gid; out->st_rdev = in->st_rdev; out->st_size = in->st_size; out->st_blksize = in->st_blksize; out->st_blocks = in->st_blocks; # if defined(__USE_MISC) || defined(__USE_XOPEN2K8) || \ defined(SANITIZER_ANDROID) out->st_atim.tv_sec = in->st_atime; out->st_atim.tv_nsec = in->st_atime_nsec; out->st_mtim.tv_sec = in->st_mtime; out->st_mtim.tv_nsec = in->st_mtime_nsec; out->st_ctim.tv_sec = in->st_ctime; out->st_ctim.tv_nsec = in->st_ctime_nsec; # else out->st_atime = in->st_atime; out->st_atimensec = in->st_atime_nsec; out->st_mtime = in->st_mtime; out->st_mtimensec = in->st_mtime_nsec; out->st_ctime = in->st_ctime; out->st_atimensec = in->st_ctime_nsec; # endif } # endif uptr internal_stat(const char *path, void *buf) { # if SANITIZER_FREEBSD return internal_syscall(SYSCALL(fstatat), AT_FDCWD, (uptr)path, (uptr)buf, 0); # elif SANITIZER_LINUX # if defined(__loongarch__) struct statx bufx; int res = internal_syscall(SYSCALL(statx), AT_FDCWD, (uptr)path, AT_NO_AUTOMOUNT, STATX_BASIC_STATS, (uptr)&bufx); statx_to_stat(&bufx, (struct stat *)buf); return res; # elif (SANITIZER_WORDSIZE == 64 || SANITIZER_X32 || \ (defined(__mips__) && _MIPS_SIM == _ABIN32)) && \ !SANITIZER_SPARC return internal_syscall(SYSCALL(newfstatat), AT_FDCWD, (uptr)path, (uptr)buf, 0); # elif SANITIZER_SPARC64 kstat_t buf64; int res = internal_syscall(SYSCALL(fstatat64), AT_FDCWD, (uptr)path, (uptr)&buf64, 0); kernel_stat_to_stat(&buf64, (struct stat *)buf); return res; # else struct stat64 buf64; int res = internal_syscall(SYSCALL(fstatat64), AT_FDCWD, (uptr)path, (uptr)&buf64, 0); stat64_to_stat(&buf64, (struct stat *)buf); return res; # endif # else struct stat64 buf64; int res = internal_syscall(SYSCALL(stat64), path, &buf64); 
stat64_to_stat(&buf64, (struct stat *)buf); return res; # endif } uptr internal_lstat(const char *path, void *buf) { # if SANITIZER_FREEBSD return internal_syscall(SYSCALL(fstatat), AT_FDCWD, (uptr)path, (uptr)buf, AT_SYMLINK_NOFOLLOW); # elif SANITIZER_LINUX # if defined(__loongarch__) struct statx bufx; int res = internal_syscall(SYSCALL(statx), AT_FDCWD, (uptr)path, AT_SYMLINK_NOFOLLOW | AT_NO_AUTOMOUNT, STATX_BASIC_STATS, (uptr)&bufx); statx_to_stat(&bufx, (struct stat *)buf); return res; # elif (defined(_LP64) || SANITIZER_X32 || \ (defined(__mips__) && _MIPS_SIM == _ABIN32)) && \ !SANITIZER_SPARC return internal_syscall(SYSCALL(newfstatat), AT_FDCWD, (uptr)path, (uptr)buf, AT_SYMLINK_NOFOLLOW); # elif SANITIZER_SPARC64 kstat_t buf64; int res = internal_syscall(SYSCALL(fstatat64), AT_FDCWD, (uptr)path, (uptr)&buf64, AT_SYMLINK_NOFOLLOW); kernel_stat_to_stat(&buf64, (struct stat *)buf); return res; # else struct stat64 buf64; int res = internal_syscall(SYSCALL(fstatat64), AT_FDCWD, (uptr)path, (uptr)&buf64, AT_SYMLINK_NOFOLLOW); stat64_to_stat(&buf64, (struct stat *)buf); return res; # endif # else struct stat64 buf64; int res = internal_syscall(SYSCALL(lstat64), path, &buf64); stat64_to_stat(&buf64, (struct stat *)buf); return res; # endif } uptr internal_fstat(fd_t fd, void *buf) { # if SANITIZER_FREEBSD || SANITIZER_LINUX_USES_64BIT_SYSCALLS # if SANITIZER_MIPS64 // For mips64, fstat syscall fills buffer in the format of kernel_stat kstat_t kbuf; int res = internal_syscall(SYSCALL(fstat), fd, &kbuf); kernel_stat_to_stat(&kbuf, (struct stat *)buf); return res; # elif SANITIZER_LINUX && SANITIZER_SPARC64 // For sparc64, fstat64 syscall fills buffer in the format of kernel_stat64 kstat_t kbuf; int res = internal_syscall(SYSCALL(fstat64), fd, &kbuf); kernel_stat_to_stat(&kbuf, (struct stat *)buf); return res; # elif SANITIZER_LINUX && defined(__loongarch__) struct statx bufx; int res = internal_syscall(SYSCALL(statx), fd, "", AT_EMPTY_PATH, STATX_BASIC_STATS, (uptr)&bufx); statx_to_stat(&bufx, (struct stat *)buf); return res; # else return internal_syscall(SYSCALL(fstat), fd, (uptr)buf); # endif # else struct stat64 buf64; int res = internal_syscall(SYSCALL(fstat64), fd, &buf64); stat64_to_stat(&buf64, (struct stat *)buf); return res; # endif } uptr internal_filesize(fd_t fd) { struct stat st; if (internal_fstat(fd, &st)) return -1; return (uptr)st.st_size; } uptr internal_dup(int oldfd) { return internal_syscall(SYSCALL(dup), oldfd); } uptr internal_dup2(int oldfd, int newfd) { # if SANITIZER_LINUX return internal_syscall(SYSCALL(dup3), oldfd, newfd, 0); # else return internal_syscall(SYSCALL(dup2), oldfd, newfd); # endif } uptr internal_readlink(const char *path, char *buf, uptr bufsize) { # if SANITIZER_LINUX return internal_syscall(SYSCALL(readlinkat), AT_FDCWD, (uptr)path, (uptr)buf, bufsize); # else return internal_syscall(SYSCALL(readlink), (uptr)path, (uptr)buf, bufsize); # endif } uptr internal_unlink(const char *path) { # if SANITIZER_LINUX return internal_syscall(SYSCALL(unlinkat), AT_FDCWD, (uptr)path, 0); # else return internal_syscall(SYSCALL(unlink), (uptr)path); # endif } uptr internal_rename(const char *oldpath, const char *newpath) { # if (defined(__riscv) || defined(__loongarch__)) && defined(__linux__) return internal_syscall(SYSCALL(renameat2), AT_FDCWD, (uptr)oldpath, AT_FDCWD, (uptr)newpath, 0); # elif SANITIZER_LINUX return internal_syscall(SYSCALL(renameat), AT_FDCWD, (uptr)oldpath, AT_FDCWD, (uptr)newpath); # else return internal_syscall(SYSCALL(rename), 
(uptr)oldpath, (uptr)newpath); # endif } uptr internal_sched_yield() { return internal_syscall(SYSCALL(sched_yield)); } void internal_usleep(u64 useconds) { struct timespec ts; ts.tv_sec = useconds / 1000000; ts.tv_nsec = (useconds % 1000000) * 1000; internal_syscall(SYSCALL(nanosleep), &ts, &ts); } uptr internal_execve(const char *filename, char *const argv[], char *const envp[]) { return internal_syscall(SYSCALL(execve), (uptr)filename, (uptr)argv, (uptr)envp); } # endif // !SANITIZER_SOLARIS && !SANITIZER_NETBSD # if !SANITIZER_NETBSD void internal__exit(int exitcode) { # if SANITIZER_FREEBSD || SANITIZER_SOLARIS internal_syscall(SYSCALL(exit), exitcode); # else internal_syscall(SYSCALL(exit_group), exitcode); # endif Die(); // Unreachable. } # endif // !SANITIZER_NETBSD // ----------------- sanitizer_common.h bool FileExists(const char *filename) { if (ShouldMockFailureToOpen(filename)) return false; struct stat st; if (internal_stat(filename, &st)) return false; // Sanity check: filename is a regular file. return S_ISREG(st.st_mode); } bool DirExists(const char *path) { struct stat st; if (internal_stat(path, &st)) return false; return S_ISDIR(st.st_mode); } # if !SANITIZER_NETBSD tid_t GetTid() { # if SANITIZER_FREEBSD long Tid; thr_self(&Tid); return Tid; # elif SANITIZER_SOLARIS return thr_self(); # else return internal_syscall(SYSCALL(gettid)); # endif } int TgKill(pid_t pid, tid_t tid, int sig) { # if SANITIZER_LINUX return internal_syscall(SYSCALL(tgkill), pid, tid, sig); # elif SANITIZER_FREEBSD return internal_syscall(SYSCALL(thr_kill2), pid, tid, sig); # elif SANITIZER_SOLARIS (void)pid; errno = thr_kill(tid, sig); // TgKill is expected to return -1 on error, not an errno. return errno != 0 ? -1 : 0; # endif } # endif # if SANITIZER_GLIBC u64 NanoTime() { kernel_timeval tv; internal_memset(&tv, 0, sizeof(tv)); internal_syscall(SYSCALL(gettimeofday), &tv, 0); return (u64)tv.tv_sec * 1000 * 1000 * 1000 + tv.tv_usec * 1000; } // Used by real_clock_gettime. uptr internal_clock_gettime(__sanitizer_clockid_t clk_id, void *tp) { return internal_syscall(SYSCALL(clock_gettime), clk_id, tp); } # elif !SANITIZER_SOLARIS && !SANITIZER_NETBSD u64 NanoTime() { struct timespec ts; clock_gettime(CLOCK_REALTIME, &ts); return (u64)ts.tv_sec * 1000 * 1000 * 1000 + ts.tv_nsec; } # endif // Like getenv, but reads env directly from /proc (on Linux) or parses the // 'environ' array (on some others) and does not use libc. This function // should be called first inside __asan_init. const char *GetEnv(const char *name) { # if SANITIZER_FREEBSD || SANITIZER_NETBSD || SANITIZER_SOLARIS if (::environ != 0) { uptr NameLen = internal_strlen(name); for (char **Env = ::environ; *Env != 0; Env++) { if (internal_strncmp(*Env, name, NameLen) == 0 && (*Env)[NameLen] == '=') return (*Env) + NameLen + 1; } } return 0; // Not found. # elif SANITIZER_LINUX static char *environ; static uptr len; static bool inited; if (!inited) { inited = true; uptr environ_size; if (!ReadFileToBuffer("/proc/self/environ", &environ, &environ_size, &len)) environ = nullptr; } if (!environ || len == 0) return nullptr; uptr namelen = internal_strlen(name); const char *p = environ; while (*p != '\0') { // will happen at the \0\0 that terminates the buffer // proc file has the format NAME=value\0NAME=value\0NAME=value\0... const char *endp = (char *)internal_memchr(p, '\0', len - (p - environ)); if (!endp) // this entry isn't NUL terminated return nullptr; else if (!internal_memcmp(p, name, namelen) && p[namelen] == '=') // Match. 
return p + namelen + 1; // point after = p = endp + 1; } return nullptr; // Not found. # else # error "Unsupported platform" # endif } # if !SANITIZER_FREEBSD && !SANITIZER_NETBSD && !SANITIZER_GO extern "C" { SANITIZER_WEAK_ATTRIBUTE extern void *__libc_stack_end; } # endif # if !SANITIZER_FREEBSD && !SANITIZER_NETBSD static void ReadNullSepFileToArray(const char *path, char ***arr, int arr_size) { char *buff; uptr buff_size; uptr buff_len; *arr = (char **)MmapOrDie(arr_size * sizeof(char *), "NullSepFileArray"); if (!ReadFileToBuffer(path, &buff, &buff_size, &buff_len, 1024 * 1024)) { (*arr)[0] = nullptr; return; } (*arr)[0] = buff; int count, i; for (count = 1, i = 1;; i++) { if (buff[i] == 0) { if (buff[i + 1] == 0) break; (*arr)[count] = &buff[i + 1]; CHECK_LE(count, arr_size - 1); // FIXME: make this more flexible. count++; } } (*arr)[count] = nullptr; } # endif static void GetArgsAndEnv(char ***argv, char ***envp) { # if SANITIZER_FREEBSD // On FreeBSD, retrieving the argument and environment arrays is done via the // kern.ps_strings sysctl, which returns a pointer to a structure containing // this information. See also . ps_strings *pss; uptr sz = sizeof(pss); if (internal_sysctlbyname("kern.ps_strings", &pss, &sz, NULL, 0) == -1) { Printf("sysctl kern.ps_strings failed\n"); Die(); } *argv = pss->ps_argvstr; *envp = pss->ps_envstr; # elif SANITIZER_NETBSD *argv = __ps_strings->ps_argvstr; *envp = __ps_strings->ps_envstr; # else // SANITIZER_FREEBSD # if !SANITIZER_GO if (&__libc_stack_end) { uptr *stack_end = (uptr *)__libc_stack_end; // Normally argc can be obtained from *stack_end, however, on ARM glibc's // _start clobbers it: // https://sourceware.org/git/?p=glibc.git;a=blob;f=sysdeps/arm/start.S;hb=refs/heads/release/2.31/master#l75 // Do not special-case ARM and infer argc from argv everywhere. int argc = 0; while (stack_end[argc + 1]) argc++; *argv = (char **)(stack_end + 1); *envp = (char **)(stack_end + argc + 2); } else { # endif // !SANITIZER_GO static const int kMaxArgv = 2000, kMaxEnvp = 2000; ReadNullSepFileToArray("/proc/self/cmdline", argv, kMaxArgv); ReadNullSepFileToArray("/proc/self/environ", envp, kMaxEnvp); # if !SANITIZER_GO } # endif // !SANITIZER_GO # endif // SANITIZER_FREEBSD } char **GetArgv() { char **argv, **envp; GetArgsAndEnv(&argv, &envp); return argv; } char **GetEnviron() { char **argv, **envp; GetArgsAndEnv(&argv, &envp); return envp; } # if !SANITIZER_SOLARIS void FutexWait(atomic_uint32_t *p, u32 cmp) { # if SANITIZER_FREEBSD _umtx_op(p, UMTX_OP_WAIT_UINT, cmp, 0, 0); # elif SANITIZER_NETBSD sched_yield(); /* No userspace futex-like synchronization */ # else internal_syscall(SYSCALL(futex), (uptr)p, FUTEX_WAIT_PRIVATE, cmp, 0, 0, 0); # endif } void FutexWake(atomic_uint32_t *p, u32 count) { # if SANITIZER_FREEBSD _umtx_op(p, UMTX_OP_WAKE, count, 0, 0); # elif SANITIZER_NETBSD /* No userspace futex-like synchronization */ # else internal_syscall(SYSCALL(futex), (uptr)p, FUTEX_WAKE_PRIVATE, count, 0, 0, 0); # endif } # endif // !SANITIZER_SOLARIS // ----------------- sanitizer_linux.h // The actual size of this structure is specified by d_reclen. // Note that getdents64 uses a different structure format. We only provide the // 32-bit syscall here. 
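// Illustration only (standalone sketch, not part of the runtime): the
// d_reclen-based traversal a getdents64 buffer requires, which is the same
// walk ThreadLister::ListThreads performs on /proc/<pid>/task further down.
// The struct name dirent64_raw is local to this sketch and simply mirrors the
// kernel's linux_dirent64 layout; build it as its own program on a Linux
// system that exposes SYS_getdents64.
#include <fcntl.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <cstdio>

// Local mirror of the kernel's linux_dirent64 record (the name is ours).
struct dirent64_raw {
  unsigned long long d_ino;
  long long d_off;
  unsigned short d_reclen;  // total size of this record, d_name included
  unsigned char d_type;
  char d_name[256];         // NUL-terminated entry name
};

int main() {
  int fd = open("/proc/self/task", O_RDONLY | O_DIRECTORY);
  if (fd < 0) return 1;
  char buf[4096];
  for (;;) {
    long n = syscall(SYS_getdents64, fd, buf, sizeof(buf));
    if (n <= 0) break;  // 0: end of directory, <0: error
    for (long off = 0; off < n;) {
      dirent64_raw *d = reinterpret_cast<dirent64_raw *>(buf + off);
      // Same filter ThreadLister uses: task entries have all-numeric names.
      if (d->d_name[0] >= '0' && d->d_name[0] <= '9')
        std::printf("tid entry: %s\n", d->d_name);
      off += d->d_reclen;  // d_reclen is the only reliable record stride
    }
  }
  close(fd);
  return 0;
}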
# if SANITIZER_NETBSD // Not used # else struct linux_dirent { # if SANITIZER_X32 || SANITIZER_LINUX u64 d_ino; u64 d_off; # else unsigned long d_ino; unsigned long d_off; # endif unsigned short d_reclen; # if SANITIZER_LINUX unsigned char d_type; # endif char d_name[256]; }; # endif # if !SANITIZER_SOLARIS && !SANITIZER_NETBSD // Syscall wrappers. uptr internal_ptrace(int request, int pid, void *addr, void *data) { return internal_syscall(SYSCALL(ptrace), request, pid, (uptr)addr, (uptr)data); } uptr internal_waitpid(int pid, int *status, int options) { return internal_syscall(SYSCALL(wait4), pid, (uptr)status, options, 0 /* rusage */); } uptr internal_getpid() { return internal_syscall(SYSCALL(getpid)); } uptr internal_getppid() { return internal_syscall(SYSCALL(getppid)); } int internal_dlinfo(void *handle, int request, void *p) { # if SANITIZER_FREEBSD return dlinfo(handle, request, p); # else UNIMPLEMENTED(); # endif } uptr internal_getdents(fd_t fd, struct linux_dirent *dirp, unsigned int count) { # if SANITIZER_FREEBSD return internal_syscall(SYSCALL(getdirentries), fd, (uptr)dirp, count, NULL); # elif SANITIZER_LINUX return internal_syscall(SYSCALL(getdents64), fd, (uptr)dirp, count); # else return internal_syscall(SYSCALL(getdents), fd, (uptr)dirp, count); # endif } uptr internal_lseek(fd_t fd, OFF_T offset, int whence) { return internal_syscall(SYSCALL(lseek), fd, offset, whence); } # if SANITIZER_LINUX uptr internal_prctl(int option, uptr arg2, uptr arg3, uptr arg4, uptr arg5) { return internal_syscall(SYSCALL(prctl), option, arg2, arg3, arg4, arg5); } # if defined(__x86_64__) # include // Currently internal_arch_prctl() is only needed on x86_64. uptr internal_arch_prctl(int option, uptr arg2) { return internal_syscall(__NR_arch_prctl, option, arg2); } # endif # endif uptr internal_sigaltstack(const void *ss, void *oss) { return internal_syscall(SYSCALL(sigaltstack), (uptr)ss, (uptr)oss); } extern "C" pid_t __fork(void); int internal_fork() { # if SANITIZER_LINUX # if SANITIZER_S390 return internal_syscall(SYSCALL(clone), 0, SIGCHLD); # elif SANITIZER_SPARC // The clone syscall interface on SPARC differs massively from the rest, // so fall back to __fork. return __fork(); # else return internal_syscall(SYSCALL(clone), SIGCHLD, 0); # endif # else return internal_syscall(SYSCALL(fork)); # endif } # if SANITIZER_FREEBSD int internal_sysctl(const int *name, unsigned int namelen, void *oldp, uptr *oldlenp, const void *newp, uptr newlen) { return internal_syscall(SYSCALL(__sysctl), name, namelen, oldp, (size_t *)oldlenp, newp, (size_t)newlen); } int internal_sysctlbyname(const char *sname, void *oldp, uptr *oldlenp, const void *newp, uptr newlen) { // Note: this function can be called during startup, so we need to avoid // calling any interceptable functions. On FreeBSD >= 1300045 sysctlbyname() // is a real syscall, but for older versions it calls sysctlnametomib() // followed by sysctl(). To avoid calling the intercepted version and // asserting if this happens during startup, call the real sysctlnametomib() // followed by internal_sysctl() if the syscall is not available. 
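// Illustration only (standalone sketch, compile separately): the
// dlsym(RTLD_NEXT, ...) pattern referred to in the comment above -- fetching
// the next definition of a symbol in link order so the un-intercepted library
// version can be called. strlen is an arbitrary demo symbol, not something the
// runtime looks up this way; on glibc this needs _GNU_SOURCE and, for older
// versions, linking with -ldl.
#ifndef _GNU_SOURCE
#define _GNU_SOURCE 1  // RTLD_NEXT is a GNU/BSD extension
#endif
#include <dlfcn.h>
#include <cstddef>
#include <cstdio>

int main() {
  // Skip any interposed strlen() and grab the next definition in link order.
  using strlen_fn = std::size_t (*)(const char *);
  void *sym = dlsym(RTLD_NEXT, "strlen");
  if (!sym) {
    std::fprintf(stderr, "dlsym(RTLD_NEXT) failed: %s\n", dlerror());
    return 1;
  }
  strlen_fn real_strlen = reinterpret_cast<strlen_fn>(sym);
  std::printf("%zu\n", real_strlen("hello"));
  return 0;
}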
# ifdef SYS___sysctlbyname return internal_syscall(SYSCALL(__sysctlbyname), sname, internal_strlen(sname), oldp, (size_t *)oldlenp, newp, (size_t)newlen); # else static decltype(sysctlnametomib) *real_sysctlnametomib = nullptr; if (!real_sysctlnametomib) real_sysctlnametomib = (decltype(sysctlnametomib) *)dlsym(RTLD_NEXT, "sysctlnametomib"); CHECK(real_sysctlnametomib); int oid[CTL_MAXNAME]; size_t len = CTL_MAXNAME; if (real_sysctlnametomib(sname, oid, &len) == -1) return (-1); return internal_sysctl(oid, len, oldp, oldlenp, newp, newlen); # endif } # endif # if SANITIZER_LINUX # define SA_RESTORER 0x04000000 // Doesn't set sa_restorer if the caller did not set it, so use with caution //(see below). int internal_sigaction_norestorer(int signum, const void *act, void *oldact) { __sanitizer_kernel_sigaction_t k_act, k_oldact; internal_memset(&k_act, 0, sizeof(__sanitizer_kernel_sigaction_t)); internal_memset(&k_oldact, 0, sizeof(__sanitizer_kernel_sigaction_t)); const __sanitizer_sigaction *u_act = (const __sanitizer_sigaction *)act; __sanitizer_sigaction *u_oldact = (__sanitizer_sigaction *)oldact; if (u_act) { k_act.handler = u_act->handler; k_act.sigaction = u_act->sigaction; internal_memcpy(&k_act.sa_mask, &u_act->sa_mask, sizeof(__sanitizer_kernel_sigset_t)); // Without SA_RESTORER kernel ignores the calls (probably returns EINVAL). k_act.sa_flags = u_act->sa_flags | SA_RESTORER; // FIXME: most often sa_restorer is unset, however the kernel requires it // to point to a valid signal restorer that calls the rt_sigreturn syscall. // If sa_restorer passed to the kernel is NULL, the program may crash upon // signal delivery or fail to unwind the stack in the signal handler. // libc implementation of sigaction() passes its own restorer to // rt_sigaction, so we need to do the same (we'll need to reimplement the // restorers; for x86_64 the restorer address can be obtained from // oldact->sa_restorer upon a call to sigaction(xxx, NULL, oldact). # if !SANITIZER_ANDROID || !SANITIZER_MIPS32 k_act.sa_restorer = u_act->sa_restorer; # endif } uptr result = internal_syscall(SYSCALL(rt_sigaction), (uptr)signum, (uptr)(u_act ? &k_act : nullptr), (uptr)(u_oldact ? 
&k_oldact : nullptr), (uptr)sizeof(__sanitizer_kernel_sigset_t)); if ((result == 0) && u_oldact) { u_oldact->handler = k_oldact.handler; u_oldact->sigaction = k_oldact.sigaction; internal_memcpy(&u_oldact->sa_mask, &k_oldact.sa_mask, sizeof(__sanitizer_kernel_sigset_t)); u_oldact->sa_flags = k_oldact.sa_flags; # if !SANITIZER_ANDROID || !SANITIZER_MIPS32 u_oldact->sa_restorer = k_oldact.sa_restorer; # endif } return result; } # endif // SANITIZER_LINUX uptr internal_sigprocmask(int how, __sanitizer_sigset_t *set, __sanitizer_sigset_t *oldset) { # if SANITIZER_FREEBSD return internal_syscall(SYSCALL(sigprocmask), how, set, oldset); # else __sanitizer_kernel_sigset_t *k_set = (__sanitizer_kernel_sigset_t *)set; __sanitizer_kernel_sigset_t *k_oldset = (__sanitizer_kernel_sigset_t *)oldset; return internal_syscall(SYSCALL(rt_sigprocmask), (uptr)how, (uptr)k_set, (uptr)k_oldset, sizeof(__sanitizer_kernel_sigset_t)); # endif } void internal_sigfillset(__sanitizer_sigset_t *set) { internal_memset(set, 0xff, sizeof(*set)); } void internal_sigemptyset(__sanitizer_sigset_t *set) { internal_memset(set, 0, sizeof(*set)); } # if SANITIZER_LINUX void internal_sigdelset(__sanitizer_sigset_t *set, int signum) { signum -= 1; CHECK_GE(signum, 0); CHECK_LT(signum, sizeof(*set) * 8); __sanitizer_kernel_sigset_t *k_set = (__sanitizer_kernel_sigset_t *)set; const uptr idx = signum / (sizeof(k_set->sig[0]) * 8); const uptr bit = signum % (sizeof(k_set->sig[0]) * 8); k_set->sig[idx] &= ~((uptr)1 << bit); } bool internal_sigismember(__sanitizer_sigset_t *set, int signum) { signum -= 1; CHECK_GE(signum, 0); CHECK_LT(signum, sizeof(*set) * 8); __sanitizer_kernel_sigset_t *k_set = (__sanitizer_kernel_sigset_t *)set; const uptr idx = signum / (sizeof(k_set->sig[0]) * 8); const uptr bit = signum % (sizeof(k_set->sig[0]) * 8); return k_set->sig[idx] & ((uptr)1 << bit); } # elif SANITIZER_FREEBSD uptr internal_procctl(int type, int id, int cmd, void *data) { return internal_syscall(SYSCALL(procctl), type, id, cmd, data); } void internal_sigdelset(__sanitizer_sigset_t *set, int signum) { sigset_t *rset = reinterpret_cast(set); sigdelset(rset, signum); } bool internal_sigismember(__sanitizer_sigset_t *set, int signum) { sigset_t *rset = reinterpret_cast(set); return sigismember(rset, signum); } # endif # endif // !SANITIZER_SOLARIS # if !SANITIZER_NETBSD // ThreadLister implementation. ThreadLister::ThreadLister(pid_t pid) : pid_(pid), buffer_(4096) { char task_directory_path[80]; internal_snprintf(task_directory_path, sizeof(task_directory_path), "/proc/%d/task/", pid); descriptor_ = internal_open(task_directory_path, O_RDONLY | O_DIRECTORY); if (internal_iserror(descriptor_)) { Report("Can't open /proc/%d/task for reading.\n", pid); } } ThreadLister::Result ThreadLister::ListThreads( InternalMmapVector *threads) { if (internal_iserror(descriptor_)) return Error; internal_lseek(descriptor_, 0, SEEK_SET); threads->clear(); Result result = Ok; for (bool first_read = true;; first_read = false) { // Resize to max capacity if it was downsized by IsAlive. 
buffer_.resize(buffer_.capacity()); CHECK_GE(buffer_.size(), 4096); uptr read = internal_getdents( descriptor_, (struct linux_dirent *)buffer_.data(), buffer_.size()); if (!read) return result; if (internal_iserror(read)) { Report("Can't read directory entries from /proc/%d/task.\n", pid_); return Error; } for (uptr begin = (uptr)buffer_.data(), end = begin + read; begin < end;) { struct linux_dirent *entry = (struct linux_dirent *)begin; begin += entry->d_reclen; if (entry->d_ino == 1) { // Inode 1 is for bad blocks and also can be a reason for early return. // Should be emitted if kernel tried to output terminating thread. // See proc_task_readdir implementation in Linux. result = Incomplete; } if (entry->d_ino && *entry->d_name >= '0' && *entry->d_name <= '9') threads->push_back(internal_atoll(entry->d_name)); } // Now we are going to detect short-read or early EOF. In such cases Linux // can return inconsistent list with missing alive threads. // Code will just remember that the list can be incomplete but it will // continue reads to return as much as possible. if (!first_read) { // The first one was a short-read by definition. result = Incomplete; } else if (read > buffer_.size() - 1024) { // Read was close to the buffer size. So double the size and assume the // worst. buffer_.resize(buffer_.size() * 2); result = Incomplete; } else if (!threads->empty() && !IsAlive(threads->back())) { // Maybe Linux early returned from read on terminated thread (!pid_alive) // and failed to restore read position. // See next_tid and proc_task_instantiate in Linux. result = Incomplete; } } } bool ThreadLister::IsAlive(int tid) { // /proc/%d/task/%d/status uses same call to detect alive threads as // proc_task_readdir. See task_state implementation in Linux. char path[80]; internal_snprintf(path, sizeof(path), "/proc/%d/task/%d/status", pid_, tid); if (!ReadFileToVector(path, &buffer_) || buffer_.empty()) return false; buffer_.push_back(0); static const char kPrefix[] = "\nPPid:"; const char *field = internal_strstr(buffer_.data(), kPrefix); if (!field) return false; field += internal_strlen(kPrefix); return (int)internal_atoll(field) != 0; } ThreadLister::~ThreadLister() { if (!internal_iserror(descriptor_)) internal_close(descriptor_); } # endif # if SANITIZER_WORDSIZE == 32 // Take care of unusable kernel area in top gigabyte. static uptr GetKernelAreaSize() { # if SANITIZER_LINUX && !SANITIZER_X32 const uptr gbyte = 1UL << 30; // Firstly check if there are writable segments // mapped to top gigabyte (e.g. stack). MemoryMappingLayout proc_maps(/*cache_enabled*/ true); if (proc_maps.Error()) return 0; MemoryMappedSegment segment; while (proc_maps.Next(&segment)) { if ((segment.end >= 3 * gbyte) && segment.IsWritable()) return 0; } # if !SANITIZER_ANDROID // Even if nothing is mapped, top Gb may still be accessible // if we are running on 64-bit kernel. // Uname may report misleading results if personality type // is modified (e.g. under schroot) so check this as well. struct utsname uname_info; int pers = personality(0xffffffffUL); if (!(pers & PER_MASK) && internal_uname(&uname_info) == 0 && internal_strstr(uname_info.machine, "64")) return 0; # endif // SANITIZER_ANDROID // Top gigabyte is reserved for kernel. 
return gbyte; # else return 0; # endif // SANITIZER_LINUX && !SANITIZER_X32 } # endif // SANITIZER_WORDSIZE == 32 uptr GetMaxVirtualAddress() { # if SANITIZER_NETBSD && defined(__x86_64__) return 0x7f7ffffff000ULL; // (0x00007f8000000000 - PAGE_SIZE) # elif SANITIZER_WORDSIZE == 64 # if defined(__powerpc64__) || defined(__aarch64__) || \ defined(__loongarch__) || SANITIZER_RISCV64 // On PowerPC64 we have two different address space layouts: 44- and 46-bit. // We somehow need to figure out which one we are using now and choose // one of 0x00000fffffffffffUL and 0x00003fffffffffffUL. // Note that with 'ulimit -s unlimited' the stack is moved away from the top // of the address space, so simply checking the stack address is not enough. // This should (does) work for both PowerPC64 Endian modes. // Similarly, aarch64 has multiple address space layouts: 39, 42 and 47-bit. // loongarch64 also has multiple address space layouts: default is 47-bit. // RISC-V 64 also has multiple address space layouts: 39, 48 and 57-bit. return (1ULL << (MostSignificantSetBitIndex(GET_CURRENT_FRAME()) + 1)) - 1; # elif SANITIZER_MIPS64 return (1ULL << 40) - 1; // 0x000000ffffffffffUL; # elif defined(__s390x__) return (1ULL << 53) - 1; // 0x001fffffffffffffUL; # elif defined(__sparc__) return ~(uptr)0; # else return (1ULL << 47) - 1; // 0x00007fffffffffffUL; # endif # else // SANITIZER_WORDSIZE == 32 # if defined(__s390__) return (1ULL << 31) - 1; // 0x7fffffff; # else return (1ULL << 32) - 1; // 0xffffffff; # endif # endif // SANITIZER_WORDSIZE } uptr GetMaxUserVirtualAddress() { uptr addr = GetMaxVirtualAddress(); # if SANITIZER_WORDSIZE == 32 && !defined(__s390__) if (!common_flags()->full_address_space) addr -= GetKernelAreaSize(); CHECK_LT(reinterpret_cast(&addr), addr); # endif return addr; } # if !SANITIZER_ANDROID || defined(__aarch64__) uptr GetPageSize() { # if SANITIZER_LINUX && (defined(__x86_64__) || defined(__i386__)) && \ defined(EXEC_PAGESIZE) return EXEC_PAGESIZE; # elif SANITIZER_FREEBSD || SANITIZER_NETBSD // Use sysctl as sysconf can trigger interceptors internally. int pz = 0; uptr pzl = sizeof(pz); int mib[2] = {CTL_HW, HW_PAGESIZE}; int rv = internal_sysctl(mib, 2, &pz, &pzl, nullptr, 0); CHECK_EQ(rv, 0); return (uptr)pz; # elif SANITIZER_USE_GETAUXVAL return getauxval(AT_PAGESZ); # else return sysconf(_SC_PAGESIZE); // EXEC_PAGESIZE may not be trustworthy. # endif } # endif uptr ReadBinaryName(/*out*/ char *buf, uptr buf_len) { # if SANITIZER_SOLARIS const char *default_module_name = getexecname(); CHECK_NE(default_module_name, NULL); return internal_snprintf(buf, buf_len, "%s", default_module_name); # else # if SANITIZER_FREEBSD || SANITIZER_NETBSD # if SANITIZER_FREEBSD const int Mib[4] = {CTL_KERN, KERN_PROC, KERN_PROC_PATHNAME, -1}; # else const int Mib[4] = {CTL_KERN, KERN_PROC_ARGS, -1, KERN_PROC_PATHNAME}; # endif const char *default_module_name = "kern.proc.pathname"; uptr Size = buf_len; bool IsErr = (internal_sysctl(Mib, ARRAY_SIZE(Mib), buf, &Size, NULL, 0) != 0); int readlink_error = IsErr ? errno : 0; uptr module_name_len = Size; # else const char *default_module_name = "/proc/self/exe"; uptr module_name_len = internal_readlink(default_module_name, buf, buf_len); int readlink_error; bool IsErr = internal_iserror(module_name_len, &readlink_error); # endif if (IsErr) { // We can't read binary name for some reason, assume it's unknown. 
Report( "WARNING: reading executable name failed with errno %d, " "some stack frames may not be symbolized\n", readlink_error); module_name_len = internal_snprintf(buf, buf_len, "%s", default_module_name); CHECK_LT(module_name_len, buf_len); } return module_name_len; # endif } uptr ReadLongProcessName(/*out*/ char *buf, uptr buf_len) { # if SANITIZER_LINUX char *tmpbuf; uptr tmpsize; uptr tmplen; if (ReadFileToBuffer("/proc/self/cmdline", &tmpbuf, &tmpsize, &tmplen, 1024 * 1024)) { internal_strncpy(buf, tmpbuf, buf_len); UnmapOrDie(tmpbuf, tmpsize); return internal_strlen(buf); } # endif return ReadBinaryName(buf, buf_len); } // Match full names of the form /path/to/base_name{-,.}* bool LibraryNameIs(const char *full_name, const char *base_name) { const char *name = full_name; // Strip path. while (*name != '\0') name++; while (name > full_name && *name != '/') name--; if (*name == '/') name++; uptr base_name_length = internal_strlen(base_name); if (internal_strncmp(name, base_name, base_name_length)) return false; return (name[base_name_length] == '-' || name[base_name_length] == '.'); } # if !SANITIZER_ANDROID // Call cb for each region mapped by map. void ForEachMappedRegion(link_map *map, void (*cb)(const void *, uptr)) { CHECK_NE(map, nullptr); # if !SANITIZER_FREEBSD typedef ElfW(Phdr) Elf_Phdr; typedef ElfW(Ehdr) Elf_Ehdr; # endif // !SANITIZER_FREEBSD char *base = (char *)map->l_addr; Elf_Ehdr *ehdr = (Elf_Ehdr *)base; char *phdrs = base + ehdr->e_phoff; char *phdrs_end = phdrs + ehdr->e_phnum * ehdr->e_phentsize; // Find the segment with the minimum base so we can "relocate" the p_vaddr // fields. Typically ET_DYN objects (DSOs) have base of zero and ET_EXEC // objects have a non-zero base. uptr preferred_base = (uptr)-1; for (char *iter = phdrs; iter != phdrs_end; iter += ehdr->e_phentsize) { Elf_Phdr *phdr = (Elf_Phdr *)iter; if (phdr->p_type == PT_LOAD && preferred_base > (uptr)phdr->p_vaddr) preferred_base = (uptr)phdr->p_vaddr; } // Compute the delta from the real base to get a relocation delta. sptr delta = (uptr)base - preferred_base; // Now we can figure out what the loader really mapped. for (char *iter = phdrs; iter != phdrs_end; iter += ehdr->e_phentsize) { Elf_Phdr *phdr = (Elf_Phdr *)iter; if (phdr->p_type == PT_LOAD) { uptr seg_start = phdr->p_vaddr + delta; uptr seg_end = seg_start + phdr->p_memsz; // None of these values are aligned. We consider the ragged edges of the // load command as defined, since they are mapped from the file. seg_start = RoundDownTo(seg_start, GetPageSizeCached()); seg_end = RoundUpTo(seg_end, GetPageSizeCached()); cb((void *)seg_start, seg_end - seg_start); } } } # endif # if SANITIZER_LINUX # if defined(__x86_64__) // We cannot use glibc's clone wrapper, because it messes with the child // task's TLS. It writes the PID and TID of the child task to its thread // descriptor, but in our case the child task shares the thread descriptor with // the parent (because we don't know how to allocate a new thread // descriptor to keep glibc happy). So the stock version of clone(), when // used with CLONE_VM, would end up corrupting the parent's thread descriptor. 
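// For comparison only -- a standalone sketch using glibc's clone() wrapper to
// show the calling convention that internal_clone() below reproduces by hand:
// a child function, the top of a caller-provided stack, flags, and an
// argument. The runtime deliberately avoids this wrapper for the reasons given
// above; the toy flags here (plain SIGCHLD, no CLONE_VM) are chosen only so
// the child is safe to reap with waitpid().
#ifndef _GNU_SOURCE
#define _GNU_SOURCE 1  // for clone() in <sched.h> on glibc
#endif
#include <sched.h>
#include <signal.h>
#include <sys/wait.h>
#include <cstdio>
#include <cstdlib>

static int child_fn(void *arg) {
  // Runs in the child, on the caller-provided stack.
  std::printf("child sees arg = %d\n", *static_cast<int *>(arg));
  return 0;
}

int main() {
  const std::size_t kStackSize = 1 << 20;
  char *stack = static_cast<char *>(std::malloc(kStackSize));
  if (!stack) return 1;
  int arg = 42;
  // Stacks grow down on the architectures handled here, so pass the top.
  int pid = clone(child_fn, stack + kStackSize, SIGCHLD, &arg);
  if (pid == -1) {
    std::perror("clone");
    return 1;
  }
  int status = 0;
  waitpid(pid, &status, 0);
  std::free(stack);
  return 0;
}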
uptr internal_clone(int (*fn)(void *), void *child_stack, int flags, void *arg, int *parent_tidptr, void *newtls, int *child_tidptr) { long long res; if (!fn || !child_stack) return -EINVAL; CHECK_EQ(0, (uptr)child_stack % 16); child_stack = (char *)child_stack - 2 * sizeof(unsigned long long); ((unsigned long long *)child_stack)[0] = (uptr)fn; ((unsigned long long *)child_stack)[1] = (uptr)arg; register void *r8 __asm__("r8") = newtls; register int *r10 __asm__("r10") = child_tidptr; __asm__ __volatile__( /* %rax = syscall(%rax = SYSCALL(clone), * %rdi = flags, * %rsi = child_stack, * %rdx = parent_tidptr, * %r8 = new_tls, * %r10 = child_tidptr) */ "syscall\n" /* if (%rax != 0) * return; */ "testq %%rax,%%rax\n" "jnz 1f\n" /* In the child. Terminate unwind chain. */ // XXX: We should also terminate the CFI unwind chain // here. Unfortunately clang 3.2 doesn't support the // necessary CFI directives, so we skip that part. "xorq %%rbp,%%rbp\n" /* Call "fn(arg)". */ "popq %%rax\n" "popq %%rdi\n" "call *%%rax\n" /* Call _exit(%rax). */ "movq %%rax,%%rdi\n" "movq %2,%%rax\n" "syscall\n" /* Return to parent. */ "1:\n" : "=a"(res) : "a"(SYSCALL(clone)), "i"(SYSCALL(exit)), "S"(child_stack), "D"(flags), "d"(parent_tidptr), "r"(r8), "r"(r10) : "memory", "r11", "rcx"); return res; } # elif defined(__mips__) uptr internal_clone(int (*fn)(void *), void *child_stack, int flags, void *arg, int *parent_tidptr, void *newtls, int *child_tidptr) { long long res; if (!fn || !child_stack) return -EINVAL; CHECK_EQ(0, (uptr)child_stack % 16); child_stack = (char *)child_stack - 2 * sizeof(unsigned long long); ((unsigned long long *)child_stack)[0] = (uptr)fn; ((unsigned long long *)child_stack)[1] = (uptr)arg; register void *a3 __asm__("$7") = newtls; register int *a4 __asm__("$8") = child_tidptr; // We don't have proper CFI directives here because it requires alot of code // for very marginal benefits. __asm__ __volatile__( /* $v0 = syscall($v0 = __NR_clone, * $a0 = flags, * $a1 = child_stack, * $a2 = parent_tidptr, * $a3 = new_tls, * $a4 = child_tidptr) */ ".cprestore 16;\n" "move $4,%1;\n" "move $5,%2;\n" "move $6,%3;\n" "move $7,%4;\n" /* Store the fifth argument on stack * if we are using 32-bit abi. */ # if SANITIZER_WORDSIZE == 32 "lw %5,16($29);\n" # else "move $8,%5;\n" # endif "li $2,%6;\n" "syscall;\n" /* if ($v0 != 0) * return; */ "bnez $2,1f;\n" /* Call "fn(arg)". */ # if SANITIZER_WORDSIZE == 32 # ifdef __BIG_ENDIAN__ "lw $25,4($29);\n" "lw $4,12($29);\n" # else "lw $25,0($29);\n" "lw $4,8($29);\n" # endif # else "ld $25,0($29);\n" "ld $4,8($29);\n" # endif "jal $25;\n" /* Call _exit($v0). */ "move $4,$2;\n" "li $2,%7;\n" "syscall;\n" /* Return to parent. 
*/ "1:\n" : "=r"(res) : "r"(flags), "r"(child_stack), "r"(parent_tidptr), "r"(a3), "r"(a4), "i"(__NR_clone), "i"(__NR_exit) : "memory", "$29"); return res; } # elif SANITIZER_RISCV64 uptr internal_clone(int (*fn)(void *), void *child_stack, int flags, void *arg, int *parent_tidptr, void *newtls, int *child_tidptr) { if (!fn || !child_stack) return -EINVAL; CHECK_EQ(0, (uptr)child_stack % 16); register int res __asm__("a0"); register int __flags __asm__("a0") = flags; register void *__stack __asm__("a1") = child_stack; register int *__ptid __asm__("a2") = parent_tidptr; register void *__tls __asm__("a3") = newtls; register int *__ctid __asm__("a4") = child_tidptr; register int (*__fn)(void *) __asm__("a5") = fn; register void *__arg __asm__("a6") = arg; register int nr_clone __asm__("a7") = __NR_clone; __asm__ __volatile__( "ecall\n" /* if (a0 != 0) * return a0; */ "bnez a0, 1f\n" // In the child, now. Call "fn(arg)". "mv a0, a6\n" "jalr a5\n" // Call _exit(a0). "addi a7, zero, %9\n" "ecall\n" "1:\n" : "=r"(res) : "0"(__flags), "r"(__stack), "r"(__ptid), "r"(__tls), "r"(__ctid), "r"(__fn), "r"(__arg), "r"(nr_clone), "i"(__NR_exit) : "memory"); return res; } # elif defined(__aarch64__) uptr internal_clone(int (*fn)(void *), void *child_stack, int flags, void *arg, int *parent_tidptr, void *newtls, int *child_tidptr) { register long long res __asm__("x0"); if (!fn || !child_stack) return -EINVAL; CHECK_EQ(0, (uptr)child_stack % 16); child_stack = (char *)child_stack - 2 * sizeof(unsigned long long); ((unsigned long long *)child_stack)[0] = (uptr)fn; ((unsigned long long *)child_stack)[1] = (uptr)arg; register int (*__fn)(void *) __asm__("x0") = fn; register void *__stack __asm__("x1") = child_stack; register int __flags __asm__("x2") = flags; register void *__arg __asm__("x3") = arg; register int *__ptid __asm__("x4") = parent_tidptr; register void *__tls __asm__("x5") = newtls; register int *__ctid __asm__("x6") = child_tidptr; __asm__ __volatile__( "mov x0,x2\n" /* flags */ "mov x2,x4\n" /* ptid */ "mov x3,x5\n" /* tls */ "mov x4,x6\n" /* ctid */ "mov x8,%9\n" /* clone */ "svc 0x0\n" /* if (%r0 != 0) * return %r0; */ "cmp x0, #0\n" "bne 1f\n" /* In the child, now. Call "fn(arg)". */ "ldp x1, x0, [sp], #16\n" "blr x1\n" /* Call _exit(%r0). */ "mov x8, %10\n" "svc 0x0\n" "1:\n" : "=r"(res) : "i"(-EINVAL), "r"(__fn), "r"(__stack), "r"(__flags), "r"(__arg), "r"(__ptid), "r"(__tls), "r"(__ctid), "i"(__NR_clone), "i"(__NR_exit) : "x30", "memory"); return res; } # elif SANITIZER_LOONGARCH64 uptr internal_clone(int (*fn)(void *), void *child_stack, int flags, void *arg, int *parent_tidptr, void *newtls, int *child_tidptr) { if (!fn || !child_stack) return -EINVAL; CHECK_EQ(0, (uptr)child_stack % 16); register int res __asm__("$a0"); register int __flags __asm__("$a0") = flags; register void *__stack __asm__("$a1") = child_stack; register int *__ptid __asm__("$a2") = parent_tidptr; register int *__ctid __asm__("$a3") = child_tidptr; register void *__tls __asm__("$a4") = newtls; register int (*__fn)(void *) __asm__("$a5") = fn; register void *__arg __asm__("$a6") = arg; register int nr_clone __asm__("$a7") = __NR_clone; __asm__ __volatile__( "syscall 0\n" // if ($a0 != 0) // return $a0; "bnez $a0, 1f\n" // In the child, now. Call "fn(arg)". "move $a0, $a6\n" "jirl $ra, $a5, 0\n" // Call _exit($a0). 
"addi.d $a7, $zero, %9\n" "syscall 0\n" "1:\n" : "=r"(res) : "0"(__flags), "r"(__stack), "r"(__ptid), "r"(__ctid), "r"(__tls), "r"(__fn), "r"(__arg), "r"(nr_clone), "i"(__NR_exit) : "memory", "$t0", "$t1", "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t8"); return res; } # elif defined(__powerpc64__) uptr internal_clone(int (*fn)(void *), void *child_stack, int flags, void *arg, int *parent_tidptr, void *newtls, int *child_tidptr) { long long res; // Stack frame structure. # if SANITIZER_PPC64V1 // Back chain == 0 (SP + 112) // Frame (112 bytes): // Parameter save area (SP + 48), 8 doublewords // TOC save area (SP + 40) // Link editor doubleword (SP + 32) // Compiler doubleword (SP + 24) // LR save area (SP + 16) // CR save area (SP + 8) // Back chain (SP + 0) # define FRAME_SIZE 112 # define FRAME_TOC_SAVE_OFFSET 40 # elif SANITIZER_PPC64V2 // Back chain == 0 (SP + 32) // Frame (32 bytes): // TOC save area (SP + 24) // LR save area (SP + 16) // CR save area (SP + 8) // Back chain (SP + 0) # define FRAME_SIZE 32 # define FRAME_TOC_SAVE_OFFSET 24 # else # error "Unsupported PPC64 ABI" # endif if (!fn || !child_stack) return -EINVAL; CHECK_EQ(0, (uptr)child_stack % 16); register int (*__fn)(void *) __asm__("r3") = fn; register void *__cstack __asm__("r4") = child_stack; register int __flags __asm__("r5") = flags; register void *__arg __asm__("r6") = arg; register int *__ptidptr __asm__("r7") = parent_tidptr; register void *__newtls __asm__("r8") = newtls; register int *__ctidptr __asm__("r9") = child_tidptr; __asm__ __volatile__( /* fn and arg are saved across the syscall */ "mr 28, %5\n\t" "mr 27, %8\n\t" /* syscall r0 == __NR_clone r3 == flags r4 == child_stack r5 == parent_tidptr r6 == newtls r7 == child_tidptr */ "mr 3, %7\n\t" "mr 5, %9\n\t" "mr 6, %10\n\t" "mr 7, %11\n\t" "li 0, %3\n\t" "sc\n\t" /* Test if syscall was successful */ "cmpdi cr1, 3, 0\n\t" "crandc cr1*4+eq, cr1*4+eq, cr0*4+so\n\t" "bne- cr1, 1f\n\t" /* Set up stack frame */ "li 29, 0\n\t" "stdu 29, -8(1)\n\t" "stdu 1, -%12(1)\n\t" /* Do the function call */ "std 2, %13(1)\n\t" # if SANITIZER_PPC64V1 "ld 0, 0(28)\n\t" "ld 2, 8(28)\n\t" "mtctr 0\n\t" # elif SANITIZER_PPC64V2 "mr 12, 28\n\t" "mtctr 12\n\t" # else # error "Unsupported PPC64 ABI" # endif "mr 3, 27\n\t" "bctrl\n\t" "ld 2, %13(1)\n\t" /* Call _exit(r3) */ "li 0, %4\n\t" "sc\n\t" /* Return to parent */ "1:\n\t" "mr %0, 3\n\t" : "=r"(res) : "0"(-1), "i"(EINVAL), "i"(__NR_clone), "i"(__NR_exit), "r"(__fn), "r"(__cstack), "r"(__flags), "r"(__arg), "r"(__ptidptr), "r"(__newtls), "r"(__ctidptr), "i"(FRAME_SIZE), "i"(FRAME_TOC_SAVE_OFFSET) : "cr0", "cr1", "memory", "ctr", "r0", "r27", "r28", "r29"); return res; } # elif defined(__i386__) uptr internal_clone(int (*fn)(void *), void *child_stack, int flags, void *arg, int *parent_tidptr, void *newtls, int *child_tidptr) { int res; if (!fn || !child_stack) return -EINVAL; CHECK_EQ(0, (uptr)child_stack % 16); child_stack = (char *)child_stack - 7 * sizeof(unsigned int); ((unsigned int *)child_stack)[0] = (uptr)flags; ((unsigned int *)child_stack)[1] = (uptr)0; ((unsigned int *)child_stack)[2] = (uptr)fn; ((unsigned int *)child_stack)[3] = (uptr)arg; __asm__ __volatile__( /* %eax = syscall(%eax = SYSCALL(clone), * %ebx = flags, * %ecx = child_stack, * %edx = parent_tidptr, * %esi = new_tls, * %edi = child_tidptr) */ /* Obtain flags */ "movl (%%ecx), %%ebx\n" /* Do the system call */ "pushl %%ebx\n" "pushl %%esi\n" "pushl %%edi\n" /* Remember the flag value. 
*/ "movl %%ebx, (%%ecx)\n" "int $0x80\n" "popl %%edi\n" "popl %%esi\n" "popl %%ebx\n" /* if (%eax != 0) * return; */ "test %%eax,%%eax\n" "jnz 1f\n" /* terminate the stack frame */ "xorl %%ebp,%%ebp\n" /* Call FN. */ "call *%%ebx\n" # ifdef PIC "call here\n" "here:\n" "popl %%ebx\n" "addl $_GLOBAL_OFFSET_TABLE_+[.-here], %%ebx\n" # endif /* Call exit */ "movl %%eax, %%ebx\n" "movl %2, %%eax\n" "int $0x80\n" "1:\n" : "=a"(res) : "a"(SYSCALL(clone)), "i"(SYSCALL(exit)), "c"(child_stack), "d"(parent_tidptr), "S"(newtls), "D"(child_tidptr) : "memory"); return res; } # elif defined(__arm__) uptr internal_clone(int (*fn)(void *), void *child_stack, int flags, void *arg, int *parent_tidptr, void *newtls, int *child_tidptr) { unsigned int res; if (!fn || !child_stack) return -EINVAL; child_stack = (char *)child_stack - 2 * sizeof(unsigned int); ((unsigned int *)child_stack)[0] = (uptr)fn; ((unsigned int *)child_stack)[1] = (uptr)arg; register int r0 __asm__("r0") = flags; register void *r1 __asm__("r1") = child_stack; register int *r2 __asm__("r2") = parent_tidptr; register void *r3 __asm__("r3") = newtls; register int *r4 __asm__("r4") = child_tidptr; register int r7 __asm__("r7") = __NR_clone; # if __ARM_ARCH > 4 || defined(__ARM_ARCH_4T__) # define ARCH_HAS_BX # endif # if __ARM_ARCH > 4 # define ARCH_HAS_BLX # endif # ifdef ARCH_HAS_BX # ifdef ARCH_HAS_BLX # define BLX(R) "blx " #R "\n" # else # define BLX(R) "mov lr, pc; bx " #R "\n" # endif # else # define BLX(R) "mov lr, pc; mov pc," #R "\n" # endif __asm__ __volatile__( /* %r0 = syscall(%r7 = SYSCALL(clone), * %r0 = flags, * %r1 = child_stack, * %r2 = parent_tidptr, * %r3 = new_tls, * %r4 = child_tidptr) */ /* Do the system call */ "swi 0x0\n" /* if (%r0 != 0) * return %r0; */ "cmp r0, #0\n" "bne 1f\n" /* In the child, now. Call "fn(arg)". */ "ldr r0, [sp, #4]\n" "ldr ip, [sp], #8\n" BLX(ip) /* Call _exit(%r0). */ "mov r7, %7\n" "swi 0x0\n" "1:\n" "mov %0, r0\n" : "=r"(res) : "r"(r0), "r"(r1), "r"(r2), "r"(r3), "r"(r4), "r"(r7), "i"(__NR_exit) : "memory"); return res; } # endif # endif // SANITIZER_LINUX # if SANITIZER_LINUX int internal_uname(struct utsname *buf) { return internal_syscall(SYSCALL(uname), buf); } # endif # if SANITIZER_ANDROID # if __ANDROID_API__ < 21 extern "C" __attribute__((weak)) int dl_iterate_phdr( int (*)(struct dl_phdr_info *, size_t, void *), void *); # endif static int dl_iterate_phdr_test_cb(struct dl_phdr_info *info, size_t size, void *data) { // Any name starting with "lib" indicates a bug in L where library base names // are returned instead of paths. if (info->dlpi_name && info->dlpi_name[0] == 'l' && info->dlpi_name[1] == 'i' && info->dlpi_name[2] == 'b') { *(bool *)data = true; return 1; } return 0; } static atomic_uint32_t android_api_level; static AndroidApiLevel AndroidDetectApiLevelStatic() { # if __ANDROID_API__ <= 19 return ANDROID_KITKAT; # elif __ANDROID_API__ <= 22 return ANDROID_LOLLIPOP_MR1; # else return ANDROID_POST_LOLLIPOP; # endif } static AndroidApiLevel AndroidDetectApiLevel() { if (!&dl_iterate_phdr) return ANDROID_KITKAT; // K or lower bool base_name_seen = false; dl_iterate_phdr(dl_iterate_phdr_test_cb, &base_name_seen); if (base_name_seen) return ANDROID_LOLLIPOP_MR1; // L MR1 return ANDROID_POST_LOLLIPOP; // post-L // Plain L (API level 21) is completely broken wrt ASan and not very // interesting to detect. 
} extern "C" __attribute__((weak)) void *_DYNAMIC; AndroidApiLevel AndroidGetApiLevel() { AndroidApiLevel level = (AndroidApiLevel)atomic_load(&android_api_level, memory_order_relaxed); if (level) return level; level = &_DYNAMIC == nullptr ? AndroidDetectApiLevelStatic() : AndroidDetectApiLevel(); atomic_store(&android_api_level, level, memory_order_relaxed); return level; } # endif static HandleSignalMode GetHandleSignalModeImpl(int signum) { switch (signum) { case SIGABRT: return common_flags()->handle_abort; case SIGILL: return common_flags()->handle_sigill; case SIGTRAP: return common_flags()->handle_sigtrap; case SIGFPE: return common_flags()->handle_sigfpe; case SIGSEGV: return common_flags()->handle_segv; case SIGBUS: return common_flags()->handle_sigbus; } return kHandleSignalNo; } HandleSignalMode GetHandleSignalMode(int signum) { HandleSignalMode result = GetHandleSignalModeImpl(signum); if (result == kHandleSignalYes && !common_flags()->allow_user_segv_handler) return kHandleSignalExclusive; return result; } # if !SANITIZER_GO void *internal_start_thread(void *(*func)(void *arg), void *arg) { if (&internal_pthread_create == 0) return nullptr; // Start the thread with signals blocked, otherwise it can steal user signals. ScopedBlockSignals block(nullptr); void *th; internal_pthread_create(&th, nullptr, func, arg); return th; } void internal_join_thread(void *th) { if (&internal_pthread_join) internal_pthread_join(th, nullptr); } # else void *internal_start_thread(void *(*func)(void *), void *arg) { return 0; } void internal_join_thread(void *th) {} # endif # if SANITIZER_LINUX && defined(__aarch64__) // Android headers in the older NDK releases miss this definition. struct __sanitizer_esr_context { struct _aarch64_ctx head; uint64_t esr; }; static bool Aarch64GetESR(ucontext_t *ucontext, u64 *esr) { static const u32 kEsrMagic = 0x45535201; u8 *aux = reinterpret_cast(ucontext->uc_mcontext.__reserved); while (true) { _aarch64_ctx *ctx = (_aarch64_ctx *)aux; if (ctx->size == 0) break; if (ctx->magic == kEsrMagic) { *esr = ((__sanitizer_esr_context *)ctx)->esr; return true; } aux += ctx->size; } return false; } # elif SANITIZER_FREEBSD && defined(__aarch64__) // FreeBSD doesn't provide ESR in the ucontext. static bool Aarch64GetESR(ucontext_t *ucontext, u64 *esr) { return false; } # endif using Context = ucontext_t; SignalContext::WriteFlag SignalContext::GetWriteFlag() const { Context *ucontext = (Context *)context; # if defined(__x86_64__) || defined(__i386__) static const uptr PF_WRITE = 1U << 1; # if SANITIZER_FREEBSD uptr err = ucontext->uc_mcontext.mc_err; # elif SANITIZER_NETBSD uptr err = ucontext->uc_mcontext.__gregs[_REG_ERR]; # elif SANITIZER_SOLARIS && defined(__i386__) const int Err = 13; uptr err = ucontext->uc_mcontext.gregs[Err]; # else uptr err = ucontext->uc_mcontext.gregs[REG_ERR]; # endif // SANITIZER_FREEBSD return err & PF_WRITE ? Write : Read; # elif defined(__mips__) uint32_t *exception_source; uint32_t faulty_instruction; uint32_t op_code; exception_source = (uint32_t *)ucontext->uc_mcontext.pc; faulty_instruction = (uint32_t)(*exception_source); op_code = (faulty_instruction >> 26) & 0x3f; // FIXME: Add support for FPU, microMIPS, DSP, MSA memory instructions. 
switch (op_code) { case 0x28: // sb case 0x29: // sh case 0x2b: // sw case 0x3f: // sd # if __mips_isa_rev < 6 case 0x2c: // sdl case 0x2d: // sdr case 0x2a: // swl case 0x2e: // swr # endif return SignalContext::Write; case 0x20: // lb case 0x24: // lbu case 0x21: // lh case 0x25: // lhu case 0x23: // lw case 0x27: // lwu case 0x37: // ld # if __mips_isa_rev < 6 case 0x1a: // ldl case 0x1b: // ldr case 0x22: // lwl case 0x26: // lwr # endif return SignalContext::Read; # if __mips_isa_rev == 6 case 0x3b: // pcrel op_code = (faulty_instruction >> 19) & 0x3; switch (op_code) { case 0x1: // lwpc case 0x2: // lwupc return SignalContext::Read; } # endif } return SignalContext::Unknown; # elif defined(__arm__) static const uptr FSR_WRITE = 1U << 11; uptr fsr = ucontext->uc_mcontext.error_code; return fsr & FSR_WRITE ? Write : Read; # elif defined(__aarch64__) static const u64 ESR_ELx_WNR = 1U << 6; u64 esr; if (!Aarch64GetESR(ucontext, &esr)) return Unknown; return esr & ESR_ELx_WNR ? Write : Read; # elif defined(__loongarch__) // In the musl environment, the Linux kernel uapi sigcontext.h is not // included in signal.h. To avoid missing the SC_ADDRERR_{RD,WR} macros, // copy them here. The LoongArch Linux kernel uapi is already stable, // so there's no need to worry about the value changing. # ifndef SC_ADDRERR_RD // Address error was due to memory load # define SC_ADDRERR_RD (1 << 30) # endif # ifndef SC_ADDRERR_WR // Address error was due to memory store # define SC_ADDRERR_WR (1 << 31) # endif u32 flags = ucontext->uc_mcontext.__flags; if (flags & SC_ADDRERR_RD) return SignalContext::Read; if (flags & SC_ADDRERR_WR) return SignalContext::Write; return SignalContext::Unknown; # elif defined(__sparc__) // Decode the instruction to determine the access type. // From OpenSolaris $SRC/uts/sun4/os/trap.c (get_accesstype). # if SANITIZER_SOLARIS uptr pc = ucontext->uc_mcontext.gregs[REG_PC]; # else // Historical BSDism here. struct sigcontext *scontext = (struct sigcontext *)context; # if defined(__arch64__) uptr pc = scontext->sigc_regs.tpc; # else uptr pc = scontext->si_regs.pc; # endif # endif u32 instr = *(u32 *)pc; return (instr >> 21) & 1 ? Write : Read; # elif defined(__riscv) # if SANITIZER_FREEBSD unsigned long pc = ucontext->uc_mcontext.mc_gpregs.gp_sepc; # else unsigned long pc = ucontext->uc_mcontext.__gregs[REG_PC]; # endif unsigned faulty_instruction = *(uint16_t *)pc; # if defined(__riscv_compressed) if ((faulty_instruction & 0x3) != 0x3) { // it's a compressed instruction // set op_bits to the instruction bits [1, 0, 15, 14, 13] unsigned op_bits = ((faulty_instruction & 0x3) << 3) | (faulty_instruction >> 13); unsigned rd = faulty_instruction & 0xF80; // bits 7-11, inclusive switch (op_bits) { case 0b10'010: // c.lwsp (rd != x0) # if __riscv_xlen == 64 case 0b10'011: // c.ldsp (rd != x0) # endif return rd ? 
SignalContext::Read : SignalContext::Unknown; case 0b00'010: // c.lw # if __riscv_flen >= 32 && __riscv_xlen == 32 case 0b10'011: // c.flwsp # endif # if __riscv_flen >= 32 || __riscv_xlen == 64 case 0b00'011: // c.flw / c.ld # endif # if __riscv_flen == 64 case 0b00'001: // c.fld case 0b10'001: // c.fldsp # endif return SignalContext::Read; case 0b00'110: // c.sw case 0b10'110: // c.swsp # if __riscv_flen >= 32 || __riscv_xlen == 64 case 0b00'111: // c.fsw / c.sd case 0b10'111: // c.fswsp / c.sdsp # endif # if __riscv_flen == 64 case 0b00'101: // c.fsd case 0b10'101: // c.fsdsp # endif return SignalContext::Write; default: return SignalContext::Unknown; } } # endif unsigned opcode = faulty_instruction & 0x7f; // lower 7 bits unsigned funct3 = (faulty_instruction >> 12) & 0x7; // bits 12-14, inclusive switch (opcode) { case 0b0000011: // loads switch (funct3) { case 0b000: // lb case 0b001: // lh case 0b010: // lw # if __riscv_xlen == 64 case 0b011: // ld # endif case 0b100: // lbu case 0b101: // lhu return SignalContext::Read; default: return SignalContext::Unknown; } case 0b0100011: // stores switch (funct3) { case 0b000: // sb case 0b001: // sh case 0b010: // sw # if __riscv_xlen == 64 case 0b011: // sd # endif return SignalContext::Write; default: return SignalContext::Unknown; } # if __riscv_flen >= 32 case 0b0000111: // floating-point loads switch (funct3) { case 0b010: // flw # if __riscv_flen == 64 case 0b011: // fld # endif return SignalContext::Read; default: return SignalContext::Unknown; } case 0b0100111: // floating-point stores switch (funct3) { case 0b010: // fsw # if __riscv_flen == 64 case 0b011: // fsd # endif return SignalContext::Write; default: return SignalContext::Unknown; } # endif default: return SignalContext::Unknown; } # else (void)ucontext; return Unknown; // FIXME: Implement. # endif } bool SignalContext::IsTrueFaultingAddress() const { auto si = static_cast(siginfo); // SIGSEGV signals without a true fault address have si_code set to 128. return si->si_signo == SIGSEGV && si->si_code != 128; } UNUSED static const char *RegNumToRegName(int reg) { switch (reg) { # if SANITIZER_LINUX # if defined(__x86_64__) case REG_RAX: return "rax"; case REG_RBX: return "rbx"; case REG_RCX: return "rcx"; case REG_RDX: return "rdx"; case REG_RDI: return "rdi"; case REG_RSI: return "rsi"; case REG_RBP: return "rbp"; case REG_RSP: return "rsp"; case REG_R8: return "r8"; case REG_R9: return "r9"; case REG_R10: return "r10"; case REG_R11: return "r11"; case REG_R12: return "r12"; case REG_R13: return "r13"; case REG_R14: return "r14"; case REG_R15: return "r15"; # elif defined(__i386__) case REG_EAX: return "eax"; case REG_EBX: return "ebx"; case REG_ECX: return "ecx"; case REG_EDX: return "edx"; case REG_EDI: return "edi"; case REG_ESI: return "esi"; case REG_EBP: return "ebp"; case REG_ESP: return "esp"; # endif # endif default: return NULL; } return NULL; } # if SANITIZER_LINUX UNUSED static void DumpSingleReg(ucontext_t *ctx, int RegNum) { const char *RegName = RegNumToRegName(RegNum); # if defined(__x86_64__) Printf("%s%s = 0x%016llx ", internal_strlen(RegName) == 2 ? 
" " : "", RegName, ctx->uc_mcontext.gregs[RegNum]); # elif defined(__i386__) Printf("%s = 0x%08x ", RegName, ctx->uc_mcontext.gregs[RegNum]); # else (void)RegName; # endif } # endif void SignalContext::DumpAllRegisters(void *context) { ucontext_t *ucontext = (ucontext_t *)context; # if SANITIZER_LINUX # if defined(__x86_64__) Report("Register values:\n"); DumpSingleReg(ucontext, REG_RAX); DumpSingleReg(ucontext, REG_RBX); DumpSingleReg(ucontext, REG_RCX); DumpSingleReg(ucontext, REG_RDX); Printf("\n"); DumpSingleReg(ucontext, REG_RDI); DumpSingleReg(ucontext, REG_RSI); DumpSingleReg(ucontext, REG_RBP); DumpSingleReg(ucontext, REG_RSP); Printf("\n"); DumpSingleReg(ucontext, REG_R8); DumpSingleReg(ucontext, REG_R9); DumpSingleReg(ucontext, REG_R10); DumpSingleReg(ucontext, REG_R11); Printf("\n"); DumpSingleReg(ucontext, REG_R12); DumpSingleReg(ucontext, REG_R13); DumpSingleReg(ucontext, REG_R14); DumpSingleReg(ucontext, REG_R15); Printf("\n"); # elif defined(__i386__) // Duplication of this report print is caused by partial support // of register values dumping. In case of unsupported yet architecture let's // avoid printing 'Register values:' without actual values in the following // output. Report("Register values:\n"); DumpSingleReg(ucontext, REG_EAX); DumpSingleReg(ucontext, REG_EBX); DumpSingleReg(ucontext, REG_ECX); DumpSingleReg(ucontext, REG_EDX); Printf("\n"); DumpSingleReg(ucontext, REG_EDI); DumpSingleReg(ucontext, REG_ESI); DumpSingleReg(ucontext, REG_EBP); DumpSingleReg(ucontext, REG_ESP); Printf("\n"); # else (void)ucontext; # endif # elif SANITIZER_FREEBSD # if defined(__x86_64__) Report("Register values:\n"); Printf("rax = 0x%016lx ", ucontext->uc_mcontext.mc_rax); Printf("rbx = 0x%016lx ", ucontext->uc_mcontext.mc_rbx); Printf("rcx = 0x%016lx ", ucontext->uc_mcontext.mc_rcx); Printf("rdx = 0x%016lx ", ucontext->uc_mcontext.mc_rdx); Printf("\n"); Printf("rdi = 0x%016lx ", ucontext->uc_mcontext.mc_rdi); Printf("rsi = 0x%016lx ", ucontext->uc_mcontext.mc_rsi); Printf("rbp = 0x%016lx ", ucontext->uc_mcontext.mc_rbp); Printf("rsp = 0x%016lx ", ucontext->uc_mcontext.mc_rsp); Printf("\n"); Printf(" r8 = 0x%016lx ", ucontext->uc_mcontext.mc_r8); Printf(" r9 = 0x%016lx ", ucontext->uc_mcontext.mc_r9); Printf("r10 = 0x%016lx ", ucontext->uc_mcontext.mc_r10); Printf("r11 = 0x%016lx ", ucontext->uc_mcontext.mc_r11); Printf("\n"); Printf("r12 = 0x%016lx ", ucontext->uc_mcontext.mc_r12); Printf("r13 = 0x%016lx ", ucontext->uc_mcontext.mc_r13); Printf("r14 = 0x%016lx ", ucontext->uc_mcontext.mc_r14); Printf("r15 = 0x%016lx ", ucontext->uc_mcontext.mc_r15); Printf("\n"); # elif defined(__i386__) Report("Register values:\n"); Printf("eax = 0x%08x ", ucontext->uc_mcontext.mc_eax); Printf("ebx = 0x%08x ", ucontext->uc_mcontext.mc_ebx); Printf("ecx = 0x%08x ", ucontext->uc_mcontext.mc_ecx); Printf("edx = 0x%08x ", ucontext->uc_mcontext.mc_edx); Printf("\n"); Printf("edi = 0x%08x ", ucontext->uc_mcontext.mc_edi); Printf("esi = 0x%08x ", ucontext->uc_mcontext.mc_esi); Printf("ebp = 0x%08x ", ucontext->uc_mcontext.mc_ebp); Printf("esp = 0x%08x ", ucontext->uc_mcontext.mc_esp); Printf("\n"); # else (void)ucontext; # endif # endif // FIXME: Implement this for other OSes and architectures. 
} static void GetPcSpBp(void *context, uptr *pc, uptr *sp, uptr *bp) { # if SANITIZER_NETBSD // This covers all NetBSD architectures ucontext_t *ucontext = (ucontext_t *)context; *pc = _UC_MACHINE_PC(ucontext); *bp = _UC_MACHINE_FP(ucontext); *sp = _UC_MACHINE_SP(ucontext); # elif defined(__arm__) ucontext_t *ucontext = (ucontext_t *)context; *pc = ucontext->uc_mcontext.arm_pc; *bp = ucontext->uc_mcontext.arm_fp; *sp = ucontext->uc_mcontext.arm_sp; # elif defined(__aarch64__) # if SANITIZER_FREEBSD ucontext_t *ucontext = (ucontext_t *)context; *pc = ucontext->uc_mcontext.mc_gpregs.gp_elr; *bp = ucontext->uc_mcontext.mc_gpregs.gp_x[29]; *sp = ucontext->uc_mcontext.mc_gpregs.gp_sp; # else ucontext_t *ucontext = (ucontext_t *)context; *pc = ucontext->uc_mcontext.pc; *bp = ucontext->uc_mcontext.regs[29]; *sp = ucontext->uc_mcontext.sp; # endif # elif defined(__hppa__) ucontext_t *ucontext = (ucontext_t *)context; *pc = ucontext->uc_mcontext.sc_iaoq[0]; /* GCC uses %r3 whenever a frame pointer is needed. */ *bp = ucontext->uc_mcontext.sc_gr[3]; *sp = ucontext->uc_mcontext.sc_gr[30]; # elif defined(__x86_64__) # if SANITIZER_FREEBSD ucontext_t *ucontext = (ucontext_t *)context; *pc = ucontext->uc_mcontext.mc_rip; *bp = ucontext->uc_mcontext.mc_rbp; *sp = ucontext->uc_mcontext.mc_rsp; # else ucontext_t *ucontext = (ucontext_t *)context; *pc = ucontext->uc_mcontext.gregs[REG_RIP]; *bp = ucontext->uc_mcontext.gregs[REG_RBP]; *sp = ucontext->uc_mcontext.gregs[REG_RSP]; # endif # elif defined(__i386__) # if SANITIZER_FREEBSD ucontext_t *ucontext = (ucontext_t *)context; *pc = ucontext->uc_mcontext.mc_eip; *bp = ucontext->uc_mcontext.mc_ebp; *sp = ucontext->uc_mcontext.mc_esp; # else ucontext_t *ucontext = (ucontext_t *)context; # if SANITIZER_SOLARIS /* Use the numeric values: the symbolic ones are undefined by llvm include/llvm/Support/Solaris.h. */ # ifndef REG_EIP # define REG_EIP 14 // REG_PC # endif # ifndef REG_EBP # define REG_EBP 6 // REG_FP # endif # ifndef REG_UESP # define REG_UESP 17 // REG_SP # endif # endif *pc = ucontext->uc_mcontext.gregs[REG_EIP]; *bp = ucontext->uc_mcontext.gregs[REG_EBP]; *sp = ucontext->uc_mcontext.gregs[REG_UESP]; # endif # elif defined(__powerpc__) || defined(__powerpc64__) # if SANITIZER_FREEBSD ucontext_t *ucontext = (ucontext_t *)context; *pc = ucontext->uc_mcontext.mc_srr0; *sp = ucontext->uc_mcontext.mc_frame[1]; *bp = ucontext->uc_mcontext.mc_frame[31]; # else ucontext_t *ucontext = (ucontext_t *)context; *pc = ucontext->uc_mcontext.regs->nip; *sp = ucontext->uc_mcontext.regs->gpr[PT_R1]; // The powerpc{,64}-linux ABIs do not specify r31 as the frame // pointer, but GCC always uses r31 when we need a frame pointer. *bp = ucontext->uc_mcontext.regs->gpr[PT_R31]; # endif # elif defined(__sparc__) # if defined(__arch64__) || defined(__sparcv9) # define STACK_BIAS 2047 # else # define STACK_BIAS 0 # endif # if SANITIZER_SOLARIS ucontext_t *ucontext = (ucontext_t *)context; *pc = ucontext->uc_mcontext.gregs[REG_PC]; *sp = ucontext->uc_mcontext.gregs[REG_O6] + STACK_BIAS; # else // Historical BSDism here. 
struct sigcontext *scontext = (struct sigcontext *)context; # if defined(__arch64__) *pc = scontext->sigc_regs.tpc; *sp = scontext->sigc_regs.u_regs[14] + STACK_BIAS; # else *pc = scontext->si_regs.pc; *sp = scontext->si_regs.u_regs[14]; # endif # endif *bp = (uptr)((uhwptr *)*sp)[14] + STACK_BIAS; # elif defined(__mips__) ucontext_t *ucontext = (ucontext_t *)context; *pc = ucontext->uc_mcontext.pc; *bp = ucontext->uc_mcontext.gregs[30]; *sp = ucontext->uc_mcontext.gregs[29]; # elif defined(__s390__) ucontext_t *ucontext = (ucontext_t *)context; # if defined(__s390x__) *pc = ucontext->uc_mcontext.psw.addr; # else *pc = ucontext->uc_mcontext.psw.addr & 0x7fffffff; # endif *bp = ucontext->uc_mcontext.gregs[11]; *sp = ucontext->uc_mcontext.gregs[15]; # elif defined(__riscv) ucontext_t *ucontext = (ucontext_t *)context; # if SANITIZER_FREEBSD *pc = ucontext->uc_mcontext.mc_gpregs.gp_sepc; *bp = ucontext->uc_mcontext.mc_gpregs.gp_s[0]; *sp = ucontext->uc_mcontext.mc_gpregs.gp_sp; # else *pc = ucontext->uc_mcontext.__gregs[REG_PC]; *bp = ucontext->uc_mcontext.__gregs[REG_S0]; *sp = ucontext->uc_mcontext.__gregs[REG_SP]; # endif # elif defined(__hexagon__) ucontext_t *ucontext = (ucontext_t *)context; *pc = ucontext->uc_mcontext.pc; *bp = ucontext->uc_mcontext.r30; *sp = ucontext->uc_mcontext.r29; # elif defined(__loongarch__) ucontext_t *ucontext = (ucontext_t *)context; *pc = ucontext->uc_mcontext.__pc; *bp = ucontext->uc_mcontext.__gregs[22]; *sp = ucontext->uc_mcontext.__gregs[3]; # else # error "Unsupported arch" # endif } void SignalContext::InitPcSpBp() { GetPcSpBp(context, &pc, &sp, &bp); } void InitializePlatformEarly() { // Do nothing. } void CheckASLR() { # if SANITIZER_NETBSD int mib[3]; int paxflags; uptr len = sizeof(paxflags); mib[0] = CTL_PROC; mib[1] = internal_getpid(); mib[2] = PROC_PID_PAXFLAGS; if (UNLIKELY(internal_sysctl(mib, 3, &paxflags, &len, NULL, 0) == -1)) { Printf("sysctl failed\n"); Die(); } if (UNLIKELY(paxflags & CTL_PROC_PAXFLAGS_ASLR)) { Printf( "This sanitizer is not compatible with enabled ASLR.\n" "To disable ASLR, please run \"paxctl +a %s\" and try again.\n", GetArgv()[0]); Die(); } # elif SANITIZER_FREEBSD int aslr_status; int r = internal_procctl(P_PID, 0, PROC_ASLR_STATUS, &aslr_status); if (UNLIKELY(r == -1)) { // We're making things less 'dramatic' here since // the cmd is not necessarily guaranteed to be here // just yet regarding FreeBSD release return; } if ((aslr_status & PROC_ASLR_ACTIVE) != 0) { VReport(1, "This sanitizer is not compatible with enabled ASLR " "and binaries compiled with PIE\n" "ASLR will be disabled and the program re-executed.\n"); int aslr_ctl = PROC_ASLR_FORCE_DISABLE; CHECK_NE(internal_procctl(P_PID, 0, PROC_ASLR_CTL, &aslr_ctl), -1); ReExec(); } # elif SANITIZER_PPC64V2 // Disable ASLR for Linux PPC64LE. 
int old_personality = personality(0xffffffff); if (old_personality != -1 && (old_personality & ADDR_NO_RANDOMIZE) == 0) { VReport(1, "WARNING: Program is being run with address space layout " "randomization (ASLR) enabled which prevents the thread and " "memory sanitizers from working on powerpc64le.\n" "ASLR will be disabled and the program re-executed.\n"); CHECK_NE(personality(old_personality | ADDR_NO_RANDOMIZE), -1); ReExec(); } # else // Do nothing # endif } void CheckMPROTECT() { # if SANITIZER_NETBSD int mib[3]; int paxflags; uptr len = sizeof(paxflags); mib[0] = CTL_PROC; mib[1] = internal_getpid(); mib[2] = PROC_PID_PAXFLAGS; if (UNLIKELY(internal_sysctl(mib, 3, &paxflags, &len, NULL, 0) == -1)) { Printf("sysctl failed\n"); Die(); } if (UNLIKELY(paxflags & CTL_PROC_PAXFLAGS_MPROTECT)) { Printf("This sanitizer is not compatible with enabled MPROTECT\n"); Die(); } # else // Do nothing # endif } void CheckNoDeepBind(const char *filename, int flag) { # ifdef RTLD_DEEPBIND if (flag & RTLD_DEEPBIND) { Report( "You are trying to dlopen a %s shared library with RTLD_DEEPBIND flag" " which is incompatible with sanitizer runtime " "(see https://github.com/google/sanitizers/issues/611 for details" "). If you want to run %s library under sanitizers please remove " "RTLD_DEEPBIND from dlopen flags.\n", filename, filename); Die(); } # endif } uptr FindAvailableMemoryRange(uptr size, uptr alignment, uptr left_padding, uptr *largest_gap_found, uptr *max_occupied_addr) { UNREACHABLE("FindAvailableMemoryRange is not available"); return 0; } bool GetRandom(void *buffer, uptr length, bool blocking) { if (!buffer || !length || length > 256) return false; # if SANITIZER_USE_GETENTROPY uptr rnd = getentropy(buffer, length); int rverrno = 0; if (internal_iserror(rnd, &rverrno) && rverrno == EFAULT) return false; else if (rnd == 0) return true; # endif // SANITIZER_USE_GETENTROPY # if SANITIZER_USE_GETRANDOM static atomic_uint8_t skip_getrandom_syscall; if (!atomic_load_relaxed(&skip_getrandom_syscall)) { // Up to 256 bytes, getrandom will not be interrupted. uptr res = internal_syscall(SYSCALL(getrandom), buffer, length, blocking ? 0 : GRND_NONBLOCK); int rverrno = 0; if (internal_iserror(res, &rverrno) && rverrno == ENOSYS) atomic_store_relaxed(&skip_getrandom_syscall, 1); else if (res == length) return true; } # endif // SANITIZER_USE_GETRANDOM // Up to 256 bytes, a read off /dev/urandom will not be interrupted. // blocking is moot here, O_NONBLOCK has no effect when opening /dev/urandom. uptr fd = internal_open("/dev/urandom", O_RDONLY); if (internal_iserror(fd)) return false; uptr res = internal_read(fd, buffer, length); if (internal_iserror(res)) return false; internal_close(fd); return true; } } // namespace __sanitizer #endif diff --git a/libcxx/include/__config b/libcxx/include/__config index ecb21a705c51..a929db5d0f2d 100644 --- a/libcxx/include/__config +++ b/libcxx/include/__config @@ -1,1228 +1,1228 @@ // -*- C++ -*- //===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #ifndef _LIBCPP___CONFIG #define _LIBCPP___CONFIG #include <__config_site> #include <__configuration/abi.h> #include <__configuration/availability.h> #include <__configuration/compiler.h> #include <__configuration/platform.h> #ifndef _LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER # pragma GCC system_header #endif #ifdef __cplusplus // The attributes supported by clang are documented at https://clang.llvm.org/docs/AttributeReference.html // _LIBCPP_VERSION represents the version of libc++, which matches the version of LLVM. // Given a LLVM release LLVM XX.YY.ZZ (e.g. LLVM 17.0.1 == 17.00.01), _LIBCPP_VERSION is // defined to XXYYZZ. -# define _LIBCPP_VERSION 190103 +# define _LIBCPP_VERSION 190104 # define _LIBCPP_CONCAT_IMPL(_X, _Y) _X##_Y # define _LIBCPP_CONCAT(_X, _Y) _LIBCPP_CONCAT_IMPL(_X, _Y) # if __STDC_HOSTED__ == 0 # define _LIBCPP_FREESTANDING # endif // HARDENING { // This is for backward compatibility -- make enabling `_LIBCPP_ENABLE_ASSERTIONS` (which predates hardening modes) // equivalent to setting the extensive mode. This is deprecated and will be removed in LLVM 20. # ifdef _LIBCPP_ENABLE_ASSERTIONS # warning "_LIBCPP_ENABLE_ASSERTIONS is deprecated, please use _LIBCPP_HARDENING_MODE instead" # if _LIBCPP_ENABLE_ASSERTIONS != 0 && _LIBCPP_ENABLE_ASSERTIONS != 1 # error "_LIBCPP_ENABLE_ASSERTIONS must be set to 0 or 1" # endif # if _LIBCPP_ENABLE_ASSERTIONS # define _LIBCPP_HARDENING_MODE _LIBCPP_HARDENING_MODE_EXTENSIVE # endif # endif // The library provides the macro `_LIBCPP_HARDENING_MODE` which can be set to one of the following values: // // - `_LIBCPP_HARDENING_MODE_NONE`; // - `_LIBCPP_HARDENING_MODE_FAST`; // - `_LIBCPP_HARDENING_MODE_EXTENSIVE`; // - `_LIBCPP_HARDENING_MODE_DEBUG`. // // These values have the following effects: // // - `_LIBCPP_HARDENING_MODE_NONE` -- sets the hardening mode to "none" which disables all runtime hardening checks; // // - `_LIBCPP_HARDENING_MODE_FAST` -- sets that hardening mode to "fast". The fast mode enables security-critical checks // that can be done with relatively little runtime overhead in constant time; // // - `_LIBCPP_HARDENING_MODE_EXTENSIVE` -- sets the hardening mode to "extensive". The extensive mode is a superset of // the fast mode that additionally enables checks that are relatively cheap and prevent common types of logic errors // but are not necessarily security-critical; // // - `_LIBCPP_HARDENING_MODE_DEBUG` -- sets the hardening mode to "debug". The debug mode is a superset of the extensive // mode and enables all checks available in the library, including internal assertions. Checks that are part of the // debug mode can be very expensive and thus the debug mode is intended to be used for testing, not in production. // Inside the library, assertions are categorized so they can be cherry-picked based on the chosen hardening mode. These // macros are only for internal use -- users should only pick one of the high-level hardening modes described above. // // - `_LIBCPP_ASSERT_VALID_INPUT_RANGE` -- checks that ranges (whether expressed as an iterator pair, an iterator and // a sentinel, an iterator and a count, or a `std::range`) given as input to library functions are valid: // - the sentinel is reachable from the begin iterator; // - TODO(hardening): both iterators refer to the same container. 
// // - `_LIBCPP_ASSERT_VALID_ELEMENT_ACCESS` -- checks that any attempts to access a container element, whether through // the container object or through an iterator, are valid and do not attempt to go out of bounds or otherwise access // a non-existent element. For iterator checks to work, bounded iterators must be enabled in the ABI. Types like // `optional` and `function` are considered one-element containers for the purposes of this check. // // - `_LIBCPP_ASSERT_NON_NULL` -- checks that the pointer being dereferenced is not null. On most modern platforms zero // address does not refer to an actual location in memory, so a null pointer dereference would not compromize the // memory security of a program (however, it is still undefined behavior that can result in strange errors due to // compiler optimizations). // // - `_LIBCPP_ASSERT_NON_OVERLAPPING_RANGES` -- for functions that take several ranges as arguments, checks that the // given ranges do not overlap. // // - `_LIBCPP_ASSERT_VALID_DEALLOCATION` -- checks that an attempt to deallocate memory is valid (e.g. the given object // was allocated by the given allocator). Violating this category typically results in a memory leak. // // - `_LIBCPP_ASSERT_VALID_EXTERNAL_API_CALL` -- checks that a call to an external API doesn't fail in // an unexpected manner. This includes triggering documented cases of undefined behavior in an external library (like // attempting to unlock an unlocked mutex in pthreads). Any API external to the library falls under this category // (from system calls to compiler intrinsics). We generally don't expect these failures to compromize memory safety or // otherwise create an immediate security issue. // // - `_LIBCPP_ASSERT_COMPATIBLE_ALLOCATOR` -- checks any operations that exchange nodes between containers to make sure // the containers have compatible allocators. // // - `_LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN` -- checks that the given argument is within the domain of valid arguments // for the function. Violating this typically produces an incorrect result (e.g. the clamp algorithm returns the // original value without clamping it due to incorrect functors) or puts an object into an invalid state (e.g. // a string view where only a subset of elements is possible to access). This category is for assertions violating // which doesn't cause any immediate issues in the library -- whatever the consequences are, they will happen in the // user code. // // - `_LIBCPP_ASSERT_PEDANTIC` -- checks prerequisites which are imposed by the Standard, but violating which happens to // be benign in our implementation. // // - `_LIBCPP_ASSERT_SEMANTIC_REQUIREMENT` -- checks that the given argument satisfies the semantic requirements imposed // by the Standard. Typically, there is no simple way to completely prove that a semantic requirement is satisfied; // thus, this would often be a heuristic check and it might be quite expensive. // // - `_LIBCPP_ASSERT_INTERNAL` -- checks that internal invariants of the library hold. These assertions don't depend on // user input. // // - `_LIBCPP_ASSERT_UNCATEGORIZED` -- for assertions that haven't been properly classified yet. // clang-format off # define _LIBCPP_HARDENING_MODE_NONE (1 << 1) # define _LIBCPP_HARDENING_MODE_FAST (1 << 2) # define _LIBCPP_HARDENING_MODE_EXTENSIVE (1 << 4) // Deliberately not ordered. 
# define _LIBCPP_HARDENING_MODE_DEBUG (1 << 3) // clang-format on # ifndef _LIBCPP_HARDENING_MODE # ifndef _LIBCPP_HARDENING_MODE_DEFAULT # error _LIBCPP_HARDENING_MODE_DEFAULT is not defined. This definition should be set at configuration time in the \ `__config_site` header, please make sure your installation of libc++ is not broken. # endif # define _LIBCPP_HARDENING_MODE _LIBCPP_HARDENING_MODE_DEFAULT # endif # if _LIBCPP_HARDENING_MODE != _LIBCPP_HARDENING_MODE_NONE && \ _LIBCPP_HARDENING_MODE != _LIBCPP_HARDENING_MODE_FAST && \ _LIBCPP_HARDENING_MODE != _LIBCPP_HARDENING_MODE_EXTENSIVE && \ _LIBCPP_HARDENING_MODE != _LIBCPP_HARDENING_MODE_DEBUG # error _LIBCPP_HARDENING_MODE must be set to one of the following values: \ _LIBCPP_HARDENING_MODE_NONE, \ _LIBCPP_HARDENING_MODE_FAST, \ _LIBCPP_HARDENING_MODE_EXTENSIVE, \ _LIBCPP_HARDENING_MODE_DEBUG # endif // } HARDENING # define _LIBCPP_TOSTRING2(x) #x # define _LIBCPP_TOSTRING(x) _LIBCPP_TOSTRING2(x) // NOLINTNEXTLINE(libcpp-cpp-version-check) # if __cplusplus < 201103L # define _LIBCPP_CXX03_LANG # endif # ifndef __has_constexpr_builtin # define __has_constexpr_builtin(x) 0 # endif // This checks wheter a Clang module is built # ifndef __building_module # define __building_module(...) 0 # endif // '__is_identifier' returns '0' if '__x' is a reserved identifier provided by // the compiler and '1' otherwise. # ifndef __is_identifier # define __is_identifier(__x) 1 # endif # ifndef __has_declspec_attribute # define __has_declspec_attribute(__x) 0 # endif # define __has_keyword(__x) !(__is_identifier(__x)) # ifndef __has_warning # define __has_warning(...) 0 # endif # if !defined(_LIBCPP_COMPILER_CLANG_BASED) && __cplusplus < 201103L # error "libc++ only supports C++03 with Clang-based compilers. Please enable C++11" # endif // FIXME: ABI detection should be done via compiler builtin macros. This // is just a placeholder until Clang implements such macros. For now assume // that Windows compilers pretending to be MSVC++ target the Microsoft ABI, // and allow the user to explicitly specify the ABI to handle cases where this // heuristic falls short. # if defined(_LIBCPP_ABI_FORCE_ITANIUM) && defined(_LIBCPP_ABI_FORCE_MICROSOFT) # error "Only one of _LIBCPP_ABI_FORCE_ITANIUM and _LIBCPP_ABI_FORCE_MICROSOFT can be defined" # elif defined(_LIBCPP_ABI_FORCE_ITANIUM) # define _LIBCPP_ABI_ITANIUM # elif defined(_LIBCPP_ABI_FORCE_MICROSOFT) # define _LIBCPP_ABI_MICROSOFT # else # if defined(_WIN32) && defined(_MSC_VER) # define _LIBCPP_ABI_MICROSOFT # else # define _LIBCPP_ABI_ITANIUM # endif # endif # if defined(_LIBCPP_ABI_MICROSOFT) && !defined(_LIBCPP_NO_VCRUNTIME) # define _LIBCPP_ABI_VCRUNTIME # endif # if __has_feature(experimental_library) # ifndef _LIBCPP_ENABLE_EXPERIMENTAL # define _LIBCPP_ENABLE_EXPERIMENTAL # endif # endif // Incomplete features get their own specific disabling flags. This makes it // easier to grep for target specific flags once the feature is complete. 
# if !defined(_LIBCPP_ENABLE_EXPERIMENTAL) && !defined(_LIBCPP_BUILDING_LIBRARY) # define _LIBCPP_HAS_NO_INCOMPLETE_PSTL # define _LIBCPP_HAS_NO_EXPERIMENTAL_STOP_TOKEN # define _LIBCPP_HAS_NO_EXPERIMENTAL_TZDB # define _LIBCPP_HAS_NO_EXPERIMENTAL_SYNCSTREAM # endif # if defined(__MVS__) # include // for __NATIVE_ASCII_F # endif # if defined(_WIN32) # define _LIBCPP_WIN32API # define _LIBCPP_SHORT_WCHAR 1 // Both MinGW and native MSVC provide a "MSVC"-like environment # define _LIBCPP_MSVCRT_LIKE // If mingw not explicitly detected, assume using MS C runtime only if // a MS compatibility version is specified. # if defined(_MSC_VER) && !defined(__MINGW32__) # define _LIBCPP_MSVCRT // Using Microsoft's C Runtime library # endif # if (defined(_M_AMD64) || defined(__x86_64__)) || (defined(_M_ARM) || defined(__arm__)) # define _LIBCPP_HAS_BITSCAN64 # endif # define _LIBCPP_HAS_OPEN_WITH_WCHAR # endif // defined(_WIN32) # if defined(_AIX) && !defined(__64BIT__) // The size of wchar is 2 byte on 32-bit mode on AIX. # define _LIBCPP_SHORT_WCHAR 1 # endif // Libc++ supports various implementations of std::random_device. // // _LIBCPP_USING_DEV_RANDOM // Read entropy from the given file, by default `/dev/urandom`. // If a token is provided, it is assumed to be the path to a file // to read entropy from. This is the default behavior if nothing // else is specified. This implementation requires storing state // inside `std::random_device`. // // _LIBCPP_USING_ARC4_RANDOM // Use arc4random(). This allows obtaining random data even when // using sandboxing mechanisms. On some platforms like Apple, this // is the recommended source of entropy for user-space programs. // When this option is used, the token passed to `std::random_device`'s // constructor *must* be "/dev/urandom" -- anything else is an error. // // _LIBCPP_USING_GETENTROPY // Use getentropy(). // When this option is used, the token passed to `std::random_device`'s // constructor *must* be "/dev/urandom" -- anything else is an error. // // _LIBCPP_USING_FUCHSIA_CPRNG // Use Fuchsia's zx_cprng_draw() system call, which is specified to // deliver high-quality entropy and cannot fail. // When this option is used, the token passed to `std::random_device`'s // constructor *must* be "/dev/urandom" -- anything else is an error. // // _LIBCPP_USING_NACL_RANDOM // NaCl's sandbox (which PNaCl also runs in) doesn't allow filesystem access, // including accesses to the special files under `/dev`. This implementation // uses the NaCL syscall `nacl_secure_random_init()` to get entropy. // When this option is used, the token passed to `std::random_device`'s // constructor *must* be "/dev/urandom" -- anything else is an error. // // _LIBCPP_USING_WIN32_RANDOM // Use rand_s(), for use on Windows. // When this option is used, the token passed to `std::random_device`'s // constructor *must* be "/dev/urandom" -- anything else is an error. 
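A minimal usage sketch of the constraint described above (illustration only, not taken from the patch): apart from the default constructor, "/dev/urandom" is the only token accepted by every backend; the arc4random(), getentropy(), Fuchsia, NaCl and Win32 implementations reject anything else.

  #include <iostream>
  #include <random>

  int main() {
    std::random_device rd;                   // default token, valid on every backend
    std::random_device rd2("/dev/urandom");  // the only explicit token that is portable
    // std::random_device rd3("/dev/random") would throw on the non-file-based backends,
    // which accept no token other than "/dev/urandom".
    std::cout << rd() << ' ' << rd2() << '\n';
  }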
# if defined(__APPLE__) || defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || \ defined(__DragonFly__) # define _LIBCPP_USING_ARC4_RANDOM # elif defined(__wasi__) || defined(__EMSCRIPTEN__) # define _LIBCPP_USING_GETENTROPY # elif defined(__Fuchsia__) # define _LIBCPP_USING_FUCHSIA_CPRNG # elif defined(__native_client__) # define _LIBCPP_USING_NACL_RANDOM # elif defined(_LIBCPP_WIN32API) # define _LIBCPP_USING_WIN32_RANDOM # else # define _LIBCPP_USING_DEV_RANDOM # endif # ifndef _LIBCPP_CXX03_LANG # define _LIBCPP_ALIGNOF(_Tp) alignof(_Tp) # define _ALIGNAS_TYPE(x) alignas(x) # define _ALIGNAS(x) alignas(x) # define _LIBCPP_NORETURN [[noreturn]] # define _NOEXCEPT noexcept # define _NOEXCEPT_(...) noexcept(__VA_ARGS__) # define _LIBCPP_CONSTEXPR constexpr # else # define _LIBCPP_ALIGNOF(_Tp) _Alignof(_Tp) # define _ALIGNAS_TYPE(x) __attribute__((__aligned__(_LIBCPP_ALIGNOF(x)))) # define _ALIGNAS(x) __attribute__((__aligned__(x))) # define _LIBCPP_NORETURN __attribute__((__noreturn__)) # define _LIBCPP_HAS_NO_NOEXCEPT # define nullptr __nullptr # define _NOEXCEPT throw() # define _NOEXCEPT_(...) # define static_assert(...) _Static_assert(__VA_ARGS__) # define decltype(...) __decltype(__VA_ARGS__) # define _LIBCPP_CONSTEXPR typedef __char16_t char16_t; typedef __char32_t char32_t; # endif # define _LIBCPP_PREFERRED_ALIGNOF(_Tp) __alignof(_Tp) // Objective-C++ features (opt-in) # if __has_feature(objc_arc) # define _LIBCPP_HAS_OBJC_ARC # endif # if __has_feature(objc_arc_weak) # define _LIBCPP_HAS_OBJC_ARC_WEAK # endif # if __has_extension(blocks) # define _LIBCPP_HAS_EXTENSION_BLOCKS # endif # if defined(_LIBCPP_HAS_EXTENSION_BLOCKS) && defined(__APPLE__) # define _LIBCPP_HAS_BLOCKS_RUNTIME # endif # if !__has_feature(address_sanitizer) # define _LIBCPP_HAS_NO_ASAN # endif # define _LIBCPP_ALWAYS_INLINE __attribute__((__always_inline__)) # define _LIBCPP_DISABLE_EXTENSION_WARNING __extension__ # if defined(_LIBCPP_OBJECT_FORMAT_COFF) # ifdef _DLL # define _LIBCPP_CRT_FUNC __declspec(dllimport) # else # define _LIBCPP_CRT_FUNC # endif # if defined(_LIBCPP_DISABLE_VISIBILITY_ANNOTATIONS) || (defined(__MINGW32__) && !defined(_LIBCPP_BUILDING_LIBRARY)) # define _LIBCPP_DLL_VIS # define _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS # define _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS # define _LIBCPP_OVERRIDABLE_FUNC_VIS # define _LIBCPP_EXPORTED_FROM_ABI # elif defined(_LIBCPP_BUILDING_LIBRARY) # define _LIBCPP_DLL_VIS __declspec(dllexport) # if defined(__MINGW32__) # define _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS _LIBCPP_DLL_VIS # define _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS # else # define _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS # define _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS _LIBCPP_DLL_VIS # endif # define _LIBCPP_OVERRIDABLE_FUNC_VIS _LIBCPP_DLL_VIS # define _LIBCPP_EXPORTED_FROM_ABI __declspec(dllexport) # else # define _LIBCPP_DLL_VIS __declspec(dllimport) # define _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS _LIBCPP_DLL_VIS # define _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS # define _LIBCPP_OVERRIDABLE_FUNC_VIS # define _LIBCPP_EXPORTED_FROM_ABI __declspec(dllimport) # endif # define _LIBCPP_HIDDEN # define _LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS # define _LIBCPP_TEMPLATE_VIS # define _LIBCPP_TEMPLATE_DATA_VIS # define _LIBCPP_TYPE_VISIBILITY_DEFAULT # else # if !defined(_LIBCPP_DISABLE_VISIBILITY_ANNOTATIONS) # define _LIBCPP_VISIBILITY(vis) __attribute__((__visibility__(vis))) # else # define _LIBCPP_VISIBILITY(vis) # endif # define _LIBCPP_HIDDEN _LIBCPP_VISIBILITY("hidden") # define 
_LIBCPP_TEMPLATE_DATA_VIS _LIBCPP_VISIBILITY("default") # define _LIBCPP_EXPORTED_FROM_ABI _LIBCPP_VISIBILITY("default") # define _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS _LIBCPP_VISIBILITY("default") # define _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS // TODO: Make this a proper customization point or remove the option to override it. # ifndef _LIBCPP_OVERRIDABLE_FUNC_VIS # define _LIBCPP_OVERRIDABLE_FUNC_VIS _LIBCPP_VISIBILITY("default") # endif # if !defined(_LIBCPP_DISABLE_VISIBILITY_ANNOTATIONS) // The inline should be removed once PR32114 is resolved # define _LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS inline _LIBCPP_HIDDEN # else # define _LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS # endif // GCC doesn't support the type_visibility attribute, so we have to keep the visibility attribute on templates # if !defined(_LIBCPP_DISABLE_VISIBILITY_ANNOTATIONS) && !__has_attribute(__type_visibility__) # define _LIBCPP_TEMPLATE_VIS __attribute__((__visibility__("default"))) # else # define _LIBCPP_TEMPLATE_VIS # endif # if !defined(_LIBCPP_DISABLE_VISIBILITY_ANNOTATIONS) && __has_attribute(__type_visibility__) # define _LIBCPP_TYPE_VISIBILITY_DEFAULT __attribute__((__type_visibility__("default"))) # else # define _LIBCPP_TYPE_VISIBILITY_DEFAULT # endif # endif // defined(_LIBCPP_OBJECT_FORMAT_COFF) # if __has_attribute(exclude_from_explicit_instantiation) # define _LIBCPP_EXCLUDE_FROM_EXPLICIT_INSTANTIATION __attribute__((__exclude_from_explicit_instantiation__)) # else // Try to approximate the effect of exclude_from_explicit_instantiation // (which is that entities are not assumed to be provided by explicit // template instantiations in the dylib) by always inlining those entities. # define _LIBCPP_EXCLUDE_FROM_EXPLICIT_INSTANTIATION _LIBCPP_ALWAYS_INLINE # endif # ifdef _LIBCPP_COMPILER_CLANG_BASED # define _LIBCPP_DIAGNOSTIC_PUSH _Pragma("clang diagnostic push") # define _LIBCPP_DIAGNOSTIC_POP _Pragma("clang diagnostic pop") # define _LIBCPP_CLANG_DIAGNOSTIC_IGNORED(str) _Pragma(_LIBCPP_TOSTRING(clang diagnostic ignored str)) # define _LIBCPP_GCC_DIAGNOSTIC_IGNORED(str) # elif defined(_LIBCPP_COMPILER_GCC) # define _LIBCPP_DIAGNOSTIC_PUSH _Pragma("GCC diagnostic push") # define _LIBCPP_DIAGNOSTIC_POP _Pragma("GCC diagnostic pop") # define _LIBCPP_CLANG_DIAGNOSTIC_IGNORED(str) # define _LIBCPP_GCC_DIAGNOSTIC_IGNORED(str) _Pragma(_LIBCPP_TOSTRING(GCC diagnostic ignored str)) # else # define _LIBCPP_DIAGNOSTIC_PUSH # define _LIBCPP_DIAGNOSTIC_POP # define _LIBCPP_CLANG_DIAGNOSTIC_IGNORED(str) # define _LIBCPP_GCC_DIAGNOSTIC_IGNORED(str) # endif # if _LIBCPP_HARDENING_MODE == _LIBCPP_HARDENING_MODE_FAST # define _LIBCPP_HARDENING_SIG f # elif _LIBCPP_HARDENING_MODE == _LIBCPP_HARDENING_MODE_EXTENSIVE # define _LIBCPP_HARDENING_SIG s # elif _LIBCPP_HARDENING_MODE == _LIBCPP_HARDENING_MODE_DEBUG # define _LIBCPP_HARDENING_SIG d # else # define _LIBCPP_HARDENING_SIG n // "none" # endif # ifdef _LIBCPP_HAS_NO_EXCEPTIONS # define _LIBCPP_EXCEPTIONS_SIG n # else # define _LIBCPP_EXCEPTIONS_SIG e # endif # define _LIBCPP_ODR_SIGNATURE \ _LIBCPP_CONCAT(_LIBCPP_CONCAT(_LIBCPP_HARDENING_SIG, _LIBCPP_EXCEPTIONS_SIG), _LIBCPP_VERSION) // This macro marks a symbol as being hidden from libc++'s ABI. This is achieved // on two levels: // 1. The symbol is given hidden visibility, which ensures that users won't start exporting // symbols from their dynamic library by means of using the libc++ headers. This ensures // that those symbols stay private to the dynamic library in which it is defined. // // 2. 
The symbol is given an ABI tag that encodes the ODR-relevant properties of the library. // This ensures that no ODR violation can arise from mixing two TUs compiled with different // versions or configurations of libc++ (such as exceptions vs no-exceptions). Indeed, if the // program contains two definitions of a function, the ODR requires them to be token-by-token // equivalent, and the linker is allowed to pick either definition and discard the other one. // // For example, if a program contains a copy of `vector::at()` compiled with exceptions enabled // *and* a copy of `vector::at()` compiled with exceptions disabled (by means of having two TUs // compiled with different settings), the two definitions are both visible by the linker and they // have the same name, but they have a meaningfully different implementation (one throws an exception // and the other aborts the program). This violates the ODR and makes the program ill-formed, and in // practice what will happen is that the linker will pick one of the definitions at random and will // discard the other one. This can quite clearly lead to incorrect program behavior. // // A similar reasoning holds for many other properties that are ODR-affecting. Essentially any // property that causes the code of a function to differ from the code in another configuration // can be considered ODR-affecting. In practice, we don't encode all such properties in the ABI // tag, but we encode the ones that we think are most important: library version, exceptions, and // hardening mode. // // Note that historically, solving this problem has been achieved in various ways, including // force-inlining all functions or giving internal linkage to all functions. Both these previous // solutions suffer from drawbacks that lead notably to code bloat. // // Note that we use _LIBCPP_EXCLUDE_FROM_EXPLICIT_INSTANTIATION to ensure that we don't depend // on _LIBCPP_HIDE_FROM_ABI methods of classes explicitly instantiated in the dynamic library. // // Also note that the _LIBCPP_HIDE_FROM_ABI_VIRTUAL macro should be used on virtual functions // instead of _LIBCPP_HIDE_FROM_ABI. That macro does not use an ABI tag. Indeed, the mangled // name of a virtual function is part of its ABI, since some architectures like arm64e can sign // the virtual function pointer in the vtable based on the mangled name of the function. Since // we use an ABI tag that changes with each released version, the mangled name of the virtual // function would change, which is incorrect. Note that it doesn't make much sense to change // the implementation of a virtual function in an ABI-incompatible way in the first place, // since that would be an ABI break anyway. Hence, the lack of ABI tag should not be noticeable. // // The macro can be applied to record and enum types. When the tagged type is nested in // a record this "parent" record needs to have the macro too. Another use case for applying // this macro to records and unions is to apply an ABI tag to inline constexpr variables. // This can be useful for inline variables that are implementation details which are expected // to change in the future. // // TODO: We provide a escape hatch with _LIBCPP_NO_ABI_TAG for folks who want to avoid increasing // the length of symbols with an ABI tag. In practice, we should remove the escape hatch and // use compression mangling instead, see https://github.com/itanium-cxx-abi/cxx-abi/issues/70. 
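A short sketch of the mechanism described above (illustration only; the function and the tag string are invented here, libc++ derives the real tag from _LIBCPP_ODR_SIGNATURE):

  __attribute__((__abi_tag__("ne190104"))) // e.g. no hardening ("n"), exceptions ("e"), version 19.01.04
  inline int __clamp_index(int __i, int __n) {
    return __i < __n ? __i : __n - 1;
  }

  // The tag participates in name mangling (roughly _Z13__clamp_indexB8ne190104ii under the
  // Itanium ABI), so a TU built against a differently configured libc++ emits a different
  // symbol and the linker can no longer merge the incompatible definitions.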
# ifndef _LIBCPP_NO_ABI_TAG # define _LIBCPP_HIDE_FROM_ABI \ _LIBCPP_HIDDEN _LIBCPP_EXCLUDE_FROM_EXPLICIT_INSTANTIATION \ __attribute__((__abi_tag__(_LIBCPP_TOSTRING(_LIBCPP_ODR_SIGNATURE)))) # else # define _LIBCPP_HIDE_FROM_ABI _LIBCPP_HIDDEN _LIBCPP_EXCLUDE_FROM_EXPLICIT_INSTANTIATION # endif # define _LIBCPP_HIDE_FROM_ABI_VIRTUAL _LIBCPP_HIDDEN _LIBCPP_EXCLUDE_FROM_EXPLICIT_INSTANTIATION # ifdef _LIBCPP_BUILDING_LIBRARY # if _LIBCPP_ABI_VERSION > 1 # define _LIBCPP_HIDE_FROM_ABI_AFTER_V1 _LIBCPP_HIDE_FROM_ABI # else # define _LIBCPP_HIDE_FROM_ABI_AFTER_V1 # endif # else # define _LIBCPP_HIDE_FROM_ABI_AFTER_V1 _LIBCPP_HIDE_FROM_ABI # endif // TODO: Remove this workaround once we drop support for Clang 16 # if __has_warning("-Wc++23-extensions") # define _LIBCPP_CLANG_DIAGNOSTIC_IGNORED_CXX23_EXTENSION _LIBCPP_CLANG_DIAGNOSTIC_IGNORED("-Wc++23-extensions") # else # define _LIBCPP_CLANG_DIAGNOSTIC_IGNORED_CXX23_EXTENSION _LIBCPP_CLANG_DIAGNOSTIC_IGNORED("-Wc++2b-extensions") # endif // Clang modules take a significant compile time hit when pushing and popping diagnostics. // Since all the headers are marked as system headers in the modulemap, we can simply disable this // pushing and popping when building with clang modules. # if !__has_feature(modules) # define _LIBCPP_PUSH_EXTENSION_DIAGNOSTICS \ _LIBCPP_DIAGNOSTIC_PUSH \ _LIBCPP_CLANG_DIAGNOSTIC_IGNORED("-Wc++11-extensions") \ _LIBCPP_CLANG_DIAGNOSTIC_IGNORED("-Wc++14-extensions") \ _LIBCPP_CLANG_DIAGNOSTIC_IGNORED("-Wc++17-extensions") \ _LIBCPP_CLANG_DIAGNOSTIC_IGNORED("-Wc++20-extensions") \ _LIBCPP_CLANG_DIAGNOSTIC_IGNORED_CXX23_EXTENSION \ _LIBCPP_GCC_DIAGNOSTIC_IGNORED("-Wc++14-extensions") \ _LIBCPP_GCC_DIAGNOSTIC_IGNORED("-Wc++17-extensions") \ _LIBCPP_GCC_DIAGNOSTIC_IGNORED("-Wc++20-extensions") \ _LIBCPP_GCC_DIAGNOSTIC_IGNORED("-Wc++23-extensions") # define _LIBCPP_POP_EXTENSION_DIAGNOSTICS _LIBCPP_DIAGNOSTIC_POP # else # define _LIBCPP_PUSH_EXTENSION_DIAGNOSTICS # define _LIBCPP_POP_EXTENSION_DIAGNOSTICS # endif // Inline namespaces are available in Clang/GCC/MSVC regardless of C++ dialect. 
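With the default ABI namespace (__1), the macros defined just below make

  _LIBCPP_BEGIN_NAMESPACE_STD
  template <class _Tp> class __wrap_iter;
  _LIBCPP_END_NAMESPACE_STD

roughly equivalent (ignoring the visibility attribute and the diagnostic push/pop) to

  namespace std { inline namespace __1 {
  template <class _Tp> class __wrap_iter;
  }}

so every libc++ entity lives in std::__1 but remains nameable as std::__wrap_iter. (Illustration only; __wrap_iter stands in here for any library-internal name.)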
// clang-format off # define _LIBCPP_BEGIN_NAMESPACE_STD _LIBCPP_PUSH_EXTENSION_DIAGNOSTICS \ namespace _LIBCPP_TYPE_VISIBILITY_DEFAULT std { \ inline namespace _LIBCPP_ABI_NAMESPACE { # define _LIBCPP_END_NAMESPACE_STD }} _LIBCPP_POP_EXTENSION_DIAGNOSTICS #ifdef _LIBCPP_ABI_NO_FILESYSTEM_INLINE_NAMESPACE # define _LIBCPP_BEGIN_NAMESPACE_FILESYSTEM _LIBCPP_BEGIN_NAMESPACE_STD namespace filesystem { # define _LIBCPP_END_NAMESPACE_FILESYSTEM } _LIBCPP_END_NAMESPACE_STD #else # define _LIBCPP_BEGIN_NAMESPACE_FILESYSTEM _LIBCPP_BEGIN_NAMESPACE_STD \ inline namespace __fs { namespace filesystem { # define _LIBCPP_END_NAMESPACE_FILESYSTEM }} _LIBCPP_END_NAMESPACE_STD #endif // clang-format on # if __has_attribute(__enable_if__) # define _LIBCPP_PREFERRED_OVERLOAD __attribute__((__enable_if__(true, ""))) # endif # if !defined(__SIZEOF_INT128__) || defined(_MSC_VER) # define _LIBCPP_HAS_NO_INT128 # endif # ifdef _LIBCPP_CXX03_LANG # define _LIBCPP_DECLARE_STRONG_ENUM(x) \ struct _LIBCPP_EXPORTED_FROM_ABI x { \ enum __lx // clang-format off # define _LIBCPP_DECLARE_STRONG_ENUM_EPILOG(x) \ __lx __v_; \ _LIBCPP_HIDE_FROM_ABI x(__lx __v) : __v_(__v) {} \ _LIBCPP_HIDE_FROM_ABI explicit x(int __v) : __v_(static_cast<__lx>(__v)) {} \ _LIBCPP_HIDE_FROM_ABI operator int() const { return __v_; } \ }; // clang-format on # else // _LIBCPP_CXX03_LANG # define _LIBCPP_DECLARE_STRONG_ENUM(x) enum class x # define _LIBCPP_DECLARE_STRONG_ENUM_EPILOG(x) # endif // _LIBCPP_CXX03_LANG # if defined(__APPLE__) || defined(__FreeBSD__) || defined(_LIBCPP_MSVCRT_LIKE) || defined(__NetBSD__) # define _LIBCPP_LOCALE__L_EXTENSIONS 1 # endif # ifdef __FreeBSD__ # define _DECLARE_C99_LDBL_MATH 1 # endif // If we are getting operator new from the MSVC CRT, then allocation overloads // for align_val_t were added in 19.12, aka VS 2017 version 15.3. # if defined(_LIBCPP_MSVCRT) && defined(_MSC_VER) && _MSC_VER < 1912 # define _LIBCPP_HAS_NO_LIBRARY_ALIGNED_ALLOCATION # elif defined(_LIBCPP_ABI_VCRUNTIME) && !defined(__cpp_aligned_new) // We're deferring to Microsoft's STL to provide aligned new et al. We don't // have it unless the language feature test macro is defined. # define _LIBCPP_HAS_NO_LIBRARY_ALIGNED_ALLOCATION # elif defined(__MVS__) # define _LIBCPP_HAS_NO_LIBRARY_ALIGNED_ALLOCATION # endif # if defined(_LIBCPP_HAS_NO_LIBRARY_ALIGNED_ALLOCATION) || (!defined(__cpp_aligned_new) || __cpp_aligned_new < 201606) # define _LIBCPP_HAS_NO_ALIGNED_ALLOCATION # endif // It is not yet possible to use aligned_alloc() on all Apple platforms since // 10.15 was the first version to ship an implementation of aligned_alloc(). # if defined(__APPLE__) # if (defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) && \ __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ < 101500) # define _LIBCPP_HAS_NO_C11_ALIGNED_ALLOC # endif # elif defined(__ANDROID__) && __ANDROID_API__ < 28 // Android only provides aligned_alloc when targeting API 28 or higher. # define _LIBCPP_HAS_NO_C11_ALIGNED_ALLOC # endif # if defined(__APPLE__) || defined(__FreeBSD__) # define _LIBCPP_HAS_DEFAULTRUNELOCALE # endif # if defined(__APPLE__) || defined(__FreeBSD__) # define _LIBCPP_WCTYPE_IS_MASK # endif # if _LIBCPP_STD_VER <= 17 || !defined(__cpp_char8_t) # define _LIBCPP_HAS_NO_CHAR8_T # endif // Deprecation macros. // // Deprecations warnings are always enabled, except when users explicitly opt-out // by defining _LIBCPP_DISABLE_DEPRECATION_WARNINGS. 
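A small usage sketch of the per-dialect deprecation macros defined just below (illustration only; the function name is hypothetical): annotating a declaration with the macro matching the standard that deprecated it makes the warning fire only in that dialect or later.

  template <class _Tp>
  _LIBCPP_DEPRECATED_IN_CXX17 _LIBCPP_HIDE_FROM_ABI _Tp* __legacy_addressof(_Tp& __x) _NOEXCEPT {
    return &__x;
  }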
# if !defined(_LIBCPP_DISABLE_DEPRECATION_WARNINGS) # if __has_attribute(__deprecated__) # define _LIBCPP_DEPRECATED __attribute__((__deprecated__)) # define _LIBCPP_DEPRECATED_(m) __attribute__((__deprecated__(m))) # elif _LIBCPP_STD_VER >= 14 # define _LIBCPP_DEPRECATED [[deprecated]] # define _LIBCPP_DEPRECATED_(m) [[deprecated(m)]] # else # define _LIBCPP_DEPRECATED # define _LIBCPP_DEPRECATED_(m) # endif # else # define _LIBCPP_DEPRECATED # define _LIBCPP_DEPRECATED_(m) # endif # if _LIBCPP_STD_VER < 20 # define _LIBCPP_DEPRECATED_ATOMIC_SYNC \ _LIBCPP_DEPRECATED_("The C++20 synchronization library has been deprecated prior to C++20. Please update to " \ "using -std=c++20 if you need to use these facilities.") # else # define _LIBCPP_DEPRECATED_ATOMIC_SYNC /* nothing */ # endif # if !defined(_LIBCPP_CXX03_LANG) # define _LIBCPP_DEPRECATED_IN_CXX11 _LIBCPP_DEPRECATED # else # define _LIBCPP_DEPRECATED_IN_CXX11 # endif # if _LIBCPP_STD_VER >= 14 # define _LIBCPP_DEPRECATED_IN_CXX14 _LIBCPP_DEPRECATED # else # define _LIBCPP_DEPRECATED_IN_CXX14 # endif # if _LIBCPP_STD_VER >= 17 # define _LIBCPP_DEPRECATED_IN_CXX17 _LIBCPP_DEPRECATED # else # define _LIBCPP_DEPRECATED_IN_CXX17 # endif # if _LIBCPP_STD_VER >= 20 # define _LIBCPP_DEPRECATED_IN_CXX20 _LIBCPP_DEPRECATED # else # define _LIBCPP_DEPRECATED_IN_CXX20 # endif # if _LIBCPP_STD_VER >= 23 # define _LIBCPP_DEPRECATED_IN_CXX23 _LIBCPP_DEPRECATED # else # define _LIBCPP_DEPRECATED_IN_CXX23 # endif # if _LIBCPP_STD_VER >= 26 # define _LIBCPP_DEPRECATED_IN_CXX26 _LIBCPP_DEPRECATED # else # define _LIBCPP_DEPRECATED_IN_CXX26 # endif # if !defined(_LIBCPP_HAS_NO_CHAR8_T) # define _LIBCPP_DEPRECATED_WITH_CHAR8_T _LIBCPP_DEPRECATED # else # define _LIBCPP_DEPRECATED_WITH_CHAR8_T # endif // Macros to enter and leave a state where deprecation warnings are suppressed. 
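A hedged sketch of how the suppression macros defined just below are used (illustration only; the helper is hypothetical): the library occasionally has to name members it deprecates for users, e.g. `std::allocator<T>::pointer` in C++17 and later, so the reference is wrapped in a push/pop pair to keep the headers warning-clean.

  _LIBCPP_SUPPRESS_DEPRECATED_PUSH
  template <class _Alloc>
  _LIBCPP_HIDE_FROM_ABI void __touch_legacy_pointer_typedef() {
    typedef typename _Alloc::pointer __pointer; // may be a deprecated member typedef
    (void)sizeof(__pointer);
  }
  _LIBCPP_SUPPRESS_DEPRECATED_POP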
# if defined(_LIBCPP_COMPILER_CLANG_BASED) || defined(_LIBCPP_COMPILER_GCC) # define _LIBCPP_SUPPRESS_DEPRECATED_PUSH \ _Pragma("GCC diagnostic push") _Pragma("GCC diagnostic ignored \"-Wdeprecated\"") \ _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"") # define _LIBCPP_SUPPRESS_DEPRECATED_POP _Pragma("GCC diagnostic pop") # else # define _LIBCPP_SUPPRESS_DEPRECATED_PUSH # define _LIBCPP_SUPPRESS_DEPRECATED_POP # endif # if _LIBCPP_STD_VER <= 11 # define _LIBCPP_EXPLICIT_SINCE_CXX14 # else # define _LIBCPP_EXPLICIT_SINCE_CXX14 explicit # endif # if _LIBCPP_STD_VER >= 23 # define _LIBCPP_EXPLICIT_SINCE_CXX23 explicit # else # define _LIBCPP_EXPLICIT_SINCE_CXX23 # endif # if _LIBCPP_STD_VER >= 14 # define _LIBCPP_CONSTEXPR_SINCE_CXX14 constexpr # else # define _LIBCPP_CONSTEXPR_SINCE_CXX14 # endif # if _LIBCPP_STD_VER >= 17 # define _LIBCPP_CONSTEXPR_SINCE_CXX17 constexpr # else # define _LIBCPP_CONSTEXPR_SINCE_CXX17 # endif # if _LIBCPP_STD_VER >= 20 # define _LIBCPP_CONSTEXPR_SINCE_CXX20 constexpr # else # define _LIBCPP_CONSTEXPR_SINCE_CXX20 # endif # if _LIBCPP_STD_VER >= 23 # define _LIBCPP_CONSTEXPR_SINCE_CXX23 constexpr # else # define _LIBCPP_CONSTEXPR_SINCE_CXX23 # endif # ifndef _LIBCPP_WEAK # define _LIBCPP_WEAK __attribute__((__weak__)) # endif // Thread API // clang-format off # if !defined(_LIBCPP_HAS_NO_THREADS) && \ !defined(_LIBCPP_HAS_THREAD_API_PTHREAD) && \ !defined(_LIBCPP_HAS_THREAD_API_WIN32) && \ !defined(_LIBCPP_HAS_THREAD_API_EXTERNAL) # if defined(__FreeBSD__) || \ defined(__wasi__) || \ defined(__NetBSD__) || \ defined(__OpenBSD__) || \ defined(__NuttX__) || \ defined(__linux__) || \ defined(__GNU__) || \ defined(__APPLE__) || \ defined(__MVS__) || \ defined(_AIX) || \ defined(__EMSCRIPTEN__) // clang-format on # define _LIBCPP_HAS_THREAD_API_PTHREAD # elif defined(__Fuchsia__) // TODO(44575): Switch to C11 thread API when possible. # define _LIBCPP_HAS_THREAD_API_PTHREAD # elif defined(_LIBCPP_WIN32API) # define _LIBCPP_HAS_THREAD_API_WIN32 # else # error "No thread API" # endif // _LIBCPP_HAS_THREAD_API # endif // _LIBCPP_HAS_NO_THREADS # if defined(_LIBCPP_HAS_THREAD_API_PTHREAD) # if defined(__ANDROID__) && __ANDROID_API__ >= 30 # define _LIBCPP_HAS_COND_CLOCKWAIT # elif defined(_LIBCPP_GLIBC_PREREQ) # if _LIBCPP_GLIBC_PREREQ(2, 30) # define _LIBCPP_HAS_COND_CLOCKWAIT # endif # endif # endif # if defined(_LIBCPP_HAS_NO_THREADS) && defined(_LIBCPP_HAS_THREAD_API_PTHREAD) # error _LIBCPP_HAS_THREAD_API_PTHREAD may only be defined when \ _LIBCPP_HAS_NO_THREADS is not defined. # endif # if defined(_LIBCPP_HAS_NO_THREADS) && defined(_LIBCPP_HAS_THREAD_API_EXTERNAL) # error _LIBCPP_HAS_THREAD_API_EXTERNAL may not be defined when \ _LIBCPP_HAS_NO_THREADS is defined. # endif # if defined(_LIBCPP_HAS_NO_MONOTONIC_CLOCK) && !defined(_LIBCPP_HAS_NO_THREADS) # error _LIBCPP_HAS_NO_MONOTONIC_CLOCK may only be defined when \ _LIBCPP_HAS_NO_THREADS is defined. # endif # if !defined(_LIBCPP_HAS_NO_THREADS) && !defined(__STDCPP_THREADS__) # define __STDCPP_THREADS__ 1 # endif // The glibc and Bionic implementation of pthreads implements // pthread_mutex_destroy as nop for regular mutexes. Additionally, Win32 // mutexes have no destroy mechanism. // // This optimization can't be performed on Apple platforms, where // pthread_mutex_destroy can allow the kernel to release resources. // See https://llvm.org/D64298 for details. // // TODO(EricWF): Enable this optimization on Bionic after speaking to their // respective stakeholders. 
// clang-format off # if (defined(_LIBCPP_HAS_THREAD_API_PTHREAD) && defined(__GLIBC__)) || \ (defined(_LIBCPP_HAS_THREAD_API_C11) && defined(__Fuchsia__)) || \ defined(_LIBCPP_HAS_THREAD_API_WIN32) // clang-format on # define _LIBCPP_HAS_TRIVIAL_MUTEX_DESTRUCTION # endif // Destroying a condvar is a nop on Windows. // // This optimization can't be performed on Apple platforms, where // pthread_cond_destroy can allow the kernel to release resources. // See https://llvm.org/D64298 for details. // // TODO(EricWF): This is potentially true for some pthread implementations // as well. # if (defined(_LIBCPP_HAS_THREAD_API_C11) && defined(__Fuchsia__)) || defined(_LIBCPP_HAS_THREAD_API_WIN32) # define _LIBCPP_HAS_TRIVIAL_CONDVAR_DESTRUCTION # endif # if defined(__BIONIC__) || defined(__NuttX__) || defined(__Fuchsia__) || defined(__wasi__) || \ defined(_LIBCPP_HAS_MUSL_LIBC) || defined(__OpenBSD__) # define _LIBCPP_PROVIDES_DEFAULT_RUNE_TABLE # endif # if __has_feature(cxx_atomic) || __has_extension(c_atomic) || __has_keyword(_Atomic) # define _LIBCPP_HAS_C_ATOMIC_IMP # elif defined(_LIBCPP_COMPILER_GCC) # define _LIBCPP_HAS_GCC_ATOMIC_IMP # endif # if !defined(_LIBCPP_HAS_C_ATOMIC_IMP) && !defined(_LIBCPP_HAS_GCC_ATOMIC_IMP) && \ !defined(_LIBCPP_HAS_EXTERNAL_ATOMIC_IMP) # define _LIBCPP_HAS_NO_ATOMIC_HEADER # else # ifndef _LIBCPP_ATOMIC_FLAG_TYPE # define _LIBCPP_ATOMIC_FLAG_TYPE bool # endif # endif # if defined(__FreeBSD__) && defined(__clang__) && __has_attribute(__no_thread_safety_analysis__) # define _LIBCPP_NO_THREAD_SAFETY_ANALYSIS __attribute__((__no_thread_safety_analysis__)) # else # define _LIBCPP_NO_THREAD_SAFETY_ANALYSIS # endif # if defined(_LIBCPP_ENABLE_THREAD_SAFETY_ANNOTATIONS) # if defined(__clang__) && __has_attribute(acquire_capability) // Work around the attribute handling in clang. When both __declspec and // __attribute__ are present, the processing goes awry preventing the definition // of the types. In MinGW mode, __declspec evaluates to __attribute__, and thus // combining the two does work. # if !defined(_MSC_VER) # define _LIBCPP_HAS_THREAD_SAFETY_ANNOTATIONS # endif # endif # endif # ifdef _LIBCPP_HAS_THREAD_SAFETY_ANNOTATIONS # define _LIBCPP_THREAD_SAFETY_ANNOTATION(x) __attribute__((x)) # else # define _LIBCPP_THREAD_SAFETY_ANNOTATION(x) # endif # if _LIBCPP_STD_VER >= 20 # define _LIBCPP_CONSTINIT constinit # elif __has_attribute(__require_constant_initialization__) # define _LIBCPP_CONSTINIT __attribute__((__require_constant_initialization__)) # else # define _LIBCPP_CONSTINIT # endif # if defined(__CUDACC__) || defined(__CUDA_ARCH__) || defined(__CUDA_LIBDEVICE__) // The CUDA SDK contains an unfortunate definition for the __noinline__ macro, // which breaks the regular __attribute__((__noinline__)) syntax. Therefore, // when compiling for CUDA we use the non-underscored version of the noinline // attribute. // // This is a temporary workaround and we still expect the CUDA SDK team to solve // this issue properly in the SDK headers. // // See https://github.com/llvm/llvm-project/pull/73838 for more details. # define _LIBCPP_NOINLINE __attribute__((noinline)) # elif __has_attribute(__noinline__) # define _LIBCPP_NOINLINE __attribute__((__noinline__)) # else # define _LIBCPP_NOINLINE # endif // We often repeat things just for handling wide characters in the library. // When wide characters are disabled, it can be useful to have a quick way of // disabling it without having to resort to #if-#endif, which has a larger // impact on readability. 
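Illustration only (the helper name is invented): the macro defined just below lets a wide-character declaration disappear entirely on configurations without wchar_t support, with no #if/#endif at the point of use.

  _LIBCPP_IF_WIDE_CHARACTERS(wchar_t* __widen_chars(const char* __first, const char* __last, wchar_t* __out);)

  // With _LIBCPP_HAS_NO_WIDE_CHARACTERS defined, the line above expands to nothing;
  // otherwise it expands to its argument unchanged.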
# if defined(_LIBCPP_HAS_NO_WIDE_CHARACTERS) # define _LIBCPP_IF_WIDE_CHARACTERS(...) # else # define _LIBCPP_IF_WIDE_CHARACTERS(...) __VA_ARGS__ # endif // clang-format off # define _LIBCPP_PUSH_MACROS _Pragma("push_macro(\"min\")") _Pragma("push_macro(\"max\")") _Pragma("push_macro(\"refresh\")") _Pragma("push_macro(\"move\")") _Pragma("push_macro(\"erase\")") # define _LIBCPP_POP_MACROS _Pragma("pop_macro(\"min\")") _Pragma("pop_macro(\"max\")") _Pragma("pop_macro(\"refresh\")") _Pragma("pop_macro(\"move\")") _Pragma("pop_macro(\"erase\")") // clang-format on # ifndef _LIBCPP_NO_AUTO_LINK # if defined(_LIBCPP_ABI_MICROSOFT) && !defined(_LIBCPP_BUILDING_LIBRARY) # if !defined(_LIBCPP_DISABLE_VISIBILITY_ANNOTATIONS) # pragma comment(lib, "c++.lib") # else # pragma comment(lib, "libc++.lib") # endif # endif // defined(_LIBCPP_ABI_MICROSOFT) && !defined(_LIBCPP_BUILDING_LIBRARY) # endif // _LIBCPP_NO_AUTO_LINK // Configures the fopen close-on-exec mode character, if any. This string will // be appended to any mode string used by fstream for fopen/fdopen. // // Not all platforms support this, but it helps avoid fd-leaks on platforms that // do. # if defined(__BIONIC__) # define _LIBCPP_FOPEN_CLOEXEC_MODE "e" # else # define _LIBCPP_FOPEN_CLOEXEC_MODE # endif # if __has_cpp_attribute(msvc::no_unique_address) // MSVC implements [[no_unique_address]] as a silent no-op currently. // (If/when MSVC breaks its C++ ABI, it will be changed to work as intended.) // However, MSVC implements [[msvc::no_unique_address]] which does what // [[no_unique_address]] is supposed to do, in general. // Clang-cl does not yet (14.0) implement either [[no_unique_address]] or // [[msvc::no_unique_address]] though. If/when it does implement // [[msvc::no_unique_address]], this should be preferred though. # define _LIBCPP_NO_UNIQUE_ADDRESS [[msvc::no_unique_address]] # elif __has_cpp_attribute(no_unique_address) # define _LIBCPP_NO_UNIQUE_ADDRESS [[__no_unique_address__]] # else # define _LIBCPP_NO_UNIQUE_ADDRESS /* nothing */ // Note that this can be replaced by #error as soon as clang-cl // implements msvc::no_unique_address, since there should be no C++20 // compiler that doesn't support one of the two attributes at that point. // We generally don't want to use this macro outside of C++20-only code, // because using it conditionally in one language version only would make // the ABI inconsistent. # endif // c8rtomb() and mbrtoc8() were added in C++20 and C23. Support for these // functions is gradually being added to existing C libraries. The conditions // below check for known C library versions and conditions under which these // functions are declared by the C library. # define _LIBCPP_HAS_NO_C8RTOMB_MBRTOC8 // GNU libc 2.36 and newer declare c8rtomb() and mbrtoc8() in C++ modes if // __cpp_char8_t is defined or if C2X extensions are enabled. Determining // the latter depends on internal GNU libc details that are not appropriate // to depend on here, so any declarations present when __cpp_char8_t is not // defined are ignored. # if defined(_LIBCPP_GLIBC_PREREQ) # if _LIBCPP_GLIBC_PREREQ(2, 36) && defined(__cpp_char8_t) # undef _LIBCPP_HAS_NO_C8RTOMB_MBRTOC8 # endif # endif // There are a handful of public standard library types that are intended to // support CTAD but don't need any explicit deduction guides to do so. This // macro is used to mark them as such, which suppresses the // '-Wctad-maybe-unsupported' compiler warning when CTAD is used in user code // with these classes. 
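A hedged sketch of how the marker defined just below is applied (illustration only; `__box` is a made-up class). The dummy deduction guide it declares tells Clang the type intentionally supports CTAD, so `__box __b{42};` deduces `__box<int>` without a -Wctad-maybe-unsupported warning even though no real deduction guide exists.

  template <class _Tp>
  struct __box {
    _LIBCPP_HIDE_FROM_ABI __box(_Tp __v) : __value_(__v) {}
    _Tp __value_;
  };
  _LIBCPP_CTAD_SUPPORTED_FOR_TYPE(__box);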
# if _LIBCPP_STD_VER >= 17 # ifdef _LIBCPP_COMPILER_CLANG_BASED # define _LIBCPP_CTAD_SUPPORTED_FOR_TYPE(_ClassName) \ template \ [[maybe_unused]] _ClassName(typename _Tag::__allow_ctad...)->_ClassName<_Tag...> # else # define _LIBCPP_CTAD_SUPPORTED_FOR_TYPE(ClassName) \ template \ ClassName(typename _Tag::__allow_ctad...)->ClassName<_Tag...> # endif # else # define _LIBCPP_CTAD_SUPPORTED_FOR_TYPE(_ClassName) static_assert(true, "") # endif // TODO(varconst): currently, there are bugs in Clang's intrinsics when handling Objective-C++ `id`, so don't use // compiler intrinsics in the Objective-C++ mode. # ifdef __OBJC__ # define _LIBCPP_WORKAROUND_OBJCXX_COMPILER_INTRINSICS # endif # define _PSTL_PRAGMA(x) _Pragma(#x) // Enable SIMD for compilers that support OpenMP 4.0 # if (defined(_OPENMP) && _OPENMP >= 201307) # define _PSTL_UDR_PRESENT # define _PSTL_PRAGMA_SIMD _PSTL_PRAGMA(omp simd) # define _PSTL_PRAGMA_DECLARE_SIMD _PSTL_PRAGMA(omp declare simd) # define _PSTL_PRAGMA_SIMD_REDUCTION(PRM) _PSTL_PRAGMA(omp simd reduction(PRM)) # define _PSTL_PRAGMA_SIMD_SCAN(PRM) _PSTL_PRAGMA(omp simd reduction(inscan, PRM)) # define _PSTL_PRAGMA_SIMD_INCLUSIVE_SCAN(PRM) _PSTL_PRAGMA(omp scan inclusive(PRM)) # define _PSTL_PRAGMA_SIMD_EXCLUSIVE_SCAN(PRM) _PSTL_PRAGMA(omp scan exclusive(PRM)) // Declaration of reduction functor, where // NAME - the name of the functor // OP - type of the callable object with the reduction operation // omp_in - refers to the local partial result // omp_out - refers to the final value of the combiner operator // omp_priv - refers to the private copy of the initial value // omp_orig - refers to the original variable to be reduced # define _PSTL_PRAGMA_DECLARE_REDUCTION(NAME, OP) \ _PSTL_PRAGMA(omp declare reduction(NAME:OP : omp_out(omp_in)) initializer(omp_priv = omp_orig)) # elif defined(_LIBCPP_COMPILER_CLANG_BASED) # define _PSTL_PRAGMA_SIMD _Pragma("clang loop vectorize(enable) interleave(enable)") # define _PSTL_PRAGMA_DECLARE_SIMD # define _PSTL_PRAGMA_SIMD_REDUCTION(PRM) _Pragma("clang loop vectorize(enable) interleave(enable)") # define _PSTL_PRAGMA_SIMD_SCAN(PRM) _Pragma("clang loop vectorize(enable) interleave(enable)") # define _PSTL_PRAGMA_SIMD_INCLUSIVE_SCAN(PRM) # define _PSTL_PRAGMA_SIMD_EXCLUSIVE_SCAN(PRM) # define _PSTL_PRAGMA_DECLARE_REDUCTION(NAME, OP) # else // (defined(_OPENMP) && _OPENMP >= 201307) # define _PSTL_PRAGMA_SIMD # define _PSTL_PRAGMA_DECLARE_SIMD # define _PSTL_PRAGMA_SIMD_REDUCTION(PRM) # define _PSTL_PRAGMA_SIMD_SCAN(PRM) # define _PSTL_PRAGMA_SIMD_INCLUSIVE_SCAN(PRM) # define _PSTL_PRAGMA_SIMD_EXCLUSIVE_SCAN(PRM) # define _PSTL_PRAGMA_DECLARE_REDUCTION(NAME, OP) # endif // (defined(_OPENMP) && _OPENMP >= 201307) # define _PSTL_USE_NONTEMPORAL_STORES_IF_ALLOWED // Optional attributes - these are useful for a better QoI, but not required to be available # if __has_attribute(__no_sanitize__) && !defined(_LIBCPP_COMPILER_GCC) # define _LIBCPP_NO_CFI __attribute__((__no_sanitize__("cfi"))) # else # define _LIBCPP_NO_CFI # endif # if __has_attribute(__malloc__) # define _LIBCPP_NOALIAS __attribute__((__malloc__)) # else # define _LIBCPP_NOALIAS # endif # if __has_attribute(__using_if_exists__) # define _LIBCPP_USING_IF_EXISTS __attribute__((__using_if_exists__)) # else # define _LIBCPP_USING_IF_EXISTS # endif # if __has_cpp_attribute(__nodiscard__) # define _LIBCPP_NODISCARD [[__nodiscard__]] # else // We can't use GCC's [[gnu::warn_unused_result]] and // __attribute__((warn_unused_result)), because GCC does not silence them via // (void) 
cast. # define _LIBCPP_NODISCARD # endif # if __has_attribute(__no_destroy__) # define _LIBCPP_NO_DESTROY __attribute__((__no_destroy__)) # else # define _LIBCPP_NO_DESTROY # endif # if __has_attribute(__diagnose_if__) # define _LIBCPP_DIAGNOSE_WARNING(...) __attribute__((__diagnose_if__(__VA_ARGS__, "warning"))) # else # define _LIBCPP_DIAGNOSE_WARNING(...) # endif // Use a function like macro to imply that it must be followed by a semicolon # if __has_cpp_attribute(fallthrough) # define _LIBCPP_FALLTHROUGH() [[fallthrough]] # elif __has_attribute(__fallthrough__) # define _LIBCPP_FALLTHROUGH() __attribute__((__fallthrough__)) # else # define _LIBCPP_FALLTHROUGH() ((void)0) # endif # if __has_cpp_attribute(_Clang::__lifetimebound__) # define _LIBCPP_LIFETIMEBOUND [[_Clang::__lifetimebound__]] # else # define _LIBCPP_LIFETIMEBOUND # endif # if __has_attribute(__nodebug__) # define _LIBCPP_NODEBUG __attribute__((__nodebug__)) # else # define _LIBCPP_NODEBUG # endif # if __has_attribute(__standalone_debug__) # define _LIBCPP_STANDALONE_DEBUG __attribute__((__standalone_debug__)) # else # define _LIBCPP_STANDALONE_DEBUG # endif # if __has_attribute(__preferred_name__) # define _LIBCPP_PREFERRED_NAME(x) __attribute__((__preferred_name__(x))) # else # define _LIBCPP_PREFERRED_NAME(x) # endif # if __has_attribute(__no_sanitize__) # define _LIBCPP_NO_SANITIZE(...) __attribute__((__no_sanitize__(__VA_ARGS__))) # else # define _LIBCPP_NO_SANITIZE(...) # endif # if __has_attribute(__init_priority__) # define _LIBCPP_INIT_PRIORITY_MAX __attribute__((__init_priority__(100))) # else # define _LIBCPP_INIT_PRIORITY_MAX # endif # if __has_attribute(__format__) // The attribute uses 1-based indices for ordinary and static member functions. // The attribute uses 2-based indices for non-static member functions. # define _LIBCPP_ATTRIBUTE_FORMAT(archetype, format_string_index, first_format_arg_index) \ __attribute__((__format__(archetype, format_string_index, first_format_arg_index))) # else # define _LIBCPP_ATTRIBUTE_FORMAT(archetype, format_string_index, first_format_arg_index) /* nothing */ # endif # if __has_attribute(__packed__) # define _LIBCPP_PACKED __attribute__((__packed__)) # else # define _LIBCPP_PACKED # endif # if defined(_LIBCPP_ABI_MICROSOFT) && __has_declspec_attribute(empty_bases) # define _LIBCPP_DECLSPEC_EMPTY_BASES __declspec(empty_bases) # else # define _LIBCPP_DECLSPEC_EMPTY_BASES # endif // Allow for build-time disabling of unsigned integer sanitization # if __has_attribute(no_sanitize) && !defined(_LIBCPP_COMPILER_GCC) # define _LIBCPP_DISABLE_UBSAN_UNSIGNED_INTEGER_CHECK __attribute__((__no_sanitize__("unsigned-integer-overflow"))) # else # define _LIBCPP_DISABLE_UBSAN_UNSIGNED_INTEGER_CHECK # endif // Clang-18 has support for deducing this, but it does not set the FTM. # if defined(__cpp_explicit_this_parameter) || (defined(_LIBCPP_CLANG_VER) && _LIBCPP_CLANG_VER >= 1800) # define _LIBCPP_HAS_EXPLICIT_THIS_PARAMETER # endif #endif // __cplusplus #endif // _LIBCPP___CONFIG diff --git a/llvm/include/llvm/Demangle/Demangle.h b/llvm/include/llvm/Demangle/Demangle.h index fe129603c078..132e5088b551 100644 --- a/llvm/include/llvm/Demangle/Demangle.h +++ b/llvm/include/llvm/Demangle/Demangle.h @@ -1,132 +1,136 @@ //===--- Demangle.h ---------------------------------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #ifndef LLVM_DEMANGLE_DEMANGLE_H #define LLVM_DEMANGLE_DEMANGLE_H #include +#include #include #include namespace llvm { /// This is a llvm local version of __cxa_demangle. Other than the name and /// being in the llvm namespace it is identical. /// /// The mangled_name is demangled into buf and returned. If the buffer is not /// large enough, realloc is used to expand it. /// /// The *status will be set to a value from the following enumeration enum : int { demangle_unknown_error = -4, demangle_invalid_args = -3, demangle_invalid_mangled_name = -2, demangle_memory_alloc_failure = -1, demangle_success = 0, }; /// Returns a non-NULL pointer to a NUL-terminated C style string /// that should be explicitly freed, if successful. Otherwise, may return /// nullptr if mangled_name is not a valid mangling or is nullptr. char *itaniumDemangle(std::string_view mangled_name, bool ParseParams = true); enum MSDemangleFlags { MSDF_None = 0, MSDF_DumpBackrefs = 1 << 0, MSDF_NoAccessSpecifier = 1 << 1, MSDF_NoCallingConvention = 1 << 2, MSDF_NoReturnType = 1 << 3, MSDF_NoMemberType = 1 << 4, MSDF_NoVariableType = 1 << 5, }; /// Demangles the Microsoft symbol pointed at by mangled_name and returns it. /// Returns a pointer to the start of a null-terminated demangled string on /// success, or nullptr on error. /// If n_read is non-null and demangling was successful, it receives how many /// bytes of the input string were consumed. /// status receives one of the demangle_ enum entries above if it's not nullptr. /// Flags controls various details of the demangled representation. char *microsoftDemangle(std::string_view mangled_name, size_t *n_read, int *status, MSDemangleFlags Flags = MSDF_None); +std::optional +getArm64ECInsertionPointInMangledName(std::string_view MangledName); + // Demangles a Rust v0 mangled symbol. char *rustDemangle(std::string_view MangledName); // Demangles a D mangled symbol. char *dlangDemangle(std::string_view MangledName); /// Attempt to demangle a string using different demangling schemes. /// The function uses heuristics to determine which demangling scheme to use. /// \param MangledName - reference to string to demangle. /// \returns - the demangled string, or a copy of the input string if no /// demangling occurred. std::string demangle(std::string_view MangledName); bool nonMicrosoftDemangle(std::string_view MangledName, std::string &Result, bool CanHaveLeadingDot = true, bool ParseParams = true); /// "Partial" demangler. This supports demangling a string into an AST /// (typically an intermediate stage in itaniumDemangle) and querying certain /// properties or partially printing the demangled name. struct ItaniumPartialDemangler { ItaniumPartialDemangler(); ItaniumPartialDemangler(ItaniumPartialDemangler &&Other); ItaniumPartialDemangler &operator=(ItaniumPartialDemangler &&Other); /// Demangle into an AST. Subsequent calls to the rest of the member functions /// implicitly operate on the AST this produces. /// \return true on error, false otherwise bool partialDemangle(const char *MangledName); /// Just print the entire mangled name into Buf. Buf and N behave like the /// second and third parameters to __cxa_demangle. char *finishDemangle(char *Buf, size_t *N) const; /// Get the base name of a function. This doesn't include trailing template /// arguments, ie for "a::b" this function returns "b". 
char *getFunctionBaseName(char *Buf, size_t *N) const; /// Get the context name for a function. For "a::b::c", this function returns /// "a::b". char *getFunctionDeclContextName(char *Buf, size_t *N) const; /// Get the entire name of this function. char *getFunctionName(char *Buf, size_t *N) const; /// Get the parameters for this function. char *getFunctionParameters(char *Buf, size_t *N) const; char *getFunctionReturnType(char *Buf, size_t *N) const; /// If this function has any cv or reference qualifiers. These imply that /// the function is a non-static member function. bool hasFunctionQualifiers() const; /// If this symbol describes a constructor or destructor. bool isCtorOrDtor() const; /// If this symbol describes a function. bool isFunction() const; /// If this symbol describes a variable. bool isData() const; /// If this symbol is a . These are generally implicitly /// generated by the implementation, such as vtables and typeinfo names. bool isSpecialName() const; ~ItaniumPartialDemangler(); private: void *RootNode; void *Context; }; } // namespace llvm #endif diff --git a/llvm/include/llvm/Demangle/MicrosoftDemangle.h b/llvm/include/llvm/Demangle/MicrosoftDemangle.h index 6891185a28e5..276efa760369 100644 --- a/llvm/include/llvm/Demangle/MicrosoftDemangle.h +++ b/llvm/include/llvm/Demangle/MicrosoftDemangle.h @@ -1,289 +1,293 @@ //===------------------------- MicrosoftDemangle.h --------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #ifndef LLVM_DEMANGLE_MICROSOFTDEMANGLE_H #define LLVM_DEMANGLE_MICROSOFTDEMANGLE_H +#include "llvm/Demangle/Demangle.h" #include "llvm/Demangle/MicrosoftDemangleNodes.h" #include #include #include namespace llvm { namespace ms_demangle { // This memory allocator is extremely fast, but it doesn't call dtors // for allocated objects. That means you can't use STL containers // (such as std::vector) with this allocator. But it pays off -- // the demangler is 3x faster with this allocator compared to one with // STL containers. constexpr size_t AllocUnit = 4096; class ArenaAllocator { struct AllocatorNode { uint8_t *Buf = nullptr; size_t Used = 0; size_t Capacity = 0; AllocatorNode *Next = nullptr; }; void addNode(size_t Capacity) { AllocatorNode *NewHead = new AllocatorNode; NewHead->Buf = new uint8_t[Capacity]; NewHead->Next = Head; NewHead->Capacity = Capacity; Head = NewHead; NewHead->Used = 0; } public: ArenaAllocator() { addNode(AllocUnit); } ~ArenaAllocator() { while (Head) { assert(Head->Buf); delete[] Head->Buf; AllocatorNode *Next = Head->Next; delete Head; Head = Next; } } // Delete the copy constructor and the copy assignment operator. 
ArenaAllocator(const ArenaAllocator &) = delete; ArenaAllocator &operator=(const ArenaAllocator &) = delete; char *allocUnalignedBuffer(size_t Size) { assert(Head && Head->Buf); uint8_t *P = Head->Buf + Head->Used; Head->Used += Size; if (Head->Used <= Head->Capacity) return reinterpret_cast(P); addNode(std::max(AllocUnit, Size)); Head->Used = Size; return reinterpret_cast(Head->Buf); } template T *allocArray(size_t Count) { size_t Size = Count * sizeof(T); assert(Head && Head->Buf); size_t P = (size_t)Head->Buf + Head->Used; uintptr_t AlignedP = (((size_t)P + alignof(T) - 1) & ~(size_t)(alignof(T) - 1)); uint8_t *PP = (uint8_t *)AlignedP; size_t Adjustment = AlignedP - P; Head->Used += Size + Adjustment; if (Head->Used <= Head->Capacity) return new (PP) T[Count](); addNode(std::max(AllocUnit, Size)); Head->Used = Size; return new (Head->Buf) T[Count](); } template T *alloc(Args &&... ConstructorArgs) { constexpr size_t Size = sizeof(T); assert(Head && Head->Buf); size_t P = (size_t)Head->Buf + Head->Used; uintptr_t AlignedP = (((size_t)P + alignof(T) - 1) & ~(size_t)(alignof(T) - 1)); uint8_t *PP = (uint8_t *)AlignedP; size_t Adjustment = AlignedP - P; Head->Used += Size + Adjustment; if (Head->Used <= Head->Capacity) return new (PP) T(std::forward(ConstructorArgs)...); static_assert(Size < AllocUnit); addNode(AllocUnit); Head->Used = Size; return new (Head->Buf) T(std::forward(ConstructorArgs)...); } private: AllocatorNode *Head = nullptr; }; struct BackrefContext { static constexpr size_t Max = 10; TypeNode *FunctionParams[Max]; size_t FunctionParamCount = 0; // The first 10 BackReferences in a mangled name can be back-referenced by // special name @[0-9]. This is a storage for the first 10 BackReferences. NamedIdentifierNode *Names[Max]; size_t NamesCount = 0; }; enum class QualifierMangleMode { Drop, Mangle, Result }; enum NameBackrefBehavior : uint8_t { NBB_None = 0, // don't save any names as backrefs. NBB_Template = 1 << 0, // save template instanations. NBB_Simple = 1 << 1, // save simple names. }; enum class FunctionIdentifierCodeGroup { Basic, Under, DoubleUnder }; // Demangler class takes the main role in demangling symbols. // It has a set of functions to parse mangled symbols into Type instances. // It also has a set of functions to convert Type instances to strings. class Demangler { + friend std::optional + llvm::getArm64ECInsertionPointInMangledName(std::string_view MangledName); + public: Demangler() = default; virtual ~Demangler() = default; // You are supposed to call parse() first and then check if error is true. If // it is false, call output() to write the formatted name to the given stream. SymbolNode *parse(std::string_view &MangledName); TagTypeNode *parseTagUniqueName(std::string_view &MangledName); // True if an error occurred. bool Error = false; void dumpBackReferences(); private: SymbolNode *demangleEncodedSymbol(std::string_view &MangledName, QualifiedNameNode *QN); SymbolNode *demangleDeclarator(std::string_view &MangledName); SymbolNode *demangleMD5Name(std::string_view &MangledName); SymbolNode *demangleTypeinfoName(std::string_view &MangledName); VariableSymbolNode *demangleVariableEncoding(std::string_view &MangledName, StorageClass SC); FunctionSymbolNode *demangleFunctionEncoding(std::string_view &MangledName); Qualifiers demanglePointerExtQualifiers(std::string_view &MangledName); // Parser functions. This is a recursive-descent parser. 
TypeNode *demangleType(std::string_view &MangledName, QualifierMangleMode QMM); PrimitiveTypeNode *demanglePrimitiveType(std::string_view &MangledName); CustomTypeNode *demangleCustomType(std::string_view &MangledName); TagTypeNode *demangleClassType(std::string_view &MangledName); PointerTypeNode *demanglePointerType(std::string_view &MangledName); PointerTypeNode *demangleMemberPointerType(std::string_view &MangledName); FunctionSignatureNode *demangleFunctionType(std::string_view &MangledName, bool HasThisQuals); ArrayTypeNode *demangleArrayType(std::string_view &MangledName); NodeArrayNode *demangleFunctionParameterList(std::string_view &MangledName, bool &IsVariadic); NodeArrayNode *demangleTemplateParameterList(std::string_view &MangledName); std::pair demangleNumber(std::string_view &MangledName); uint64_t demangleUnsigned(std::string_view &MangledName); int64_t demangleSigned(std::string_view &MangledName); void memorizeString(std::string_view s); void memorizeIdentifier(IdentifierNode *Identifier); /// Allocate a copy of \p Borrowed into memory that we own. std::string_view copyString(std::string_view Borrowed); QualifiedNameNode * demangleFullyQualifiedTypeName(std::string_view &MangledName); QualifiedNameNode * demangleFullyQualifiedSymbolName(std::string_view &MangledName); IdentifierNode *demangleUnqualifiedTypeName(std::string_view &MangledName, bool Memorize); IdentifierNode *demangleUnqualifiedSymbolName(std::string_view &MangledName, NameBackrefBehavior NBB); QualifiedNameNode *demangleNameScopeChain(std::string_view &MangledName, IdentifierNode *UnqualifiedName); IdentifierNode *demangleNameScopePiece(std::string_view &MangledName); NamedIdentifierNode *demangleBackRefName(std::string_view &MangledName); IdentifierNode * demangleTemplateInstantiationName(std::string_view &MangledName, NameBackrefBehavior NBB); IntrinsicFunctionKind translateIntrinsicFunctionCode(char CH, FunctionIdentifierCodeGroup Group); IdentifierNode *demangleFunctionIdentifierCode(std::string_view &MangledName); IdentifierNode * demangleFunctionIdentifierCode(std::string_view &MangledName, FunctionIdentifierCodeGroup Group); StructorIdentifierNode * demangleStructorIdentifier(std::string_view &MangledName, bool IsDestructor); ConversionOperatorIdentifierNode * demangleConversionOperatorIdentifier(std::string_view &MangledName); LiteralOperatorIdentifierNode * demangleLiteralOperatorIdentifier(std::string_view &MangledName); SymbolNode *demangleSpecialIntrinsic(std::string_view &MangledName); SpecialTableSymbolNode * demangleSpecialTableSymbolNode(std::string_view &MangledName, SpecialIntrinsicKind SIK); LocalStaticGuardVariableNode * demangleLocalStaticGuard(std::string_view &MangledName, bool IsThread); VariableSymbolNode *demangleUntypedVariable(ArenaAllocator &Arena, std::string_view &MangledName, std::string_view VariableName); VariableSymbolNode * demangleRttiBaseClassDescriptorNode(ArenaAllocator &Arena, std::string_view &MangledName); FunctionSymbolNode *demangleInitFiniStub(std::string_view &MangledName, bool IsDestructor); NamedIdentifierNode *demangleSimpleName(std::string_view &MangledName, bool Memorize); NamedIdentifierNode * demangleAnonymousNamespaceName(std::string_view &MangledName); NamedIdentifierNode * demangleLocallyScopedNamePiece(std::string_view &MangledName); EncodedStringLiteralNode * demangleStringLiteral(std::string_view &MangledName); FunctionSymbolNode *demangleVcallThunkNode(std::string_view &MangledName); std::string_view demangleSimpleString(std::string_view 
  FuncClass demangleFunctionClass(std::string_view &MangledName);
  CallingConv demangleCallingConvention(std::string_view &MangledName);
  StorageClass demangleVariableStorageClass(std::string_view &MangledName);
  bool demangleThrowSpecification(std::string_view &MangledName);
  wchar_t demangleWcharLiteral(std::string_view &MangledName);
  uint8_t demangleCharLiteral(std::string_view &MangledName);

  std::pair<Qualifiers, bool> demangleQualifiers(std::string_view &MangledName);

  // Memory allocator.
  ArenaAllocator Arena;

  // A single type uses one global back-ref table for all function params.
  // This means back-refs can even go "into" other types.  Examples:
  //
  //  // Second int* is a back-ref to first.
  //  void foo(int *, int*);
  //
  //  // Second int* is not a back-ref to first (first is not a function param).
  //  int* foo(int*);
  //
  //  // Second int* is a back-ref to first (ALL function types share the same
  //  // back-ref map.
  //  using F = void(*)(int*);
  //  F G(int *);
  BackrefContext Backrefs;
};

} // namespace ms_demangle
} // namespace llvm

#endif // LLVM_DEMANGLE_MICROSOFTDEMANGLE_H
diff --git a/llvm/include/llvm/IR/Mangler.h b/llvm/include/llvm/IR/Mangler.h
index f28ffc961b6d..33af40c5ae98 100644
--- a/llvm/include/llvm/IR/Mangler.h
+++ b/llvm/include/llvm/IR/Mangler.h
@@ -1,61 +1,67 @@
//===-- llvm/IR/Mangler.h - Self-contained name mangler ---------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Unified name mangler for various backends.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_IR_MANGLER_H
#define LLVM_IR_MANGLER_H

#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/StringRef.h"

namespace llvm {

class DataLayout;
class GlobalValue;
template <typename T> class SmallVectorImpl;
class Triple;
class Twine;
class raw_ostream;

class Mangler {
  /// We need to give global values the same name every time they are mangled.
  /// This keeps track of the number we give to anonymous ones.
  mutable DenseMap<const GlobalValue *, unsigned> AnonGlobalIDs;

public:
  /// Print the appropriate prefix and the specified global variable's name.
  /// If the global variable doesn't have a name, this fills in a unique name
  /// for the global.
  void getNameWithPrefix(raw_ostream &OS, const GlobalValue *GV,
                         bool CannotUsePrivateLabel) const;
  void getNameWithPrefix(SmallVectorImpl<char> &OutName, const GlobalValue *GV,
                         bool CannotUsePrivateLabel) const;

  /// Print the appropriate prefix and the specified name as the global variable
  /// name. GVName must not be empty.
  static void getNameWithPrefix(raw_ostream &OS, const Twine &GVName,
                                const DataLayout &DL);
  static void getNameWithPrefix(SmallVectorImpl<char> &OutName,
                                const Twine &GVName, const DataLayout &DL);
};

void emitLinkerFlagsForGlobalCOFF(raw_ostream &OS, const GlobalValue *GV,
                                  const Triple &TT, Mangler &Mangler);

void emitLinkerFlagsForUsedCOFF(raw_ostream &OS, const GlobalValue *GV,
                                const Triple &T, Mangler &M);

std::optional<std::string> getArm64ECMangledFunctionName(StringRef Name);
std::optional<std::string> getArm64ECDemangledFunctionName(StringRef Name);

+/// Check if an ARM64EC function name is mangled.
+bool inline isArm64ECMangledFunctionName(StringRef Name) {
+  return Name[0] == '#' ||
+         (Name[0] == '?' && Name.find("@$$h") != StringRef::npos);
+}
+
} // End llvm namespace

#endif
diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td
index 46044aab79a8..e7895258438d 100644
--- a/llvm/include/llvm/Target/TargetSelectionDAG.td
+++ b/llvm/include/llvm/Target/TargetSelectionDAG.td
@@ -1,2045 +1,2049 @@
//===- TargetSelectionDAG.td - Common code for DAG isels ---*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the target-independent interfaces used by SelectionDAG
// instruction selection generators.
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// Selection DAG Type Constraint definitions.
//
// Note that the semantics of these constraints are hard coded into tblgen. To
// modify or add constraints, you have to hack tblgen.
//

class SDTypeConstraint<int opnum> {
  int OperandNum = opnum;
}

// SDTCisVT - The specified operand has exactly this VT.
class SDTCisVT<int OpNum, ValueType vt> : SDTypeConstraint<OpNum> {
  ValueType VT = vt;
}

class SDTCisPtrTy<int OpNum> : SDTypeConstraint<OpNum>;

// SDTCisInt - The specified operand has integer type.
class SDTCisInt<int OpNum> : SDTypeConstraint<OpNum>;

// SDTCisFP - The specified operand has floating-point type.
class SDTCisFP<int OpNum> : SDTypeConstraint<OpNum>;

// SDTCisVec - The specified operand has a vector type.
class SDTCisVec<int OpNum> : SDTypeConstraint<OpNum>;

// SDTCisSameAs - The two specified operands have identical types.
class SDTCisSameAs<int OpNum, int OtherOp> : SDTypeConstraint<OpNum> {
  int OtherOperandNum = OtherOp;
}

// SDTCisVTSmallerThanOp - The specified operand is a VT SDNode, and its type is
// smaller than the 'Other' operand.
class SDTCisVTSmallerThanOp<int OpNum, int OtherOp> : SDTypeConstraint<OpNum> {
  int OtherOperandNum = OtherOp;
}

class SDTCisOpSmallerThanOp<int SmallOp, int BigOp> : SDTypeConstraint<SmallOp>{
  int BigOperandNum = BigOp;
}

/// SDTCisEltOfVec - This indicates that ThisOp is a scalar type of the same
/// type as the element type of OtherOp, which is a vector type.
class SDTCisEltOfVec<int ThisOp, int OtherOp> : SDTypeConstraint<ThisOp> {
  int OtherOpNum = OtherOp;
}

/// SDTCisSubVecOfVec - This indicates that ThisOp is a vector type
/// with length less that of OtherOp, which is a vector type.
class SDTCisSubVecOfVec<int ThisOp, int OtherOp> : SDTypeConstraint<ThisOp> {
  int OtherOpNum = OtherOp;
}

// SDTCVecEltisVT - The specified operand is vector type with element type
// of VT.
class SDTCVecEltisVT<int OpNum, ValueType vt> : SDTypeConstraint<OpNum> {
  ValueType VT = vt;
}

// SDTCisSameNumEltsAs - The two specified operands have identical number
// of elements.
class SDTCisSameNumEltsAs<int OpNum, int OtherOp> : SDTypeConstraint<OpNum> {
  int OtherOperandNum = OtherOp;
}

// SDTCisSameSizeAs - The two specified operands have identical size.
class SDTCisSameSizeAs<int OpNum, int OtherOp> : SDTypeConstraint<OpNum> {
  int OtherOperandNum = OtherOp;
}

//===----------------------------------------------------------------------===//
// Selection DAG Type Profile definitions.
//
// These use the constraints defined above to describe the type requirements of
// the various nodes. These are not hard coded into tblgen, allowing targets to
// add their own if needed.
//

// SDTypeProfile - This profile describes the type requirements of a Selection
// DAG node.
class SDTypeProfile<int numresults, int numoperands,
                    list<SDTypeConstraint> constraints> {
  int NumResults = numresults;
  int NumOperands = numoperands;
  list<SDTypeConstraint> Constraints = constraints;
}

// Builtin profiles.
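// NOTE (editorial, illustrative only -- not part of this patch): a profile is
// read as SDTypeProfile<#results, #operands, [constraints]>, with results
// numbered before operands in the constraint indices.  For example, the
// SDTCleanupret profile introduced further down,
//   def SDTCleanupret : SDTypeProfile<0, 1, [SDTCisVT<0, OtherVT>]>;
// describes a node with no results and one basic-block-style OtherVT operand,
// mirroring the existing SDTCatchret profile.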
def SDTIntLeaf: SDTypeProfile<1, 0, [SDTCisInt<0>]>; // for 'imm'. def SDTFPLeaf : SDTypeProfile<1, 0, [SDTCisFP<0>]>; // for 'fpimm'. def SDTPtrLeaf: SDTypeProfile<1, 0, [SDTCisPtrTy<0>]>; // for '&g'. def SDTOther : SDTypeProfile<1, 0, [SDTCisVT<0, OtherVT>]>; // for 'vt'. def SDTUNDEF : SDTypeProfile<1, 0, []>; // for 'undef'. def SDTUnaryOp : SDTypeProfile<1, 1, []>; // for bitconvert. def SDTPtrAddOp : SDTypeProfile<1, 2, [ // ptradd SDTCisSameAs<0, 1>, SDTCisInt<2>, SDTCisPtrTy<1> ]>; def SDTIntBinOp : SDTypeProfile<1, 2, [ // add, and, or, xor, udiv, etc. SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisInt<0> ]>; def SDTIntShiftOp : SDTypeProfile<1, 2, [ // shl, sra, srl SDTCisSameAs<0, 1>, SDTCisInt<0>, SDTCisInt<2> ]>; def SDTIntShiftDOp: SDTypeProfile<1, 3, [ // fshl, fshr SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisInt<0>, SDTCisInt<3> ]>; def SDTIntSatNoShOp : SDTypeProfile<1, 2, [ // ssat with no shift SDTCisSameAs<0, 1>, SDTCisInt<2> ]>; def SDTIntBinHiLoOp : SDTypeProfile<2, 2, [ // mulhi, mullo, sdivrem, udivrem SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>,SDTCisInt<0> ]>; def SDTIntScaledBinOp : SDTypeProfile<1, 3, [ // smulfix, sdivfix, etc SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisInt<0>, SDTCisInt<3> ]>; def SDTFPBinOp : SDTypeProfile<1, 2, [ // fadd, fmul, etc. SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisFP<0> ]>; def SDTFPSignOp : SDTypeProfile<1, 2, [ // fcopysign. SDTCisSameAs<0, 1>, SDTCisFP<0>, SDTCisFP<2> ]>; def SDTFPTernaryOp : SDTypeProfile<1, 3, [ // fmadd, fnmsub, etc. SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisFP<0> ]>; def SDTIntUnaryOp : SDTypeProfile<1, 1, [ // bitreverse SDTCisSameAs<0, 1>, SDTCisInt<0> ]>; def SDTIntBitCountUnaryOp : SDTypeProfile<1, 1, [ // ctlz, cttz SDTCisInt<0>, SDTCisInt<1> ]>; def SDTIntExtendOp : SDTypeProfile<1, 1, [ // sext, zext, anyext SDTCisInt<0>, SDTCisInt<1>, SDTCisOpSmallerThanOp<1, 0>, SDTCisSameNumEltsAs<0, 1> ]>; def SDTIntTruncOp : SDTypeProfile<1, 1, [ // trunc SDTCisInt<0>, SDTCisInt<1>, SDTCisOpSmallerThanOp<0, 1>, SDTCisSameNumEltsAs<0, 1> ]>; def SDTFPUnaryOp : SDTypeProfile<1, 1, [ // fneg, fsqrt, etc SDTCisSameAs<0, 1>, SDTCisFP<0> ]>; def SDTFPRoundOp : SDTypeProfile<1, 1, [ // fpround SDTCisFP<0>, SDTCisFP<1>, SDTCisOpSmallerThanOp<0, 1>, SDTCisSameNumEltsAs<0, 1> ]>; def SDTFPExtendOp : SDTypeProfile<1, 1, [ // fpextend SDTCisFP<0>, SDTCisFP<1>, SDTCisOpSmallerThanOp<1, 0>, SDTCisSameNumEltsAs<0, 1> ]>; def SDIsFPClassOp : SDTypeProfile<1, 2, [ // is_fpclass SDTCisInt<0>, SDTCisFP<1>, SDTCisInt<2>, SDTCisSameNumEltsAs<0, 1> ]>; def SDTIntToFPOp : SDTypeProfile<1, 1, [ // [su]int_to_fp SDTCisFP<0>, SDTCisInt<1>, SDTCisSameNumEltsAs<0, 1> ]>; def SDTFPToIntOp : SDTypeProfile<1, 1, [ // fp_to_[su]int SDTCisInt<0>, SDTCisFP<1>, SDTCisSameNumEltsAs<0, 1> ]>; def SDTFPToIntSatOp : SDTypeProfile<1, 2, [ // fp_to_[su]int_sat SDTCisInt<0>, SDTCisFP<1>, SDTCisSameNumEltsAs<0, 1>, SDTCisVT<2, OtherVT> ]>; def SDTFPExpOp : SDTypeProfile<1, 2, [ // ldexp SDTCisSameAs<0, 1>, SDTCisFP<0>, SDTCisInt<2> ]>; def SDTGetFPStateOp : SDTypeProfile<1, 0, [ // get_fpenv, get_fpmode SDTCisInt<0> ]>; def SDTSetFPStateOp : SDTypeProfile<0, 1, [ // set_fpenv, set_fpmode SDTCisInt<0> ]>; def SDTExtInreg : SDTypeProfile<1, 2, [ // sext_inreg SDTCisSameAs<0, 1>, SDTCisInt<0>, SDTCisVT<2, OtherVT>, SDTCisVTSmallerThanOp<2, 1> ]>; def SDTExtInvec : SDTypeProfile<1, 1, [ // sext_invec SDTCisInt<0>, SDTCisVec<0>, SDTCisInt<1>, SDTCisVec<1>, SDTCisOpSmallerThanOp<1, 0> ]>; def SDTFreeze : 
SDTypeProfile<1, 1, [ SDTCisSameAs<0, 1> ]>; def SDTSetCC : SDTypeProfile<1, 3, [ // setcc SDTCisInt<0>, SDTCisSameAs<1, 2>, SDTCisVT<3, OtherVT> ]>; def SDTSelect : SDTypeProfile<1, 3, [ // select SDTCisInt<1>, SDTCisSameAs<0, 2>, SDTCisSameAs<2, 3> ]>; def SDTVSelect : SDTypeProfile<1, 3, [ // vselect SDTCisVec<0>, SDTCisInt<1>, SDTCisSameAs<0, 2>, SDTCisSameAs<2, 3>, SDTCisSameNumEltsAs<0, 1> ]>; def SDTSelectCC : SDTypeProfile<1, 5, [ // select_cc SDTCisSameAs<1, 2>, SDTCisSameAs<3, 4>, SDTCisSameAs<0, 3>, SDTCisVT<5, OtherVT> ]>; def SDTBr : SDTypeProfile<0, 1, [ // br SDTCisVT<0, OtherVT> ]>; def SDTBrCC : SDTypeProfile<0, 4, [ // brcc SDTCisVT<0, OtherVT>, SDTCisSameAs<1, 2>, SDTCisVT<3, OtherVT> ]>; def SDTBrcond : SDTypeProfile<0, 2, [ // brcond SDTCisInt<0>, SDTCisVT<1, OtherVT> ]>; def SDTBrind : SDTypeProfile<0, 1, [ // brind SDTCisPtrTy<0> ]>; def SDTCatchret : SDTypeProfile<0, 2, [ // catchret SDTCisVT<0, OtherVT>, SDTCisVT<1, OtherVT> ]>; +def SDTCleanupret : SDTypeProfile<0, 1, [ // cleanupret + SDTCisVT<0, OtherVT> +]>; + def SDTNone : SDTypeProfile<0, 0, []>; // ret, trap def SDTUBSANTrap : SDTypeProfile<0, 1, []>; // ubsantrap def SDTLoad : SDTypeProfile<1, 1, [ // load SDTCisPtrTy<1> ]>; def SDTStore : SDTypeProfile<0, 2, [ // store SDTCisPtrTy<1> ]>; def SDTIStore : SDTypeProfile<1, 3, [ // indexed store SDTCisSameAs<0, 2>, SDTCisPtrTy<0>, SDTCisPtrTy<3> ]>; def SDTMaskedStore: SDTypeProfile<0, 4, [ // masked store SDTCisVec<0>, SDTCisPtrTy<1>, SDTCisPtrTy<2>, SDTCisVec<3>, SDTCisSameNumEltsAs<0, 3> ]>; def SDTMaskedLoad: SDTypeProfile<1, 4, [ // masked load SDTCisVec<0>, SDTCisPtrTy<1>, SDTCisPtrTy<2>, SDTCisVec<3>, SDTCisSameAs<0, 4>, SDTCisSameNumEltsAs<0, 3> ]>; def SDTMaskedGather : SDTypeProfile<1, 4, [ SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisVec<2>, SDTCisPtrTy<3>, SDTCisVec<4>, SDTCisSameNumEltsAs<0, 2>, SDTCisSameNumEltsAs<0, 4> ]>; def SDTMaskedScatter : SDTypeProfile<0, 4, [ SDTCisVec<0>, SDTCisVec<1>, SDTCisPtrTy<2>, SDTCisVec<3>, SDTCisSameNumEltsAs<0, 1>, SDTCisSameNumEltsAs<0, 3> ]>; def SDTVectorCompress : SDTypeProfile<1, 3, [ SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisVec<2>, SDTCisSameNumEltsAs<1, 2>, SDTCisSameAs<1, 3> ]>; def SDTVecShuffle : SDTypeProfile<1, 2, [ SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2> ]>; def SDTVecSlice : SDTypeProfile<1, 3, [ // vector splice SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, SDTCisInt<3> ]>; def SDTVecExtract : SDTypeProfile<1, 2, [ // vector extract SDTCisEltOfVec<0, 1>, SDTCisPtrTy<2> ]>; def SDTVecInsert : SDTypeProfile<1, 3, [ // vector insert SDTCisEltOfVec<2, 1>, SDTCisSameAs<0, 1>, SDTCisPtrTy<3> ]>; def SDTVecReduce : SDTypeProfile<1, 1, [ // vector reduction SDTCisInt<0>, SDTCisVec<1> ]>; def SDTFPVecReduce : SDTypeProfile<1, 1, [ // FP vector reduction SDTCisFP<0>, SDTCisVec<1> ]>; def SDTVecReverse : SDTypeProfile<1, 1, [ // vector reverse SDTCisVec<0>, SDTCisSameAs<0,1> ]>; def SDTSubVecExtract : SDTypeProfile<1, 2, [// subvector extract SDTCisSubVecOfVec<0,1>, SDTCisInt<2> ]>; def SDTSubVecInsert : SDTypeProfile<1, 3, [ // subvector insert SDTCisSubVecOfVec<2, 1>, SDTCisSameAs<0,1>, SDTCisInt<3> ]>; def SDTPrefetch : SDTypeProfile<0, 4, [ // prefetch SDTCisPtrTy<0>, SDTCisSameAs<1, 2>, SDTCisSameAs<1, 3>, SDTCisInt<1> ]>; def SDTAtomicFence : SDTypeProfile<0, 2, [ SDTCisSameAs<0,1>, SDTCisPtrTy<0> ]>; def SDTAtomic3 : SDTypeProfile<1, 3, [ SDTCisSameAs<0,2>, SDTCisSameAs<0,3>, SDTCisInt<0>, SDTCisPtrTy<1> ]>; def SDTAtomic2 : SDTypeProfile<1, 2, [ SDTCisSameAs<0,2>, SDTCisInt<0>, SDTCisPtrTy<1> ]>; def 
SDTFPAtomic2 : SDTypeProfile<1, 2, [ SDTCisSameAs<0,2>, SDTCisFP<0>, SDTCisPtrTy<1> ]>; def SDTAtomicStore : SDTypeProfile<0, 2, [ SDTCisInt<0>, SDTCisPtrTy<1> ]>; def SDTAtomicLoad : SDTypeProfile<1, 1, [ SDTCisPtrTy<1> ]>; class SDCallSeqStart constraints> : SDTypeProfile<0, 2, constraints>; class SDCallSeqEnd constraints> : SDTypeProfile<0, 2, constraints>; //===----------------------------------------------------------------------===// // Selection DAG Node definitions. // class SDNode props = [], string sdclass = "SDNode"> : SDPatternOperator { string Opcode = opcode; string SDClass = sdclass; let Properties = props; SDTypeProfile TypeProfile = typeprof; } // Special TableGen-recognized dag nodes def set; def implicit; def node; def srcvalue; def imm : SDNode<"ISD::Constant" , SDTIntLeaf , [], "ConstantSDNode">; def timm : SDNode<"ISD::TargetConstant",SDTIntLeaf, [], "ConstantSDNode">; def fpimm : SDNode<"ISD::ConstantFP", SDTFPLeaf , [], "ConstantFPSDNode">; def vt : SDNode<"ISD::VALUETYPE" , SDTOther , [], "VTSDNode">; def bb : SDNode<"ISD::BasicBlock", SDTOther , [], "BasicBlockSDNode">; def cond : SDNode<"ISD::CONDCODE" , SDTOther , [], "CondCodeSDNode">; def undef : SDNode<"ISD::UNDEF" , SDTUNDEF , []>; def vscale : SDNode<"ISD::VSCALE" , SDTIntUnaryOp, []>; def globaladdr : SDNode<"ISD::GlobalAddress", SDTPtrLeaf, [], "GlobalAddressSDNode">; def tglobaladdr : SDNode<"ISD::TargetGlobalAddress", SDTPtrLeaf, [], "GlobalAddressSDNode">; def globaltlsaddr : SDNode<"ISD::GlobalTLSAddress", SDTPtrLeaf, [], "GlobalAddressSDNode">; def tglobaltlsaddr : SDNode<"ISD::TargetGlobalTLSAddress", SDTPtrLeaf, [], "GlobalAddressSDNode">; def constpool : SDNode<"ISD::ConstantPool", SDTPtrLeaf, [], "ConstantPoolSDNode">; def tconstpool : SDNode<"ISD::TargetConstantPool", SDTPtrLeaf, [], "ConstantPoolSDNode">; def jumptable : SDNode<"ISD::JumpTable", SDTPtrLeaf, [], "JumpTableSDNode">; def tjumptable : SDNode<"ISD::TargetJumpTable", SDTPtrLeaf, [], "JumpTableSDNode">; def frameindex : SDNode<"ISD::FrameIndex", SDTPtrLeaf, [], "FrameIndexSDNode">; def tframeindex : SDNode<"ISD::TargetFrameIndex", SDTPtrLeaf, [], "FrameIndexSDNode">; def externalsym : SDNode<"ISD::ExternalSymbol", SDTPtrLeaf, [], "ExternalSymbolSDNode">; def texternalsym: SDNode<"ISD::TargetExternalSymbol", SDTPtrLeaf, [], "ExternalSymbolSDNode">; def mcsym: SDNode<"ISD::MCSymbol", SDTPtrLeaf, [], "MCSymbolSDNode">; def blockaddress : SDNode<"ISD::BlockAddress", SDTPtrLeaf, [], "BlockAddressSDNode">; def tblockaddress: SDNode<"ISD::TargetBlockAddress", SDTPtrLeaf, [], "BlockAddressSDNode">; def add : SDNode<"ISD::ADD" , SDTIntBinOp , [SDNPCommutative, SDNPAssociative]>; def ptradd : SDNode<"ISD::ADD" , SDTPtrAddOp, []>; def sub : SDNode<"ISD::SUB" , SDTIntBinOp>; def mul : SDNode<"ISD::MUL" , SDTIntBinOp, [SDNPCommutative, SDNPAssociative]>; def mulhs : SDNode<"ISD::MULHS" , SDTIntBinOp, [SDNPCommutative]>; def mulhu : SDNode<"ISD::MULHU" , SDTIntBinOp, [SDNPCommutative]>; def avgfloors : SDNode<"ISD::AVGFLOORS" , SDTIntBinOp, [SDNPCommutative]>; def avgflooru : SDNode<"ISD::AVGFLOORU" , SDTIntBinOp, [SDNPCommutative]>; def avgceils : SDNode<"ISD::AVGCEILS" , SDTIntBinOp, [SDNPCommutative]>; def avgceilu : SDNode<"ISD::AVGCEILU" , SDTIntBinOp, [SDNPCommutative]>; def abds : SDNode<"ISD::ABDS" , SDTIntBinOp, [SDNPCommutative]>; def abdu : SDNode<"ISD::ABDU" , SDTIntBinOp, [SDNPCommutative]>; def smullohi : SDNode<"ISD::SMUL_LOHI" , SDTIntBinHiLoOp, [SDNPCommutative]>; def umullohi : SDNode<"ISD::UMUL_LOHI" , SDTIntBinHiLoOp, 
[SDNPCommutative]>; def sdiv : SDNode<"ISD::SDIV" , SDTIntBinOp>; def udiv : SDNode<"ISD::UDIV" , SDTIntBinOp>; def srem : SDNode<"ISD::SREM" , SDTIntBinOp>; def urem : SDNode<"ISD::UREM" , SDTIntBinOp>; def sdivrem : SDNode<"ISD::SDIVREM" , SDTIntBinHiLoOp>; def udivrem : SDNode<"ISD::UDIVREM" , SDTIntBinHiLoOp>; def srl : SDNode<"ISD::SRL" , SDTIntShiftOp>; def sra : SDNode<"ISD::SRA" , SDTIntShiftOp>; def shl : SDNode<"ISD::SHL" , SDTIntShiftOp>; def rotl : SDNode<"ISD::ROTL" , SDTIntShiftOp>; def rotr : SDNode<"ISD::ROTR" , SDTIntShiftOp>; def fshl : SDNode<"ISD::FSHL" , SDTIntShiftDOp>; def fshr : SDNode<"ISD::FSHR" , SDTIntShiftDOp>; def and : SDNode<"ISD::AND" , SDTIntBinOp, [SDNPCommutative, SDNPAssociative]>; def or : SDNode<"ISD::OR" , SDTIntBinOp, [SDNPCommutative, SDNPAssociative]>; def xor : SDNode<"ISD::XOR" , SDTIntBinOp, [SDNPCommutative, SDNPAssociative]>; def addc : SDNode<"ISD::ADDC" , SDTIntBinOp, [SDNPCommutative, SDNPOutGlue]>; def adde : SDNode<"ISD::ADDE" , SDTIntBinOp, [SDNPCommutative, SDNPOutGlue, SDNPInGlue]>; def subc : SDNode<"ISD::SUBC" , SDTIntBinOp, [SDNPOutGlue]>; def sube : SDNode<"ISD::SUBE" , SDTIntBinOp, [SDNPOutGlue, SDNPInGlue]>; def smin : SDNode<"ISD::SMIN" , SDTIntBinOp, [SDNPCommutative, SDNPAssociative]>; def smax : SDNode<"ISD::SMAX" , SDTIntBinOp, [SDNPCommutative, SDNPAssociative]>; def umin : SDNode<"ISD::UMIN" , SDTIntBinOp, [SDNPCommutative, SDNPAssociative]>; def umax : SDNode<"ISD::UMAX" , SDTIntBinOp, [SDNPCommutative, SDNPAssociative]>; def scmp : SDNode<"ISD::SCMP" , SDTIntBinOp, []>; def ucmp : SDNode<"ISD::UCMP" , SDTIntBinOp, []>; def saddsat : SDNode<"ISD::SADDSAT" , SDTIntBinOp, [SDNPCommutative]>; def uaddsat : SDNode<"ISD::UADDSAT" , SDTIntBinOp, [SDNPCommutative]>; def ssubsat : SDNode<"ISD::SSUBSAT" , SDTIntBinOp>; def usubsat : SDNode<"ISD::USUBSAT" , SDTIntBinOp>; def sshlsat : SDNode<"ISD::SSHLSAT" , SDTIntBinOp>; def ushlsat : SDNode<"ISD::USHLSAT" , SDTIntBinOp>; def smulfix : SDNode<"ISD::SMULFIX" , SDTIntScaledBinOp, [SDNPCommutative]>; def smulfixsat : SDNode<"ISD::SMULFIXSAT", SDTIntScaledBinOp, [SDNPCommutative]>; def umulfix : SDNode<"ISD::UMULFIX" , SDTIntScaledBinOp, [SDNPCommutative]>; def umulfixsat : SDNode<"ISD::UMULFIXSAT", SDTIntScaledBinOp, [SDNPCommutative]>; def sdivfix : SDNode<"ISD::SDIVFIX" , SDTIntScaledBinOp>; def sdivfixsat : SDNode<"ISD::SDIVFIXSAT", SDTIntScaledBinOp>; def udivfix : SDNode<"ISD::UDIVFIX" , SDTIntScaledBinOp>; def udivfixsat : SDNode<"ISD::UDIVFIXSAT", SDTIntScaledBinOp>; def sext_inreg : SDNode<"ISD::SIGN_EXTEND_INREG", SDTExtInreg>; def sext_invec : SDNode<"ISD::SIGN_EXTEND_VECTOR_INREG", SDTExtInvec>; def zext_invec : SDNode<"ISD::ZERO_EXTEND_VECTOR_INREG", SDTExtInvec>; def abs : SDNode<"ISD::ABS" , SDTIntUnaryOp>; def bitreverse : SDNode<"ISD::BITREVERSE" , SDTIntUnaryOp>; def bswap : SDNode<"ISD::BSWAP" , SDTIntUnaryOp>; def ctlz : SDNode<"ISD::CTLZ" , SDTIntBitCountUnaryOp>; def cttz : SDNode<"ISD::CTTZ" , SDTIntBitCountUnaryOp>; def ctpop : SDNode<"ISD::CTPOP" , SDTIntBitCountUnaryOp>; def ctlz_zero_undef : SDNode<"ISD::CTLZ_ZERO_UNDEF", SDTIntBitCountUnaryOp>; def cttz_zero_undef : SDNode<"ISD::CTTZ_ZERO_UNDEF", SDTIntBitCountUnaryOp>; def sext : SDNode<"ISD::SIGN_EXTEND", SDTIntExtendOp>; def zext : SDNode<"ISD::ZERO_EXTEND", SDTIntExtendOp>; def anyext : SDNode<"ISD::ANY_EXTEND" , SDTIntExtendOp>; def trunc : SDNode<"ISD::TRUNCATE" , SDTIntTruncOp>; def bitconvert : SDNode<"ISD::BITCAST" , SDTUnaryOp>; def addrspacecast : SDNode<"ISD::ADDRSPACECAST", SDTUnaryOp>; def 
freeze : SDNode<"ISD::FREEZE" , SDTFreeze>; def extractelt : SDNode<"ISD::EXTRACT_VECTOR_ELT", SDTVecExtract>; def insertelt : SDNode<"ISD::INSERT_VECTOR_ELT", SDTVecInsert>; def vecreduce_add : SDNode<"ISD::VECREDUCE_ADD", SDTVecReduce>; def vecreduce_smax : SDNode<"ISD::VECREDUCE_SMAX", SDTVecReduce>; def vecreduce_umax : SDNode<"ISD::VECREDUCE_UMAX", SDTVecReduce>; def vecreduce_smin : SDNode<"ISD::VECREDUCE_SMIN", SDTVecReduce>; def vecreduce_umin : SDNode<"ISD::VECREDUCE_UMIN", SDTVecReduce>; def vecreduce_fadd : SDNode<"ISD::VECREDUCE_FADD", SDTFPVecReduce>; def vecreduce_fmin : SDNode<"ISD::VECREDUCE_FMIN", SDTFPVecReduce>; def vecreduce_fmax : SDNode<"ISD::VECREDUCE_FMAX", SDTFPVecReduce>; def vecreduce_fminimum : SDNode<"ISD::VECREDUCE_FMINIMUM", SDTFPVecReduce>; def vecreduce_fmaximum : SDNode<"ISD::VECREDUCE_FMAXIMUM", SDTFPVecReduce>; def fadd : SDNode<"ISD::FADD" , SDTFPBinOp, [SDNPCommutative]>; def fsub : SDNode<"ISD::FSUB" , SDTFPBinOp>; def fmul : SDNode<"ISD::FMUL" , SDTFPBinOp, [SDNPCommutative]>; def fdiv : SDNode<"ISD::FDIV" , SDTFPBinOp>; def frem : SDNode<"ISD::FREM" , SDTFPBinOp>; def fma : SDNode<"ISD::FMA" , SDTFPTernaryOp, [SDNPCommutative]>; def fmad : SDNode<"ISD::FMAD" , SDTFPTernaryOp, [SDNPCommutative]>; def fabs : SDNode<"ISD::FABS" , SDTFPUnaryOp>; def fminnum : SDNode<"ISD::FMINNUM" , SDTFPBinOp, [SDNPCommutative, SDNPAssociative]>; def fmaxnum : SDNode<"ISD::FMAXNUM" , SDTFPBinOp, [SDNPCommutative, SDNPAssociative]>; def fminnum_ieee : SDNode<"ISD::FMINNUM_IEEE", SDTFPBinOp, [SDNPCommutative]>; def fmaxnum_ieee : SDNode<"ISD::FMAXNUM_IEEE", SDTFPBinOp, [SDNPCommutative]>; def fminimum : SDNode<"ISD::FMINIMUM" , SDTFPBinOp, [SDNPCommutative, SDNPAssociative]>; def fmaximum : SDNode<"ISD::FMAXIMUM" , SDTFPBinOp, [SDNPCommutative, SDNPAssociative]>; def fgetsign : SDNode<"ISD::FGETSIGN" , SDTFPToIntOp>; def fcanonicalize : SDNode<"ISD::FCANONICALIZE", SDTFPUnaryOp>; def fneg : SDNode<"ISD::FNEG" , SDTFPUnaryOp>; def fsqrt : SDNode<"ISD::FSQRT" , SDTFPUnaryOp>; def fsin : SDNode<"ISD::FSIN" , SDTFPUnaryOp>; def fcos : SDNode<"ISD::FCOS" , SDTFPUnaryOp>; def ftan : SDNode<"ISD::FTAN" , SDTFPUnaryOp>; def fasin : SDNode<"ISD::FASIN" , SDTFPUnaryOp>; def facos : SDNode<"ISD::FACOS" , SDTFPUnaryOp>; def fatan : SDNode<"ISD::FATAN" , SDTFPUnaryOp>; def fsinh : SDNode<"ISD::FSINH" , SDTFPUnaryOp>; def fcosh : SDNode<"ISD::FCOSH" , SDTFPUnaryOp>; def ftanh : SDNode<"ISD::FTANH" , SDTFPUnaryOp>; def fexp2 : SDNode<"ISD::FEXP2" , SDTFPUnaryOp>; def fexp10 : SDNode<"ISD::FEXP10" , SDTFPUnaryOp>; def fpow : SDNode<"ISD::FPOW" , SDTFPBinOp>; def flog2 : SDNode<"ISD::FLOG2" , SDTFPUnaryOp>; def fldexp : SDNode<"ISD::FLDEXP" , SDTFPExpOp>; def frint : SDNode<"ISD::FRINT" , SDTFPUnaryOp>; def ftrunc : SDNode<"ISD::FTRUNC" , SDTFPUnaryOp>; def fceil : SDNode<"ISD::FCEIL" , SDTFPUnaryOp>; def ffloor : SDNode<"ISD::FFLOOR" , SDTFPUnaryOp>; def fnearbyint : SDNode<"ISD::FNEARBYINT" , SDTFPUnaryOp>; def fround : SDNode<"ISD::FROUND" , SDTFPUnaryOp>; def froundeven : SDNode<"ISD::FROUNDEVEN" , SDTFPUnaryOp>; def lround : SDNode<"ISD::LROUND" , SDTFPToIntOp>; def llround : SDNode<"ISD::LLROUND" , SDTFPToIntOp>; def lrint : SDNode<"ISD::LRINT" , SDTFPToIntOp>; def llrint : SDNode<"ISD::LLRINT" , SDTFPToIntOp>; def fpround : SDNode<"ISD::FP_ROUND" , SDTFPRoundOp>; def fpextend : SDNode<"ISD::FP_EXTEND" , SDTFPExtendOp>; def fcopysign : SDNode<"ISD::FCOPYSIGN" , SDTFPSignOp>; def is_fpclass : SDNode<"ISD::IS_FPCLASS" , SDIsFPClassOp>; def sint_to_fp : SDNode<"ISD::SINT_TO_FP" , 
SDTIntToFPOp>; def uint_to_fp : SDNode<"ISD::UINT_TO_FP" , SDTIntToFPOp>; def fp_to_sint : SDNode<"ISD::FP_TO_SINT" , SDTFPToIntOp>; def fp_to_uint : SDNode<"ISD::FP_TO_UINT" , SDTFPToIntOp>; def fp_to_sint_sat : SDNode<"ISD::FP_TO_SINT_SAT" , SDTFPToIntSatOp>; def fp_to_uint_sat : SDNode<"ISD::FP_TO_UINT_SAT" , SDTFPToIntSatOp>; def f16_to_fp : SDNode<"ISD::FP16_TO_FP" , SDTIntToFPOp>; def fp_to_f16 : SDNode<"ISD::FP_TO_FP16" , SDTFPToIntOp>; def bf16_to_fp : SDNode<"ISD::BF16_TO_FP" , SDTIntToFPOp>; def fp_to_bf16 : SDNode<"ISD::FP_TO_BF16" , SDTFPToIntOp>; def strict_fadd : SDNode<"ISD::STRICT_FADD", SDTFPBinOp, [SDNPHasChain, SDNPCommutative]>; def strict_fsub : SDNode<"ISD::STRICT_FSUB", SDTFPBinOp, [SDNPHasChain]>; def strict_fmul : SDNode<"ISD::STRICT_FMUL", SDTFPBinOp, [SDNPHasChain, SDNPCommutative]>; def strict_fdiv : SDNode<"ISD::STRICT_FDIV", SDTFPBinOp, [SDNPHasChain]>; def strict_frem : SDNode<"ISD::STRICT_FREM", SDTFPBinOp, [SDNPHasChain]>; def strict_fma : SDNode<"ISD::STRICT_FMA", SDTFPTernaryOp, [SDNPHasChain, SDNPCommutative]>; def strict_fsqrt : SDNode<"ISD::STRICT_FSQRT", SDTFPUnaryOp, [SDNPHasChain]>; def strict_fsin : SDNode<"ISD::STRICT_FSIN", SDTFPUnaryOp, [SDNPHasChain]>; def strict_fcos : SDNode<"ISD::STRICT_FCOS", SDTFPUnaryOp, [SDNPHasChain]>; def strict_ftan : SDNode<"ISD::STRICT_FTAN", SDTFPUnaryOp, [SDNPHasChain]>; def strict_fasin : SDNode<"ISD::STRICT_FASIN", SDTFPUnaryOp, [SDNPHasChain]>; def strict_facos : SDNode<"ISD::STRICT_FACOS", SDTFPUnaryOp, [SDNPHasChain]>; def strict_fatan : SDNode<"ISD::STRICT_FATAN", SDTFPUnaryOp, [SDNPHasChain]>; def strict_fsinh : SDNode<"ISD::STRICT_FSINH", SDTFPUnaryOp, [SDNPHasChain]>; def strict_fcosh : SDNode<"ISD::STRICT_FCOSH", SDTFPUnaryOp, [SDNPHasChain]>; def strict_ftanh : SDNode<"ISD::STRICT_FTANH", SDTFPUnaryOp, [SDNPHasChain]>; def strict_fexp2 : SDNode<"ISD::STRICT_FEXP2", SDTFPUnaryOp, [SDNPHasChain]>; def strict_fpow : SDNode<"ISD::STRICT_FPOW", SDTFPBinOp, [SDNPHasChain]>; def strict_fldexp : SDNode<"ISD::STRICT_FLDEXP", SDTFPExpOp, [SDNPHasChain]>; def strict_flog2 : SDNode<"ISD::STRICT_FLOG2", SDTFPUnaryOp, [SDNPHasChain]>; def strict_frint : SDNode<"ISD::STRICT_FRINT", SDTFPUnaryOp, [SDNPHasChain]>; def strict_lrint : SDNode<"ISD::STRICT_LRINT", SDTFPToIntOp, [SDNPHasChain]>; def strict_llrint : SDNode<"ISD::STRICT_LLRINT", SDTFPToIntOp, [SDNPHasChain]>; def strict_fnearbyint : SDNode<"ISD::STRICT_FNEARBYINT", SDTFPUnaryOp, [SDNPHasChain]>; def strict_fceil : SDNode<"ISD::STRICT_FCEIL", SDTFPUnaryOp, [SDNPHasChain]>; def strict_ffloor : SDNode<"ISD::STRICT_FFLOOR", SDTFPUnaryOp, [SDNPHasChain]>; def strict_lround : SDNode<"ISD::STRICT_LROUND", SDTFPToIntOp, [SDNPHasChain]>; def strict_llround : SDNode<"ISD::STRICT_LLROUND", SDTFPToIntOp, [SDNPHasChain]>; def strict_fround : SDNode<"ISD::STRICT_FROUND", SDTFPUnaryOp, [SDNPHasChain]>; def strict_froundeven : SDNode<"ISD::STRICT_FROUNDEVEN", SDTFPUnaryOp, [SDNPHasChain]>; def strict_ftrunc : SDNode<"ISD::STRICT_FTRUNC", SDTFPUnaryOp, [SDNPHasChain]>; def strict_fminnum : SDNode<"ISD::STRICT_FMINNUM", SDTFPBinOp, [SDNPHasChain, SDNPCommutative, SDNPAssociative]>; def strict_fmaxnum : SDNode<"ISD::STRICT_FMAXNUM", SDTFPBinOp, [SDNPHasChain, SDNPCommutative, SDNPAssociative]>; def strict_fminimum : SDNode<"ISD::STRICT_FMINIMUM", SDTFPBinOp, [SDNPHasChain, SDNPCommutative, SDNPAssociative]>; def strict_fmaximum : SDNode<"ISD::STRICT_FMAXIMUM", SDTFPBinOp, [SDNPHasChain, SDNPCommutative, SDNPAssociative]>; def strict_fpround : SDNode<"ISD::STRICT_FP_ROUND", 
SDTFPRoundOp, [SDNPHasChain]>; def strict_fpextend : SDNode<"ISD::STRICT_FP_EXTEND", SDTFPExtendOp, [SDNPHasChain]>; def strict_fp_to_sint : SDNode<"ISD::STRICT_FP_TO_SINT", SDTFPToIntOp, [SDNPHasChain]>; def strict_fp_to_uint : SDNode<"ISD::STRICT_FP_TO_UINT", SDTFPToIntOp, [SDNPHasChain]>; def strict_sint_to_fp : SDNode<"ISD::STRICT_SINT_TO_FP", SDTIntToFPOp, [SDNPHasChain]>; def strict_uint_to_fp : SDNode<"ISD::STRICT_UINT_TO_FP", SDTIntToFPOp, [SDNPHasChain]>; def strict_f16_to_fp : SDNode<"ISD::STRICT_FP16_TO_FP", SDTIntToFPOp, [SDNPHasChain]>; def strict_fp_to_f16 : SDNode<"ISD::STRICT_FP_TO_FP16", SDTFPToIntOp, [SDNPHasChain]>; def strict_bf16_to_fp : SDNode<"ISD::STRICT_BF16_TO_FP", SDTIntToFPOp, [SDNPHasChain]>; def strict_fp_to_bf16 : SDNode<"ISD::STRICT_FP_TO_BF16", SDTFPToIntOp, [SDNPHasChain]>; def strict_fsetcc : SDNode<"ISD::STRICT_FSETCC", SDTSetCC, [SDNPHasChain]>; def strict_fsetccs : SDNode<"ISD::STRICT_FSETCCS", SDTSetCC, [SDNPHasChain]>; def get_fpenv : SDNode<"ISD::GET_FPENV", SDTGetFPStateOp, [SDNPHasChain]>; def set_fpenv : SDNode<"ISD::SET_FPENV", SDTSetFPStateOp, [SDNPHasChain]>; def reset_fpenv : SDNode<"ISD::RESET_FPENV", SDTNone, [SDNPHasChain]>; def get_fpmode : SDNode<"ISD::GET_FPMODE", SDTGetFPStateOp, [SDNPHasChain]>; def set_fpmode : SDNode<"ISD::SET_FPMODE", SDTSetFPStateOp, [SDNPHasChain]>; def reset_fpmode : SDNode<"ISD::RESET_FPMODE", SDTNone, [SDNPHasChain]>; def setcc : SDNode<"ISD::SETCC" , SDTSetCC>; def select : SDNode<"ISD::SELECT" , SDTSelect>; def vselect : SDNode<"ISD::VSELECT" , SDTVSelect>; def selectcc : SDNode<"ISD::SELECT_CC" , SDTSelectCC>; def brcc : SDNode<"ISD::BR_CC" , SDTBrCC, [SDNPHasChain]>; def brcond : SDNode<"ISD::BRCOND" , SDTBrcond, [SDNPHasChain]>; def brind : SDNode<"ISD::BRIND" , SDTBrind, [SDNPHasChain]>; def br : SDNode<"ISD::BR" , SDTBr, [SDNPHasChain]>; def catchret : SDNode<"ISD::CATCHRET" , SDTCatchret, [SDNPHasChain, SDNPSideEffect]>; -def cleanupret : SDNode<"ISD::CLEANUPRET" , SDTNone, [SDNPHasChain]>; +def cleanupret : SDNode<"ISD::CLEANUPRET" , SDTCleanupret, [SDNPHasChain]>; def trap : SDNode<"ISD::TRAP" , SDTNone, [SDNPHasChain, SDNPSideEffect]>; def debugtrap : SDNode<"ISD::DEBUGTRAP" , SDTNone, [SDNPHasChain, SDNPSideEffect]>; def ubsantrap : SDNode<"ISD::UBSANTRAP" , SDTUBSANTrap, [SDNPHasChain, SDNPSideEffect]>; def prefetch : SDNode<"ISD::PREFETCH" , SDTPrefetch, [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>; def readcyclecounter : SDNode<"ISD::READCYCLECOUNTER", SDTIntLeaf, [SDNPHasChain, SDNPSideEffect]>; def readsteadycounter : SDNode<"ISD::READSTEADYCOUNTER", SDTIntLeaf, [SDNPHasChain, SDNPSideEffect]>; def membarrier : SDNode<"ISD::MEMBARRIER", SDTNone, [SDNPHasChain, SDNPSideEffect]>; def jump_table_debug_info : SDNode<"ISD::JUMP_TABLE_DEBUG_INFO", SDTNone, [SDNPHasChain]>; def atomic_fence : SDNode<"ISD::ATOMIC_FENCE" , SDTAtomicFence, [SDNPHasChain, SDNPSideEffect]>; def atomic_cmp_swap : SDNode<"ISD::ATOMIC_CMP_SWAP" , SDTAtomic3, [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; def atomic_load_add : SDNode<"ISD::ATOMIC_LOAD_ADD" , SDTAtomic2, [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; def atomic_swap : SDNode<"ISD::ATOMIC_SWAP", SDTAtomic2, [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; def atomic_load_sub : SDNode<"ISD::ATOMIC_LOAD_SUB" , SDTAtomic2, [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; def atomic_load_and : SDNode<"ISD::ATOMIC_LOAD_AND" , SDTAtomic2, [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; def 
atomic_load_clr : SDNode<"ISD::ATOMIC_LOAD_CLR" , SDTAtomic2, [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; def atomic_load_or : SDNode<"ISD::ATOMIC_LOAD_OR" , SDTAtomic2, [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; def atomic_load_xor : SDNode<"ISD::ATOMIC_LOAD_XOR" , SDTAtomic2, [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; def atomic_load_nand: SDNode<"ISD::ATOMIC_LOAD_NAND", SDTAtomic2, [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; def atomic_load_min : SDNode<"ISD::ATOMIC_LOAD_MIN", SDTAtomic2, [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; def atomic_load_max : SDNode<"ISD::ATOMIC_LOAD_MAX", SDTAtomic2, [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; def atomic_load_umin : SDNode<"ISD::ATOMIC_LOAD_UMIN", SDTAtomic2, [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; def atomic_load_umax : SDNode<"ISD::ATOMIC_LOAD_UMAX", SDTAtomic2, [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; def atomic_load_fadd : SDNode<"ISD::ATOMIC_LOAD_FADD" , SDTFPAtomic2, [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; def atomic_load_fsub : SDNode<"ISD::ATOMIC_LOAD_FSUB" , SDTFPAtomic2, [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; def atomic_load_fmax : SDNode<"ISD::ATOMIC_LOAD_FMAX", SDTFPAtomic2, [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; def atomic_load_fmin : SDNode<"ISD::ATOMIC_LOAD_FMIN", SDTFPAtomic2, [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; def atomic_load_uinc_wrap : SDNode<"ISD::ATOMIC_LOAD_UINC_WRAP", SDTAtomic2, [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; def atomic_load_udec_wrap : SDNode<"ISD::ATOMIC_LOAD_UDEC_WRAP", SDTAtomic2, [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; def atomic_load : SDNode<"ISD::ATOMIC_LOAD", SDTAtomicLoad, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; def atomic_store : SDNode<"ISD::ATOMIC_STORE", SDTAtomicStore, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; def masked_st : SDNode<"ISD::MSTORE", SDTMaskedStore, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; def masked_ld : SDNode<"ISD::MLOAD", SDTMaskedLoad, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; def masked_gather : SDNode<"ISD::MGATHER", SDTMaskedGather, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; def masked_scatter : SDNode<"ISD::MSCATTER", SDTMaskedScatter, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; def vector_compress : SDNode<"ISD::VECTOR_COMPRESS", SDTVectorCompress>; // Do not use ld, st directly. Use load, extload, sextload, zextload, store, // and truncst (see below). def ld : SDNode<"ISD::LOAD" , SDTLoad, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; def st : SDNode<"ISD::STORE" , SDTStore, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; def ist : SDNode<"ISD::STORE" , SDTIStore, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; def vector_shuffle : SDNode<"ISD::VECTOR_SHUFFLE", SDTVecShuffle, []>; def vector_reverse : SDNode<"ISD::VECTOR_REVERSE", SDTVecReverse>; def vector_splice : SDNode<"ISD::VECTOR_SPLICE", SDTVecSlice, []>; def build_vector : SDNode<"ISD::BUILD_VECTOR", SDTypeProfile<1, -1, []>, []>; def splat_vector : SDNode<"ISD::SPLAT_VECTOR", SDTypeProfile<1, 1, []>, []>; def step_vector : SDNode<"ISD::STEP_VECTOR", SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisInt<1>]>, []>; def scalar_to_vector : SDNode<"ISD::SCALAR_TO_VECTOR", SDTypeProfile<1, 1, []>, []>; // vector_extract/vector_insert are deprecated. extractelt/insertelt // are preferred. 
def vector_extract : SDNode<"ISD::EXTRACT_VECTOR_ELT", SDTypeProfile<1, 2, [SDTCisPtrTy<2>]>, []>; def vector_insert : SDNode<"ISD::INSERT_VECTOR_ELT", SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisPtrTy<3>]>, []>; def concat_vectors : SDNode<"ISD::CONCAT_VECTORS", SDTypeProfile<1, 2, [SDTCisSubVecOfVec<1, 0>, SDTCisSameAs<1, 2>]>,[]>; // This operator does not do subvector type checking. The ARM // backend, at least, needs it. def vector_extract_subvec : SDNode<"ISD::EXTRACT_SUBVECTOR", SDTypeProfile<1, 2, [SDTCisInt<2>, SDTCisVec<1>, SDTCisVec<0>]>, []>; def vector_insert_subvec : SDNode<"ISD::INSERT_SUBVECTOR", SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisVec<2>, SDTCisInt<3>]>, []>; // This operator does subvector type checking. def extract_subvector : SDNode<"ISD::EXTRACT_SUBVECTOR", SDTSubVecExtract, []>; def insert_subvector : SDNode<"ISD::INSERT_SUBVECTOR", SDTSubVecInsert, []>; // Nodes for intrinsics, you should use the intrinsic itself and let tblgen use // these internally. Don't reference these directly. def intrinsic_void : SDNode<"ISD::INTRINSIC_VOID", SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>, [SDNPHasChain]>; def intrinsic_w_chain : SDNode<"ISD::INTRINSIC_W_CHAIN", SDTypeProfile<1, -1, [SDTCisPtrTy<1>]>, [SDNPHasChain]>; def intrinsic_wo_chain : SDNode<"ISD::INTRINSIC_WO_CHAIN", SDTypeProfile<1, -1, [SDTCisPtrTy<1>]>, []>; def SDT_assert : SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisInt<1>, SDTCisSameAs<1, 0>]>; def assertsext : SDNode<"ISD::AssertSext", SDT_assert>; def assertzext : SDNode<"ISD::AssertZext", SDT_assert>; def assertalign : SDNode<"ISD::AssertAlign", SDT_assert>; def convergencectrl_anchor : SDNode<"ISD::CONVERGENCECTRL_ANCHOR", SDTypeProfile<1, 0, [SDTCisVT<0,untyped>]>>; def convergencectrl_entry : SDNode<"ISD::CONVERGENCECTRL_ENTRY", SDTypeProfile<1, 0, [SDTCisVT<0,untyped>]>>; def convergencectrl_loop : SDNode<"ISD::CONVERGENCECTRL_LOOP", SDTypeProfile<1, 1, [SDTCisVT<0,untyped>, SDTCisVT<1,untyped>]>>; def convergencectrl_glue : SDNode<"ISD::CONVERGENCECTRL_GLUE", SDTypeProfile<0, 1, [SDTCisVT<0, untyped>]>>; //===----------------------------------------------------------------------===// // Selection DAG Condition Codes class CondCode { string ICmpPredicate = icmpName; string FCmpPredicate = fcmpName; } // ISD::CondCode enums, and mapping to CmpInst::Predicate names def SETOEQ : CondCode<"FCMP_OEQ">; def SETOGT : CondCode<"FCMP_OGT">; def SETOGE : CondCode<"FCMP_OGE">; def SETOLT : CondCode<"FCMP_OLT">; def SETOLE : CondCode<"FCMP_OLE">; def SETONE : CondCode<"FCMP_ONE">; def SETO : CondCode<"FCMP_ORD">; def SETUO : CondCode<"FCMP_UNO">; def SETUEQ : CondCode<"FCMP_UEQ">; def SETUGT : CondCode<"FCMP_UGT", "ICMP_UGT">; def SETUGE : CondCode<"FCMP_UGE", "ICMP_UGE">; def SETULT : CondCode<"FCMP_ULT", "ICMP_ULT">; def SETULE : CondCode<"FCMP_ULE", "ICMP_ULE">; def SETUNE : CondCode<"FCMP_UNE">; def SETEQ : CondCode<"", "ICMP_EQ">; def SETGT : CondCode<"", "ICMP_SGT">; def SETGE : CondCode<"", "ICMP_SGE">; def SETLT : CondCode<"", "ICMP_SLT">; def SETLE : CondCode<"", "ICMP_SLE">; def SETNE : CondCode<"", "ICMP_NE">; //===----------------------------------------------------------------------===// // Selection DAG Node Transformation Functions. // // This mechanism allows targets to manipulate nodes in the output DAG once a // match has been formed. This is typically used to manipulate immediate // values. 
// class SDNodeXForm { SDNode Opcode = opc; code XFormFunction = xformFunction; } def NOOP_SDNodeXForm : SDNodeXForm; //===----------------------------------------------------------------------===// // Selection DAG Pattern Fragments. // // Pattern fragments are reusable chunks of dags that match specific things. // They can take arguments and have C++ predicates that control whether they // match. They are intended to make the patterns for common instructions more // compact and readable. // /// PatFrags - Represents a set of pattern fragments. Each single fragment /// can match something on the DAG, from a single node to multiple nested other /// fragments. The whole set of fragments matches if any of the single /// fragments match. This allows e.g. matching and "add with overflow" and /// a regular "add" with the same fragment set. /// class PatFrags frags, code pred = [{}], SDNodeXForm xform = NOOP_SDNodeXForm> : SDPatternOperator { dag Operands = ops; list Fragments = frags; code PredicateCode = pred; code GISelPredicateCode = [{}]; code ImmediateCode = [{}]; SDNodeXForm OperandTransform = xform; // When this is set, the PredicateCode may refer to a constant Operands // vector which contains the captured nodes of the DAG, in the order listed // by the Operands field above. // // This is useful when Fragments involves associative / commutative // operators: a single piece of code can easily refer to all operands even // when re-associated / commuted variants of the fragment are matched. bit PredicateCodeUsesOperands = false; // Define a few pre-packaged predicates. This helps GlobalISel import // existing rules from SelectionDAG for many common cases. // They will be tested prior to the code in pred and must not be used in // ImmLeaf and its subclasses. // If set to true, a predicate is added that checks for the absence of use of // the first result. bit HasNoUse = ?; // If set to true, a predicate is added that checks for the sole use of // the first result. bit HasOneUse = ?; // Is the desired pre-packaged predicate for a load? bit IsLoad = ?; // Is the desired pre-packaged predicate for a store? bit IsStore = ?; // Is the desired pre-packaged predicate for an atomic? bit IsAtomic = ?; // cast(N)->getAddressingMode() == ISD::UNINDEXED; // cast(N)->getAddressingMode() == ISD::UNINDEXED; bit IsUnindexed = ?; // cast(N)->getExtensionType() != ISD::NON_EXTLOAD bit IsNonExtLoad = ?; // cast(N)->getExtensionType() == ISD::EXTLOAD; bit IsAnyExtLoad = ?; // cast(N)->getExtensionType() == ISD::SEXTLOAD; bit IsSignExtLoad = ?; // cast(N)->getExtensionType() == ISD::ZEXTLOAD; bit IsZeroExtLoad = ?; // !cast(N)->isTruncatingStore(); // cast(N)->isTruncatingStore(); bit IsTruncStore = ?; // cast(N)->getAddressSpace() == // If this empty, accept any address space. list AddressSpaces = ?; // cast(N)->getAlign() >= // If this is empty, accept any alignment. 
int MinAlignment = ?; // cast(N)->getOrdering() == AtomicOrdering::Monotonic bit IsAtomicOrderingMonotonic = ?; // cast(N)->getOrdering() == AtomicOrdering::Acquire bit IsAtomicOrderingAcquire = ?; // cast(N)->getOrdering() == AtomicOrdering::Release bit IsAtomicOrderingRelease = ?; // cast(N)->getOrdering() == AtomicOrdering::AcquireRelease bit IsAtomicOrderingAcquireRelease = ?; // cast(N)->getOrdering() == AtomicOrdering::SequentiallyConsistent bit IsAtomicOrderingSequentiallyConsistent = ?; // isAcquireOrStronger(cast(N)->getOrdering()) // !isAcquireOrStronger(cast(N)->getOrdering()) bit IsAtomicOrderingAcquireOrStronger = ?; // isReleaseOrStronger(cast(N)->getOrdering()) // !isReleaseOrStronger(cast(N)->getOrdering()) bit IsAtomicOrderingReleaseOrStronger = ?; // cast(N)->getMemoryVT() == MVT::; // cast(N)->getMemoryVT() == MVT::; ValueType MemoryVT = ?; // cast(N)->getMemoryVT().getScalarType() == MVT::; // cast(N)->getMemoryVT().getScalarType() == MVT::; ValueType ScalarMemoryVT = ?; } // Patterns and PatFrags can also subclass GISelFlags to set flags that affect // how GlobalISel behaves when matching them. class GISelFlags { bit GIIgnoreCopies = ?; } // PatFrag - A version of PatFrags matching only a single fragment. class PatFrag : PatFrags; // OutPatFrag is a pattern fragment that is used as part of an output pattern // (not an input pattern). These do not have predicates or transforms, but are // used to avoid repeated subexpressions in output patterns. class OutPatFrag : PatFrag; // PatLeaf's are pattern fragments that have no operands. This is just a helper // to define immediates and other common things concisely. class PatLeaf : PatFrag<(ops), frag, pred, xform>; // ImmLeaf is a pattern fragment with a constraint on the immediate. The // constraint is a function that is run on the immediate (always with the value // sign extended out to an int64_t) as Imm. For example: // // def immSExt8 : ImmLeaf; // // this is a more convenient form to match 'imm' nodes in than PatLeaf and also // is preferred over using PatLeaf because it allows the code generator to // reason more about the constraint. // // If FastIsel should ignore all instructions that have an operand of this type, // the FastIselShouldIgnore flag can be set. This is an optimization to reduce // the code size of the generated fast instruction selector. class ImmLeaf : PatFrag<(ops), (vt ImmNode), [{}], xform> { let ImmediateCode = pred; bit FastIselShouldIgnore = false; // Is the data type of the immediate an APInt? bit IsAPInt = false; // Is the data type of the immediate an APFloat? bit IsAPFloat = false; } // Convenience wrapper for ImmLeaf to use timm/TargetConstant instead // of imm/Constant. class TImmLeaf : ImmLeaf; // An ImmLeaf except that Imm is an APInt. This is useful when you need to // zero-extend the immediate instead of sign-extend it. // // Note that FastISel does not currently understand IntImmLeaf and will not // generate code for rules that make use of it. As such, it does not make sense // to replace ImmLeaf with IntImmLeaf. However, replacing PatLeaf with an // IntImmLeaf will allow GlobalISel to import the rule. class IntImmLeaf : ImmLeaf { let IsAPInt = true; let FastIselShouldIgnore = true; } // An ImmLeaf except that Imm is an APFloat. // // Note that FastISel does not currently understand FPImmLeaf and will not // generate code for rules that make use of it. class FPImmLeaf : ImmLeaf { let IsAPFloat = true; let FastIselShouldIgnore = true; } // Leaf fragments. 
def vtInt : PatLeaf<(vt), [{ return N->getVT().isInteger(); }]>; def vtFP : PatLeaf<(vt), [{ return N->getVT().isFloatingPoint(); }]>; // Use ISD::isConstantSplatVectorAllOnes or ISD::isConstantSplatVectorAllZeros // to look for the corresponding build_vector or splat_vector. Will look through // bitcasts and check for either opcode, except when used as a pattern root. // When used as a pattern root, only fixed-length build_vector and scalable // splat_vector are supported. def immAllOnesV : SDPatternOperator; // ISD::isConstantSplatVectorAllOnes def immAllZerosV : SDPatternOperator; // ISD::isConstantSplatVectorAllZeros // Other helper fragments. def not : PatFrag<(ops node:$in), (xor node:$in, -1)>; def vnot : PatFrag<(ops node:$in), (xor node:$in, immAllOnesV)>; def ineg : PatFrag<(ops node:$in), (sub 0, node:$in)>; def zanyext : PatFrags<(ops node:$op), [(zext node:$op), (anyext node:$op)]>; // null_frag - The null pattern operator is used in multiclass instantiations // which accept an SDPatternOperator for use in matching patterns for internal // definitions. When expanding a pattern, if the null fragment is referenced // in the expansion, the pattern is discarded and it is as-if '[]' had been // specified. This allows multiclasses to have the isel patterns be optional. def null_frag : SDPatternOperator; // load fragments. def unindexedload : PatFrag<(ops node:$ptr), (ld node:$ptr)> { let IsLoad = true; let IsUnindexed = true; } def load : PatFrag<(ops node:$ptr), (unindexedload node:$ptr)> { let IsLoad = true; let IsNonExtLoad = true; } // extending load fragments. def extload : PatFrag<(ops node:$ptr), (unindexedload node:$ptr)> { let IsLoad = true; let IsAnyExtLoad = true; } def sextload : PatFrag<(ops node:$ptr), (unindexedload node:$ptr)> { let IsLoad = true; let IsSignExtLoad = true; } def zextload : PatFrag<(ops node:$ptr), (unindexedload node:$ptr)> { let IsLoad = true; let IsZeroExtLoad = true; } def extloadi1 : PatFrag<(ops node:$ptr), (extload node:$ptr)> { let IsLoad = true; let MemoryVT = i1; } def extloadi8 : PatFrag<(ops node:$ptr), (extload node:$ptr)> { let IsLoad = true; let MemoryVT = i8; } def extloadi16 : PatFrag<(ops node:$ptr), (extload node:$ptr)> { let IsLoad = true; let MemoryVT = i16; } def extloadi32 : PatFrag<(ops node:$ptr), (extload node:$ptr)> { let IsLoad = true; let MemoryVT = i32; } def extloadi64 : PatFrag<(ops node:$ptr), (extload node:$ptr)> { let IsLoad = true; let MemoryVT = i64; } def extloadf16 : PatFrag<(ops node:$ptr), (extload node:$ptr)> { let IsLoad = true; let MemoryVT = f16; } def extloadf32 : PatFrag<(ops node:$ptr), (extload node:$ptr)> { let IsLoad = true; let MemoryVT = f32; } def extloadf64 : PatFrag<(ops node:$ptr), (extload node:$ptr)> { let IsLoad = true; let MemoryVT = f64; } def sextloadi1 : PatFrag<(ops node:$ptr), (sextload node:$ptr)> { let IsLoad = true; let MemoryVT = i1; } def sextloadi8 : PatFrag<(ops node:$ptr), (sextload node:$ptr)> { let IsLoad = true; let MemoryVT = i8; } def sextloadi16 : PatFrag<(ops node:$ptr), (sextload node:$ptr)> { let IsLoad = true; let MemoryVT = i16; } def sextloadi32 : PatFrag<(ops node:$ptr), (sextload node:$ptr)> { let IsLoad = true; let MemoryVT = i32; } def sextloadi64 : PatFrag<(ops node:$ptr), (sextload node:$ptr)> { let IsLoad = true; let MemoryVT = i64; } def zextloadi1 : PatFrag<(ops node:$ptr), (zextload node:$ptr)> { let IsLoad = true; let MemoryVT = i1; } def zextloadi8 : PatFrag<(ops node:$ptr), (zextload node:$ptr)> { let IsLoad = true; let MemoryVT = i8; } def zextloadi16 : 
PatFrag<(ops node:$ptr), (zextload node:$ptr)> { let IsLoad = true; let MemoryVT = i16; } def zextloadi32 : PatFrag<(ops node:$ptr), (zextload node:$ptr)> { let IsLoad = true; let MemoryVT = i32; } def zextloadi64 : PatFrag<(ops node:$ptr), (zextload node:$ptr)> { let IsLoad = true; let MemoryVT = i64; } def extloadvi1 : PatFrag<(ops node:$ptr), (extload node:$ptr)> { let IsLoad = true; let ScalarMemoryVT = i1; } def extloadvi8 : PatFrag<(ops node:$ptr), (extload node:$ptr)> { let IsLoad = true; let ScalarMemoryVT = i8; } def extloadvi16 : PatFrag<(ops node:$ptr), (extload node:$ptr)> { let IsLoad = true; let ScalarMemoryVT = i16; } def extloadvi32 : PatFrag<(ops node:$ptr), (extload node:$ptr)> { let IsLoad = true; let ScalarMemoryVT = i32; } def extloadvf16 : PatFrag<(ops node:$ptr), (extload node:$ptr)> { let IsLoad = true; let ScalarMemoryVT = f16; } def extloadvf32 : PatFrag<(ops node:$ptr), (extload node:$ptr)> { let IsLoad = true; let ScalarMemoryVT = f32; } def extloadvf64 : PatFrag<(ops node:$ptr), (extload node:$ptr)> { let IsLoad = true; let ScalarMemoryVT = f64; } def sextloadvi1 : PatFrag<(ops node:$ptr), (sextload node:$ptr)> { let IsLoad = true; let ScalarMemoryVT = i1; } def sextloadvi8 : PatFrag<(ops node:$ptr), (sextload node:$ptr)> { let IsLoad = true; let ScalarMemoryVT = i8; } def sextloadvi16 : PatFrag<(ops node:$ptr), (sextload node:$ptr)> { let IsLoad = true; let ScalarMemoryVT = i16; } def sextloadvi32 : PatFrag<(ops node:$ptr), (sextload node:$ptr)> { let IsLoad = true; let ScalarMemoryVT = i32; } def zextloadvi1 : PatFrag<(ops node:$ptr), (zextload node:$ptr)> { let IsLoad = true; let ScalarMemoryVT = i1; } def zextloadvi8 : PatFrag<(ops node:$ptr), (zextload node:$ptr)> { let IsLoad = true; let ScalarMemoryVT = i8; } def zextloadvi16 : PatFrag<(ops node:$ptr), (zextload node:$ptr)> { let IsLoad = true; let ScalarMemoryVT = i16; } def zextloadvi32 : PatFrag<(ops node:$ptr), (zextload node:$ptr)> { let IsLoad = true; let ScalarMemoryVT = i32; } // store fragments. def unindexedstore : PatFrag<(ops node:$val, node:$ptr), (st node:$val, node:$ptr)> { let IsStore = true; let IsUnindexed = true; } def store : PatFrag<(ops node:$val, node:$ptr), (unindexedstore node:$val, node:$ptr)> { let IsStore = true; let IsTruncStore = false; } // truncstore fragments. 
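// NOTE (editorial, illustrative only -- not part of this patch): backends match
// the fragments defined below directly in their instruction patterns, roughly
//   def : Pat<(truncstorei16 GPR:$val, addrmode:$ptr),
//             (STORE_H GPR:$val, addrmode:$ptr)>;
// where GPR, addrmode and STORE_H stand in for target-specific definitions.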
def truncstore : PatFrag<(ops node:$val, node:$ptr), (unindexedstore node:$val, node:$ptr)> { let IsStore = true; let IsTruncStore = true; } def truncstorei8 : PatFrag<(ops node:$val, node:$ptr), (truncstore node:$val, node:$ptr)> { let IsStore = true; let MemoryVT = i8; let IsTruncStore = true; } def truncstorei16 : PatFrag<(ops node:$val, node:$ptr), (truncstore node:$val, node:$ptr)> { let IsStore = true; let MemoryVT = i16; let IsTruncStore = true; } def truncstorei32 : PatFrag<(ops node:$val, node:$ptr), (truncstore node:$val, node:$ptr)> { let IsStore = true; let MemoryVT = i32; let IsTruncStore = true; } def truncstorei64 : PatFrag<(ops node:$val, node:$ptr), (truncstore node:$val, node:$ptr)> { let IsStore = true; let MemoryVT = i64; let IsTruncStore = true; } def truncstoref16 : PatFrag<(ops node:$val, node:$ptr), (truncstore node:$val, node:$ptr)> { let IsStore = true; let MemoryVT = f16; } def truncstoref32 : PatFrag<(ops node:$val, node:$ptr), (truncstore node:$val, node:$ptr)> { let IsStore = true; let MemoryVT = f32; } def truncstoref64 : PatFrag<(ops node:$val, node:$ptr), (truncstore node:$val, node:$ptr)> { let IsStore = true; let MemoryVT = f64; } def truncstorevi8 : PatFrag<(ops node:$val, node:$ptr), (truncstore node:$val, node:$ptr)> { let IsStore = true; let ScalarMemoryVT = i8; } def truncstorevi16 : PatFrag<(ops node:$val, node:$ptr), (truncstore node:$val, node:$ptr)> { let IsStore = true; let ScalarMemoryVT = i16; } def truncstorevi32 : PatFrag<(ops node:$val, node:$ptr), (truncstore node:$val, node:$ptr)> { let IsStore = true; let ScalarMemoryVT = i32; } // indexed store fragments. def istore : PatFrag<(ops node:$val, node:$base, node:$offset), (ist node:$val, node:$base, node:$offset)> { let IsStore = true; let IsTruncStore = false; } def pre_store : PatFrag<(ops node:$val, node:$base, node:$offset), (istore node:$val, node:$base, node:$offset), [{ ISD::MemIndexedMode AM = cast(N)->getAddressingMode(); return AM == ISD::PRE_INC || AM == ISD::PRE_DEC; }]>; def itruncstore : PatFrag<(ops node:$val, node:$base, node:$offset), (ist node:$val, node:$base, node:$offset)> { let IsStore = true; let IsTruncStore = true; } def pre_truncst : PatFrag<(ops node:$val, node:$base, node:$offset), (itruncstore node:$val, node:$base, node:$offset), [{ ISD::MemIndexedMode AM = cast(N)->getAddressingMode(); return AM == ISD::PRE_INC || AM == ISD::PRE_DEC; }]>; def pre_truncsti1 : PatFrag<(ops node:$val, node:$base, node:$offset), (pre_truncst node:$val, node:$base, node:$offset)> { let IsStore = true; let MemoryVT = i1; } def pre_truncsti8 : PatFrag<(ops node:$val, node:$base, node:$offset), (pre_truncst node:$val, node:$base, node:$offset)> { let IsStore = true; let MemoryVT = i8; } def pre_truncsti16 : PatFrag<(ops node:$val, node:$base, node:$offset), (pre_truncst node:$val, node:$base, node:$offset)> { let IsStore = true; let MemoryVT = i16; } def pre_truncsti32 : PatFrag<(ops node:$val, node:$base, node:$offset), (pre_truncst node:$val, node:$base, node:$offset)> { let IsStore = true; let MemoryVT = i32; } def pre_truncstf32 : PatFrag<(ops node:$val, node:$base, node:$offset), (pre_truncst node:$val, node:$base, node:$offset)> { let IsStore = true; let MemoryVT = f32; } def pre_truncstvi8 : PatFrag<(ops node:$val, node:$base, node:$offset), (pre_truncst node:$val, node:$base, node:$offset)> { let IsStore = true; let ScalarMemoryVT = i8; } def pre_truncstvi16 : PatFrag<(ops node:$val, node:$base, node:$offset), (pre_truncst node:$val, node:$base, node:$offset)> { let IsStore = 
true; let ScalarMemoryVT = i16; } def post_store : PatFrag<(ops node:$val, node:$ptr, node:$offset), (istore node:$val, node:$ptr, node:$offset), [{ ISD::MemIndexedMode AM = cast(N)->getAddressingMode(); return AM == ISD::POST_INC || AM == ISD::POST_DEC; }]>; def post_truncst : PatFrag<(ops node:$val, node:$base, node:$offset), (itruncstore node:$val, node:$base, node:$offset), [{ ISD::MemIndexedMode AM = cast(N)->getAddressingMode(); return AM == ISD::POST_INC || AM == ISD::POST_DEC; }]>; def post_truncsti1 : PatFrag<(ops node:$val, node:$base, node:$offset), (post_truncst node:$val, node:$base, node:$offset)> { let IsStore = true; let MemoryVT = i1; } def post_truncsti8 : PatFrag<(ops node:$val, node:$base, node:$offset), (post_truncst node:$val, node:$base, node:$offset)> { let IsStore = true; let MemoryVT = i8; } def post_truncsti16 : PatFrag<(ops node:$val, node:$base, node:$offset), (post_truncst node:$val, node:$base, node:$offset)> { let IsStore = true; let MemoryVT = i16; } def post_truncsti32 : PatFrag<(ops node:$val, node:$base, node:$offset), (post_truncst node:$val, node:$base, node:$offset)> { let IsStore = true; let MemoryVT = i32; } def post_truncstf32 : PatFrag<(ops node:$val, node:$base, node:$offset), (post_truncst node:$val, node:$base, node:$offset)> { let IsStore = true; let MemoryVT = f32; } def post_truncstvi8 : PatFrag<(ops node:$val, node:$base, node:$offset), (post_truncst node:$val, node:$base, node:$offset)> { let IsStore = true; let ScalarMemoryVT = i8; } def post_truncstvi16 : PatFrag<(ops node:$val, node:$base, node:$offset), (post_truncst node:$val, node:$base, node:$offset)> { let IsStore = true; let ScalarMemoryVT = i16; } // A helper for matching undef or freeze undef def undef_or_freeze_undef : PatFrags<(ops), [(undef), (freeze undef)]>; // TODO: Split these into volatile and unordered flavors to enable // selectively legal optimizations for each. (See D66309) def simple_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{ return cast(N)->isSimple(); }]>; def simple_store : PatFrag<(ops node:$val, node:$ptr), (store node:$val, node:$ptr), [{ return cast(N)->isSimple(); }]>; // nontemporal store fragments. def nontemporalstore : PatFrag<(ops node:$val, node:$ptr), (store node:$val, node:$ptr), [{ return cast(N)->isNonTemporal(); }]>; def alignednontemporalstore : PatFrag<(ops node:$val, node:$ptr), (nontemporalstore node:$val, node:$ptr), [{ StoreSDNode *St = cast(N); return St->getAlign() >= St->getMemoryVT().getStoreSize(); }]>; def unalignednontemporalstore : PatFrag<(ops node:$val, node:$ptr), (nontemporalstore node:$val, node:$ptr), [{ StoreSDNode *St = cast(N); return St->getAlignment() < St->getMemoryVT().getStoreSize(); }]>; // nontemporal load fragments. def nontemporalload : PatFrag<(ops node:$ptr), (load node:$ptr), [{ return cast(N)->isNonTemporal(); }]>; def alignednontemporalload : PatFrag<(ops node:$ptr), (nontemporalload node:$ptr), [{ LoadSDNode *Ld = cast(N); return Ld->getAlign() >= Ld->getMemoryVT().getStoreSize(); }]>; // setcc convenience fragments. 
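// NOTE (editorial, illustrative only -- not part of this patch): each fragment
// below simply bakes the condition code into a plain setcc, so in a target
// pattern (seteq GPR:$a, GPR:$b) is shorthand for
// (setcc GPR:$a, GPR:$b, SETEQ), with GPR standing in for a target register
// class.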
def setoeq : PatFrag<(ops node:$lhs, node:$rhs), (setcc node:$lhs, node:$rhs, SETOEQ)>; def setogt : PatFrag<(ops node:$lhs, node:$rhs), (setcc node:$lhs, node:$rhs, SETOGT)>; def setoge : PatFrag<(ops node:$lhs, node:$rhs), (setcc node:$lhs, node:$rhs, SETOGE)>; def setolt : PatFrag<(ops node:$lhs, node:$rhs), (setcc node:$lhs, node:$rhs, SETOLT)>; def setole : PatFrag<(ops node:$lhs, node:$rhs), (setcc node:$lhs, node:$rhs, SETOLE)>; def setone : PatFrag<(ops node:$lhs, node:$rhs), (setcc node:$lhs, node:$rhs, SETONE)>; def seto : PatFrag<(ops node:$lhs, node:$rhs), (setcc node:$lhs, node:$rhs, SETO)>; def setuo : PatFrag<(ops node:$lhs, node:$rhs), (setcc node:$lhs, node:$rhs, SETUO)>; def setueq : PatFrag<(ops node:$lhs, node:$rhs), (setcc node:$lhs, node:$rhs, SETUEQ)>; def setugt : PatFrag<(ops node:$lhs, node:$rhs), (setcc node:$lhs, node:$rhs, SETUGT)>; def setuge : PatFrag<(ops node:$lhs, node:$rhs), (setcc node:$lhs, node:$rhs, SETUGE)>; def setult : PatFrag<(ops node:$lhs, node:$rhs), (setcc node:$lhs, node:$rhs, SETULT)>; def setule : PatFrag<(ops node:$lhs, node:$rhs), (setcc node:$lhs, node:$rhs, SETULE)>; def setune : PatFrag<(ops node:$lhs, node:$rhs), (setcc node:$lhs, node:$rhs, SETUNE)>; def seteq : PatFrag<(ops node:$lhs, node:$rhs), (setcc node:$lhs, node:$rhs, SETEQ)>; def setgt : PatFrag<(ops node:$lhs, node:$rhs), (setcc node:$lhs, node:$rhs, SETGT)>; def setge : PatFrag<(ops node:$lhs, node:$rhs), (setcc node:$lhs, node:$rhs, SETGE)>; def setlt : PatFrag<(ops node:$lhs, node:$rhs), (setcc node:$lhs, node:$rhs, SETLT)>; def setle : PatFrag<(ops node:$lhs, node:$rhs), (setcc node:$lhs, node:$rhs, SETLE)>; def setne : PatFrag<(ops node:$lhs, node:$rhs), (setcc node:$lhs, node:$rhs, SETNE)>; // We don't have strict FP extended loads as single DAG nodes, but we can // still provide convenience fragments to match those operations. 
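// Editor note (illustrative, not part of the .td file): the any_* wrappers defined below let a
// single instruction pattern match both the default FP node and its constrained (strict_*)
// counterpart. The strict nodes are chained because their effect on the FP environment is
// observable, so they must not be reordered or folded away. A small standalone C++ sketch of
// that observability on IEEE-754 hardware (behavior depends on the platform honoring FP
// exception flags; this is not LLVM code):

#include <cfenv>
#include <cstdio>

int main() {
  std::feclearexcept(FE_ALL_EXCEPT);
  // volatile keeps the division from being constant-folded at compile time.
  volatile double x = 1.0, y = 0.0;
  volatile double z = x / y; // raises the FE_DIVBYZERO status flag
  (void)z;
  std::printf("divbyzero flag set: %d\n", std::fetestexcept(FE_DIVBYZERO) != 0);
  return 0;
}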
def strict_extloadf32 : PatFrag<(ops node:$ptr), (strict_fpextend (f32 (load node:$ptr)))>; def strict_extloadf64 : PatFrag<(ops node:$ptr), (strict_fpextend (f64 (load node:$ptr)))>; // Convenience fragments to match both strict and non-strict fp operations def any_fadd : PatFrags<(ops node:$lhs, node:$rhs), [(strict_fadd node:$lhs, node:$rhs), (fadd node:$lhs, node:$rhs)]>; def any_fsub : PatFrags<(ops node:$lhs, node:$rhs), [(strict_fsub node:$lhs, node:$rhs), (fsub node:$lhs, node:$rhs)]>; def any_fmul : PatFrags<(ops node:$lhs, node:$rhs), [(strict_fmul node:$lhs, node:$rhs), (fmul node:$lhs, node:$rhs)]>; def any_fdiv : PatFrags<(ops node:$lhs, node:$rhs), [(strict_fdiv node:$lhs, node:$rhs), (fdiv node:$lhs, node:$rhs)]>; def any_frem : PatFrags<(ops node:$lhs, node:$rhs), [(strict_frem node:$lhs, node:$rhs), (frem node:$lhs, node:$rhs)]>; def any_fma : PatFrags<(ops node:$src1, node:$src2, node:$src3), [(strict_fma node:$src1, node:$src2, node:$src3), (fma node:$src1, node:$src2, node:$src3)]>; def any_fsqrt : PatFrags<(ops node:$src), [(strict_fsqrt node:$src), (fsqrt node:$src)]>; def any_fsin : PatFrags<(ops node:$src), [(strict_fsin node:$src), (fsin node:$src)]>; def any_fcos : PatFrags<(ops node:$src), [(strict_fcos node:$src), (fcos node:$src)]>; def any_ftan : PatFrags<(ops node:$src), [(strict_ftan node:$src), (ftan node:$src)]>; def any_fasin : PatFrags<(ops node:$src), [(strict_fasin node:$src), (fasin node:$src)]>; def any_facos : PatFrags<(ops node:$src), [(strict_facos node:$src), (facos node:$src)]>; def any_fatan : PatFrags<(ops node:$src), [(strict_fatan node:$src), (fatan node:$src)]>; def any_fsinh : PatFrags<(ops node:$src), [(strict_fsinh node:$src), (fsinh node:$src)]>; def any_fcosh : PatFrags<(ops node:$src), [(strict_fcosh node:$src), (fcosh node:$src)]>; def any_ftanh : PatFrags<(ops node:$src), [(strict_ftanh node:$src), (ftanh node:$src)]>; def any_fexp2 : PatFrags<(ops node:$src), [(strict_fexp2 node:$src), (fexp2 node:$src)]>; def any_fpow : PatFrags<(ops node:$lhs, node:$rhs), [(strict_fpow node:$lhs, node:$rhs), (fpow node:$lhs, node:$rhs)]>; def any_fldexp : PatFrags<(ops node:$lhs, node:$rhs), [(strict_fldexp node:$lhs, node:$rhs), (fldexp node:$lhs, node:$rhs)]>; def any_flog2 : PatFrags<(ops node:$src), [(strict_flog2 node:$src), (flog2 node:$src)]>; def any_frint : PatFrags<(ops node:$src), [(strict_frint node:$src), (frint node:$src)]>; def any_lrint : PatFrags<(ops node:$src), [(strict_lrint node:$src), (lrint node:$src)]>; def any_llrint : PatFrags<(ops node:$src), [(strict_llrint node:$src), (llrint node:$src)]>; def any_fnearbyint : PatFrags<(ops node:$src), [(strict_fnearbyint node:$src), (fnearbyint node:$src)]>; def any_fceil : PatFrags<(ops node:$src), [(strict_fceil node:$src), (fceil node:$src)]>; def any_ffloor : PatFrags<(ops node:$src), [(strict_ffloor node:$src), (ffloor node:$src)]>; def any_lround : PatFrags<(ops node:$src), [(strict_lround node:$src), (lround node:$src)]>; def any_llround : PatFrags<(ops node:$src), [(strict_llround node:$src), (llround node:$src)]>; def any_fround : PatFrags<(ops node:$src), [(strict_fround node:$src), (fround node:$src)]>; def any_froundeven : PatFrags<(ops node:$src), [(strict_froundeven node:$src), (froundeven node:$src)]>; def any_ftrunc : PatFrags<(ops node:$src), [(strict_ftrunc node:$src), (ftrunc node:$src)]>; def any_fmaxnum : PatFrags<(ops node:$lhs, node:$rhs), [(strict_fmaxnum node:$lhs, node:$rhs), (fmaxnum node:$lhs, node:$rhs)]>; def any_fminnum : PatFrags<(ops node:$lhs, 
node:$rhs), [(strict_fminnum node:$lhs, node:$rhs), (fminnum node:$lhs, node:$rhs)]>; def any_fmaximum : PatFrags<(ops node:$lhs, node:$rhs), [(strict_fmaximum node:$lhs, node:$rhs), (fmaximum node:$lhs, node:$rhs)]>; def any_fminimum : PatFrags<(ops node:$lhs, node:$rhs), [(strict_fminimum node:$lhs, node:$rhs), (fminimum node:$lhs, node:$rhs)]>; def any_fpround : PatFrags<(ops node:$src), [(strict_fpround node:$src), (fpround node:$src)]>; def any_fpextend : PatFrags<(ops node:$src), [(strict_fpextend node:$src), (fpextend node:$src)]>; def any_extloadf32 : PatFrags<(ops node:$ptr), [(strict_extloadf32 node:$ptr), (extloadf32 node:$ptr)]>; def any_extloadf64 : PatFrags<(ops node:$ptr), [(strict_extloadf64 node:$ptr), (extloadf64 node:$ptr)]>; def any_fp_to_sint : PatFrags<(ops node:$src), [(strict_fp_to_sint node:$src), (fp_to_sint node:$src)]>; def any_fp_to_uint : PatFrags<(ops node:$src), [(strict_fp_to_uint node:$src), (fp_to_uint node:$src)]>; def any_sint_to_fp : PatFrags<(ops node:$src), [(strict_sint_to_fp node:$src), (sint_to_fp node:$src)]>; def any_uint_to_fp : PatFrags<(ops node:$src), [(strict_uint_to_fp node:$src), (uint_to_fp node:$src)]>; def any_fsetcc : PatFrags<(ops node:$lhs, node:$rhs, node:$pred), [(strict_fsetcc node:$lhs, node:$rhs, node:$pred), (setcc node:$lhs, node:$rhs, node:$pred)]>; def any_fsetccs : PatFrags<(ops node:$lhs, node:$rhs, node:$pred), [(strict_fsetccs node:$lhs, node:$rhs, node:$pred), (setcc node:$lhs, node:$rhs, node:$pred)]>; def any_f16_to_fp : PatFrags<(ops node:$src), [(f16_to_fp node:$src), (strict_f16_to_fp node:$src)]>; def any_fp_to_f16 : PatFrags<(ops node:$src), [(fp_to_f16 node:$src), (strict_fp_to_f16 node:$src)]>; def any_bf16_to_fp : PatFrags<(ops node:$src), [(bf16_to_fp node:$src), (strict_bf16_to_fp node:$src)]>; def any_fp_to_bf16 : PatFrags<(ops node:$src), [(fp_to_bf16 node:$src), (strict_fp_to_bf16 node:$src)]>; multiclass binary_atomic_op_ord { def NAME#_monotonic : PatFrag<(ops node:$ptr, node:$val), (!cast(NAME) node:$ptr, node:$val)> { let IsAtomic = true; let IsAtomicOrderingMonotonic = true; } def NAME#_acquire : PatFrag<(ops node:$ptr, node:$val), (!cast(NAME) node:$ptr, node:$val)> { let IsAtomic = true; let IsAtomicOrderingAcquire = true; } def NAME#_release : PatFrag<(ops node:$ptr, node:$val), (!cast(NAME) node:$ptr, node:$val)> { let IsAtomic = true; let IsAtomicOrderingRelease = true; } def NAME#_acq_rel : PatFrag<(ops node:$ptr, node:$val), (!cast(NAME) node:$ptr, node:$val)> { let IsAtomic = true; let IsAtomicOrderingAcquireRelease = true; } def NAME#_seq_cst : PatFrag<(ops node:$ptr, node:$val), (!cast(NAME) node:$ptr, node:$val)> { let IsAtomic = true; let IsAtomicOrderingSequentiallyConsistent = true; } } multiclass ternary_atomic_op_ord { def NAME#_monotonic : PatFrag<(ops node:$ptr, node:$cmp, node:$val), (!cast(NAME) node:$ptr, node:$cmp, node:$val)> { let IsAtomic = true; let IsAtomicOrderingMonotonic = true; } def NAME#_acquire : PatFrag<(ops node:$ptr, node:$cmp, node:$val), (!cast(NAME) node:$ptr, node:$cmp, node:$val)> { let IsAtomic = true; let IsAtomicOrderingAcquire = true; } def NAME#_release : PatFrag<(ops node:$ptr, node:$cmp, node:$val), (!cast(NAME) node:$ptr, node:$cmp, node:$val)> { let IsAtomic = true; let IsAtomicOrderingRelease = true; } def NAME#_acq_rel : PatFrag<(ops node:$ptr, node:$cmp, node:$val), (!cast(NAME) node:$ptr, node:$cmp, node:$val)> { let IsAtomic = true; let IsAtomicOrderingAcquireRelease = true; } def NAME#_seq_cst : PatFrag<(ops node:$ptr, node:$cmp, node:$val), 
(!cast(NAME) node:$ptr, node:$cmp, node:$val)> { let IsAtomic = true; let IsAtomicOrderingSequentiallyConsistent = true; } } multiclass binary_atomic_op { foreach vt = [ i8, i16, i32, i64 ] in { def _#vt : PatFrag<(ops node:$ptr, node:$val), (atomic_op node:$ptr, node:$val)> { let IsAtomic = true; let MemoryVT = vt; } defm NAME#_#vt : binary_atomic_op_ord; } } multiclass binary_atomic_op_fp { foreach vt = [ f16, bf16, v2f16, v2bf16, f32, f64 ] in { def _#vt : PatFrag<(ops node:$ptr, node:$val), (atomic_op node:$ptr, node:$val)> { let IsAtomic = true; let MemoryVT = vt; } } } multiclass ternary_atomic_op { foreach vt = [ i8, i16, i32, i64 ] in { def _#vt : PatFrag<(ops node:$ptr, node:$cmp, node:$val), (atomic_op node:$ptr, node:$cmp, node:$val)> { let IsAtomic = true; let MemoryVT = vt; } defm NAME#_#vt : ternary_atomic_op_ord; } } defm atomic_load_add : binary_atomic_op; defm atomic_swap : binary_atomic_op; defm atomic_load_sub : binary_atomic_op; defm atomic_load_and : binary_atomic_op; defm atomic_load_clr : binary_atomic_op; defm atomic_load_or : binary_atomic_op; defm atomic_load_xor : binary_atomic_op; defm atomic_load_nand : binary_atomic_op; defm atomic_load_min : binary_atomic_op; defm atomic_load_max : binary_atomic_op; defm atomic_load_umin : binary_atomic_op; defm atomic_load_umax : binary_atomic_op; defm atomic_cmp_swap : ternary_atomic_op; /// Atomic load which zeroes the excess high bits. def atomic_load_zext : PatFrag<(ops node:$ptr), (atomic_load node:$ptr)> { let IsAtomic = true; // FIXME: Should be IsLoad and/or IsAtomic? let IsZeroExtLoad = true; } /// Atomic load which sign extends the excess high bits. def atomic_load_sext : PatFrag<(ops node:$ptr), (atomic_load node:$ptr)> { let IsAtomic = true; // FIXME: Should be IsLoad and/or IsAtomic? let IsSignExtLoad = true; } def atomic_load_8 : PatFrag<(ops node:$ptr), (atomic_load node:$ptr)> { let IsAtomic = true; let MemoryVT = i8; } def atomic_load_16 : PatFrag<(ops node:$ptr), (atomic_load node:$ptr)> { let IsAtomic = true; let MemoryVT = i16; } def atomic_load_32 : PatFrag<(ops node:$ptr), (atomic_load node:$ptr)> { let IsAtomic = true; let MemoryVT = i32; } def atomic_load_64 : PatFrag<(ops node:$ptr), (atomic_load node:$ptr)> { let IsAtomic = true; let MemoryVT = i64; } def atomic_load_zext_8 : PatFrag<(ops node:$ptr), (atomic_load_zext node:$ptr)> { let IsAtomic = true; // FIXME: Should be IsLoad and/or IsAtomic? let MemoryVT = i8; } def atomic_load_zext_16 : PatFrag<(ops node:$ptr), (atomic_load_zext node:$ptr)> { let IsAtomic = true; // FIXME: Should be IsLoad and/or IsAtomic? let MemoryVT = i16; } def atomic_load_sext_8 : PatFrag<(ops node:$ptr), (atomic_load_sext node:$ptr)> { let IsAtomic = true; // FIXME: Should be IsLoad and/or IsAtomic? let MemoryVT = i8; } def atomic_load_sext_16 : PatFrag<(ops node:$ptr), (atomic_load_sext node:$ptr)> { let IsAtomic = true; // FIXME: Should be IsLoad and/or IsAtomic? let MemoryVT = i16; } // Atomic load which zeroes or anyextends the high bits. def atomic_load_az_8 : PatFrags<(ops node:$op), [(atomic_load_8 node:$op), (atomic_load_zext_8 node:$op)]>; // Atomic load which zeroes or anyextends the high bits. def atomic_load_az_16 : PatFrags<(ops node:$op), [(atomic_load_16 node:$op), (atomic_load_zext_16 node:$op)]>; def nonext_masked_gather : PatFrag<(ops node:$def, node:$pred, node:$ptr, node:$idx), (masked_gather node:$def, node:$pred, node:$ptr, node:$idx), [{ return cast(N)->getExtensionType() == ISD::NON_EXTLOAD; }]>; // Any extending masked gather fragments. 
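// Editor note (illustrative, not part of the .td file): the gather fragments below are
// distinguished by the extension kind applied to each loaded element (EXTLOAD leaves the high
// bits unspecified, SEXTLOAD sign-extends, ZEXTLOAD zero-extends) and by the in-memory scalar
// type they check via getMemoryVT(). A standalone sketch of the sext vs. zext difference for
// an i8 element widened to i32 (plain C++; helper names are illustrative only):

#include <cassert>
#include <cstdint>

// Widen one gathered i8 lane to i32, as a zero- or sign-extending gather would.
static uint32_t zext_lane(uint8_t m) { return static_cast<uint32_t>(m); }
static uint32_t sext_lane(uint8_t m) {
  return static_cast<uint32_t>(static_cast<int32_t>(static_cast<int8_t>(m)));
}

int main() {
  uint8_t mem = 0xF0; // -16 when read as a signed i8
  assert(zext_lane(mem) == 0x000000F0u);
  assert(sext_lane(mem) == 0xFFFFFFF0u);
  return 0;
}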
def ext_masked_gather_i8 : PatFrag<(ops node:$def, node:$pred, node:$ptr, node:$idx), (masked_gather node:$def, node:$pred, node:$ptr, node:$idx), [{ auto MGN = cast(N); return MGN->getExtensionType() == ISD::EXTLOAD && MGN->getMemoryVT().getScalarType() == MVT::i8; }]>; def ext_masked_gather_i16 : PatFrag<(ops node:$def, node:$pred, node:$ptr, node:$idx), (masked_gather node:$def, node:$pred, node:$ptr, node:$idx), [{ auto MGN = cast(N); return MGN->getExtensionType() == ISD::EXTLOAD && MGN->getMemoryVT().getScalarType() == MVT::i16; }]>; def ext_masked_gather_i32 : PatFrag<(ops node:$def, node:$pred, node:$ptr, node:$idx), (masked_gather node:$def, node:$pred, node:$ptr, node:$idx), [{ auto MGN = cast(N); return MGN->getExtensionType() == ISD::EXTLOAD && MGN->getMemoryVT().getScalarType() == MVT::i32; }]>; // Sign extending masked gather fragments. def sext_masked_gather_i8 : PatFrag<(ops node:$def, node:$pred, node:$ptr, node:$idx), (masked_gather node:$def, node:$pred, node:$ptr, node:$idx), [{ auto MGN = cast(N); return MGN->getExtensionType() == ISD::SEXTLOAD && MGN->getMemoryVT().getScalarType() == MVT::i8; }]>; def sext_masked_gather_i16 : PatFrag<(ops node:$def, node:$pred, node:$ptr, node:$idx), (masked_gather node:$def, node:$pred, node:$ptr, node:$idx), [{ auto MGN = cast(N); return MGN->getExtensionType() == ISD::SEXTLOAD && MGN->getMemoryVT().getScalarType() == MVT::i16; }]>; def sext_masked_gather_i32 : PatFrag<(ops node:$def, node:$pred, node:$ptr, node:$idx), (masked_gather node:$def, node:$pred, node:$ptr, node:$idx), [{ auto MGN = cast(N); return MGN->getExtensionType() == ISD::SEXTLOAD && MGN->getMemoryVT().getScalarType() == MVT::i32; }]>; // Zero extending masked gather fragments. def zext_masked_gather_i8 : PatFrag<(ops node:$def, node:$pred, node:$ptr, node:$idx), (masked_gather node:$def, node:$pred, node:$ptr, node:$idx), [{ auto MGN = cast(N); return MGN->getExtensionType() == ISD::ZEXTLOAD && MGN->getMemoryVT().getScalarType() == MVT::i8; }]>; def zext_masked_gather_i16 : PatFrag<(ops node:$def, node:$pred, node:$ptr, node:$idx), (masked_gather node:$def, node:$pred, node:$ptr, node:$idx), [{ auto MGN = cast(N); return MGN->getExtensionType() == ISD::ZEXTLOAD && MGN->getMemoryVT().getScalarType() == MVT::i16; }]>; def zext_masked_gather_i32 : PatFrag<(ops node:$def, node:$pred, node:$ptr, node:$idx), (masked_gather node:$def, node:$pred, node:$ptr, node:$idx), [{ auto MGN = cast(N); return MGN->getExtensionType() == ISD::ZEXTLOAD && MGN->getMemoryVT().getScalarType() == MVT::i32; }]>; // Any/Zero extending masked gather fragments. def azext_masked_gather_i8 : PatFrags<(ops node:$def, node:$pred, node:$ptr, node:$idx), [(ext_masked_gather_i8 node:$def, node:$pred, node:$ptr, node:$idx), (zext_masked_gather_i8 node:$def, node:$pred, node:$ptr, node:$idx)]>; def azext_masked_gather_i16 : PatFrags<(ops node:$def, node:$pred, node:$ptr, node:$idx), [(ext_masked_gather_i16 node:$def, node:$pred, node:$ptr, node:$idx), (zext_masked_gather_i16 node:$def, node:$pred, node:$ptr, node:$idx)]>; def azext_masked_gather_i32 : PatFrags<(ops node:$def, node:$pred, node:$ptr, node:$idx), [(ext_masked_gather_i32 node:$def, node:$pred, node:$ptr, node:$idx), (zext_masked_gather_i32 node:$def, node:$pred, node:$ptr, node:$idx)]>; def nontrunc_masked_scatter : PatFrag<(ops node:$val, node:$pred, node:$ptr, node:$idx), (masked_scatter node:$val, node:$pred, node:$ptr, node:$idx), [{ return !cast(N)->isTruncatingStore(); }]>; // Truncating masked scatter fragments. 
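// Editor note (illustrative, not part of the .td file): symmetrically, the truncating scatter
// fragments below match stores in which each vector element is narrowed to the in-memory
// scalar type before being written, and only lanes enabled by the predicate touch memory.
// A minimal standalone model in plain C++ (the helper is illustrative, not an LLVM API):

#include <cassert>
#include <cstdint>
#include <vector>

// Model of a truncating masked scatter: write the low 8 bits of each enabled
// i32 lane to the given byte index; disabled lanes leave memory untouched.
static void trunc_scatter_i8(std::vector<uint8_t> &mem,
                             const std::vector<uint32_t> &val,
                             const std::vector<size_t> &idx,
                             const std::vector<bool> &pred) {
  for (size_t lane = 0; lane < val.size(); ++lane)
    if (pred[lane])
      mem[idx[lane]] = static_cast<uint8_t>(val[lane]); // truncate i32 -> i8
}

int main() {
  std::vector<uint8_t> mem(4, 0xAA);
  trunc_scatter_i8(mem, {0x11223344u, 0x55667788u}, {0, 2}, {true, false});
  assert(mem[0] == 0x44 && mem[1] == 0xAA && mem[2] == 0xAA && mem[3] == 0xAA);
  return 0;
}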
def trunc_masked_scatter_i8 : PatFrag<(ops node:$val, node:$pred, node:$ptr, node:$idx), (masked_scatter node:$val, node:$pred, node:$ptr, node:$idx), [{ auto MSN = cast(N); return MSN->isTruncatingStore() && MSN->getMemoryVT().getScalarType() == MVT::i8; }]>; def trunc_masked_scatter_i16 : PatFrag<(ops node:$val, node:$pred, node:$ptr, node:$idx), (masked_scatter node:$val, node:$pred, node:$ptr, node:$idx), [{ auto MSN = cast(N); return MSN->isTruncatingStore() && MSN->getMemoryVT().getScalarType() == MVT::i16; }]>; def trunc_masked_scatter_i32 : PatFrag<(ops node:$val, node:$pred, node:$ptr, node:$idx), (masked_scatter node:$val, node:$pred, node:$ptr, node:$idx), [{ auto MSN = cast(N); return MSN->isTruncatingStore() && MSN->getMemoryVT().getScalarType() == MVT::i32; }]>; def atomic_store_8 : PatFrag<(ops node:$val, node:$ptr), (atomic_store node:$val, node:$ptr)> { let IsAtomic = true; let MemoryVT = i8; } def atomic_store_16 : PatFrag<(ops node:$val, node:$ptr), (atomic_store node:$val, node:$ptr)> { let IsAtomic = true; let MemoryVT = i16; } def atomic_store_32 : PatFrag<(ops node:$val, node:$ptr), (atomic_store node:$val, node:$ptr)> { let IsAtomic = true; let MemoryVT = i32; } def atomic_store_64 : PatFrag<(ops node:$val, node:$ptr), (atomic_store node:$val, node:$ptr)> { let IsAtomic = true; let MemoryVT = i64; } //===----------------------------------------------------------------------===// // Selection DAG Pattern Support. // // Patterns are what are actually matched against by the target-flavored // instruction selection DAG. Instructions defined by the target implicitly // define patterns in most cases, but patterns can also be explicitly added when // an operation is defined by a sequence of instructions (e.g. loading a large // immediate value on RISC targets that do not support immediates as large as // their GPRs). // class Pattern resultInstrs> { dag PatternToMatch = patternToMatch; list ResultInstrs = resultInstrs; list Predicates = []; // See class Instruction in Target.td. int AddedComplexity = 0; // See class Instruction in Target.td. bit GISelShouldIgnore = 0; } // Pat - A simple (but common) form of a pattern, which produces a simple result // not needing a full list. class Pat : Pattern; //===----------------------------------------------------------------------===// // Complex pattern definitions. // // Complex patterns, e.g. X86 addressing mode, requires pattern matching code // in C++. Ty is the type of return value; NumOperands is the number of operands // returned by the select function; SelectFunc is the name of the function used // to pattern match the max. pattern; RootNodes are the list of possible root nodes // of the sub-dags to match. // e.g. X86 addressing mode - def addr : ComplexPattern; // class ComplexPattern roots = [], list props = [], int complexity = -1> { ValueType Ty = ty; int NumOperands = numops; string SelectFunc = fn; list RootNodes = roots; list Properties = props; int Complexity = complexity; } diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 37b1131d2f8a..7fa3b8a73a41 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -1,12632 +1,12634 @@ //===- SelectionDAGBuilder.cpp - Selection-DAG building -------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This implements routines for translating from LLVM IR into SelectionDAG IR. // //===----------------------------------------------------------------------===// #include "SelectionDAGBuilder.h" #include "SDNodeDbgValue.h" #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/Loads.h" #include "llvm/Analysis/MemoryLocation.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Analysis/VectorUtils.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/AssignmentTrackingAnalysis.h" #include "llvm/CodeGen/CodeGenCommonISel.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/GCMetadata.h" #include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineInstrBundleIterator.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RuntimeLibcallUtil.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGTargetInfo.h" #include "llvm/CodeGen/StackMaps.h" #include "llvm/CodeGen/SwiftErrorValueTracking.h" #include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/CodeGen/WinEHFuncInfo.h" #include "llvm/IR/Argument.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/Constant.h" #include "llvm/IR/ConstantRange.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/EHPersonalities.h" #include "llvm/IR/Function.h" #include "llvm/IR/GetElementPtrTypeIterator.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/IntrinsicsAArch64.h" #include "llvm/IR/IntrinsicsAMDGPU.h" #include "llvm/IR/IntrinsicsWebAssembly.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/MemoryModelRelaxationAnnotations.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" #include "llvm/IR/Operator.h" #include "llvm/IR/PatternMatch.h" #include "llvm/IR/Statepoint.h" #include "llvm/IR/Type.h" #include "llvm/IR/User.h" #include "llvm/IR/Value.h" #include "llvm/MC/MCContext.h" #include "llvm/Support/AtomicOrdering.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/InstructionCost.h" #include 
"llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetIntrinsicInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" #include "llvm/TargetParser/Triple.h" #include "llvm/Transforms/Utils/Local.h" #include #include #include #include #include #include using namespace llvm; using namespace PatternMatch; using namespace SwitchCG; #define DEBUG_TYPE "isel" /// LimitFloatPrecision - Generate low-precision inline sequences for /// some float libcalls (6, 8 or 12 bits). static unsigned LimitFloatPrecision; static cl::opt InsertAssertAlign("insert-assert-align", cl::init(true), cl::desc("Insert the experimental `assertalign` node."), cl::ReallyHidden); static cl::opt LimitFPPrecision("limit-float-precision", cl::desc("Generate low-precision inline sequences " "for some float libcalls"), cl::location(LimitFloatPrecision), cl::Hidden, cl::init(0)); static cl::opt SwitchPeelThreshold( "switch-peel-threshold", cl::Hidden, cl::init(66), cl::desc("Set the case probability threshold for peeling the case from a " "switch statement. A value greater than 100 will void this " "optimization")); // Limit the width of DAG chains. This is important in general to prevent // DAG-based analysis from blowing up. For example, alias analysis and // load clustering may not complete in reasonable time. It is difficult to // recognize and avoid this situation within each individual analysis, and // future analyses are likely to have the same behavior. Limiting DAG width is // the safe approach and will be especially important with global DAGs. // // MaxParallelChains default is arbitrarily high to avoid affecting // optimization, but could be lowered to improve compile time. Any ld-ld-st-st // sequence over this should have been converted to llvm.memcpy by the // frontend. It is easy to induce this behavior with .ll code such as: // %buffer = alloca [4096 x i8] // %data = load [4096 x i8]* %argPtr // store [4096 x i8] %data, [4096 x i8]* %buffer static const unsigned MaxParallelChains = 64; static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, MVT PartVT, EVT ValueVT, const Value *V, SDValue InChain, std::optional CC); /// getCopyFromParts - Create a value that contains the specified legal parts /// combined into the value they represent. If the parts combine to a type /// larger than ValueVT then AssertOp can be used to specify whether the extra /// bits are known to be zero (ISD::AssertZext) or sign extended from ValueVT /// (ISD::AssertSext). static SDValue getCopyFromParts(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, MVT PartVT, EVT ValueVT, const Value *V, SDValue InChain, std::optional CC = std::nullopt, std::optional AssertOp = std::nullopt) { // Let the target assemble the parts if it wants to const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (SDValue Val = TLI.joinRegisterPartsIntoValue(DAG, DL, Parts, NumParts, PartVT, ValueVT, CC)) return Val; if (ValueVT.isVector()) return getCopyFromPartsVector(DAG, DL, Parts, NumParts, PartVT, ValueVT, V, InChain, CC); assert(NumParts > 0 && "No parts to assemble!"); SDValue Val = Parts[0]; if (NumParts > 1) { // Assemble the value from multiple parts. if (ValueVT.isInteger()) { unsigned PartBits = PartVT.getSizeInBits(); unsigned ValueBits = ValueVT.getSizeInBits(); // Assemble the power of 2 part. 
unsigned RoundParts = llvm::bit_floor(NumParts); unsigned RoundBits = PartBits * RoundParts; EVT RoundVT = RoundBits == ValueBits ? ValueVT : EVT::getIntegerVT(*DAG.getContext(), RoundBits); SDValue Lo, Hi; EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), RoundBits/2); if (RoundParts > 2) { Lo = getCopyFromParts(DAG, DL, Parts, RoundParts / 2, PartVT, HalfVT, V, InChain); Hi = getCopyFromParts(DAG, DL, Parts + RoundParts / 2, RoundParts / 2, PartVT, HalfVT, V, InChain); } else { Lo = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[0]); Hi = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[1]); } if (DAG.getDataLayout().isBigEndian()) std::swap(Lo, Hi); Val = DAG.getNode(ISD::BUILD_PAIR, DL, RoundVT, Lo, Hi); if (RoundParts < NumParts) { // Assemble the trailing non-power-of-2 part. unsigned OddParts = NumParts - RoundParts; EVT OddVT = EVT::getIntegerVT(*DAG.getContext(), OddParts * PartBits); Hi = getCopyFromParts(DAG, DL, Parts + RoundParts, OddParts, PartVT, OddVT, V, InChain, CC); // Combine the round and odd parts. Lo = Val; if (DAG.getDataLayout().isBigEndian()) std::swap(Lo, Hi); EVT TotalVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits); Hi = DAG.getNode(ISD::ANY_EXTEND, DL, TotalVT, Hi); Hi = DAG.getNode(ISD::SHL, DL, TotalVT, Hi, DAG.getConstant(Lo.getValueSizeInBits(), DL, TLI.getShiftAmountTy( TotalVT, DAG.getDataLayout()))); Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, TotalVT, Lo); Val = DAG.getNode(ISD::OR, DL, TotalVT, Lo, Hi); } } else if (PartVT.isFloatingPoint()) { // FP split into multiple FP parts (for ppcf128) assert(ValueVT == EVT(MVT::ppcf128) && PartVT == MVT::f64 && "Unexpected split"); SDValue Lo, Hi; Lo = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[0]); Hi = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[1]); if (TLI.hasBigEndianPartOrdering(ValueVT, DAG.getDataLayout())) std::swap(Lo, Hi); Val = DAG.getNode(ISD::BUILD_PAIR, DL, ValueVT, Lo, Hi); } else { // FP split into integer parts (soft fp) assert(ValueVT.isFloatingPoint() && PartVT.isInteger() && !PartVT.isVector() && "Unexpected split"); EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits()); Val = getCopyFromParts(DAG, DL, Parts, NumParts, PartVT, IntVT, V, InChain, CC); } } // There is now one part, held in Val. Correct it to match ValueVT. // PartEVT is the type of the register class that holds the value. // ValueVT is the type of the inline asm operation. EVT PartEVT = Val.getValueType(); if (PartEVT == ValueVT) return Val; if (PartEVT.isInteger() && ValueVT.isFloatingPoint() && ValueVT.bitsLT(PartEVT)) { // For an FP value in an integer part, we need to truncate to the right // width first. PartEVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits()); Val = DAG.getNode(ISD::TRUNCATE, DL, PartEVT, Val); } // Handle types that have the same size. if (PartEVT.getSizeInBits() == ValueVT.getSizeInBits()) return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); // Handle types with different sizes. if (PartEVT.isInteger() && ValueVT.isInteger()) { if (ValueVT.bitsLT(PartEVT)) { // For a truncate, see if we have any information to // indicate whether the truncated bits will always be // zero or sign-extension. if (AssertOp) Val = DAG.getNode(*AssertOp, DL, PartEVT, Val, DAG.getValueType(ValueVT)); return DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val); } return DAG.getNode(ISD::ANY_EXTEND, DL, ValueVT, Val); } if (PartEVT.isFloatingPoint() && ValueVT.isFloatingPoint()) { // FP_ROUND's are always exact here. 
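// Editor note (illustrative, not part of this file): the AssertOp path above records what is
// known about the bits that the TRUNCATE drops. ISD::AssertZext promises the wide value is the
// zero extension of the narrow one; ISD::AssertSext promises it is the sign extension, so the
// truncate is lossless. A standalone restatement of those invariants in plain C++ (helper
// names are illustrative only):

#include <cassert>
#include <cstdint>

// AssertZext invariant: all bits above from_bits are zero.
static bool holds_assert_zext(uint64_t wide, unsigned from_bits) {
  return from_bits >= 64 || (wide >> from_bits) == 0;
}

// AssertSext invariant: sign-extending the low from_bits reproduces the value.
static bool holds_assert_sext(uint64_t wide, unsigned from_bits) {
  if (from_bits >= 64)
    return true;
  unsigned shift = 64 - from_bits;
  int64_t resext = static_cast<int64_t>(wide << shift) >> shift;
  return static_cast<uint64_t>(resext) == wide;
}

int main() {
  assert(holds_assert_zext(0x00000000000000FFull, 8));
  assert(!holds_assert_zext(0x0000000000000100ull, 8));
  assert(holds_assert_sext(0xFFFFFFFFFFFFFF80ull, 8)); // -128, already sign-extended
  return 0;
}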
if (ValueVT.bitsLT(Val.getValueType())) { SDValue NoChange = DAG.getTargetConstant(1, DL, TLI.getPointerTy(DAG.getDataLayout())); if (DAG.getMachineFunction().getFunction().getAttributes().hasFnAttr( llvm::Attribute::StrictFP)) { return DAG.getNode(ISD::STRICT_FP_ROUND, DL, DAG.getVTList(ValueVT, MVT::Other), InChain, Val, NoChange); } return DAG.getNode(ISD::FP_ROUND, DL, ValueVT, Val, NoChange); } return DAG.getNode(ISD::FP_EXTEND, DL, ValueVT, Val); } // Handle MMX to a narrower integer type by bitcasting MMX to integer and // then truncating. if (PartEVT == MVT::x86mmx && ValueVT.isInteger() && ValueVT.bitsLT(PartEVT)) { Val = DAG.getNode(ISD::BITCAST, DL, MVT::i64, Val); return DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val); } report_fatal_error("Unknown mismatch in getCopyFromParts!"); } static void diagnosePossiblyInvalidConstraint(LLVMContext &Ctx, const Value *V, const Twine &ErrMsg) { const Instruction *I = dyn_cast_or_null(V); if (!V) return Ctx.emitError(ErrMsg); const char *AsmError = ", possible invalid constraint for vector type"; if (const CallInst *CI = dyn_cast(I)) if (CI->isInlineAsm()) return Ctx.emitError(I, ErrMsg + AsmError); return Ctx.emitError(I, ErrMsg); } /// getCopyFromPartsVector - Create a value that contains the specified legal /// parts combined into the value they represent. If the parts combine to a /// type larger than ValueVT then AssertOp can be used to specify whether the /// extra bits are known to be zero (ISD::AssertZext) or sign extended from /// ValueVT (ISD::AssertSext). static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, MVT PartVT, EVT ValueVT, const Value *V, SDValue InChain, std::optional CallConv) { assert(ValueVT.isVector() && "Not a vector value"); assert(NumParts > 0 && "No parts to assemble!"); const bool IsABIRegCopy = CallConv.has_value(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue Val = Parts[0]; // Handle a multi-element vector. if (NumParts > 1) { EVT IntermediateVT; MVT RegisterVT; unsigned NumIntermediates; unsigned NumRegs; if (IsABIRegCopy) { NumRegs = TLI.getVectorTypeBreakdownForCallingConv( *DAG.getContext(), *CallConv, ValueVT, IntermediateVT, NumIntermediates, RegisterVT); } else { NumRegs = TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT, NumIntermediates, RegisterVT); } assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!"); NumParts = NumRegs; // Silence a compiler warning. assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!"); assert(RegisterVT.getSizeInBits() == Parts[0].getSimpleValueType().getSizeInBits() && "Part type sizes don't match!"); // Assemble the parts into intermediate operands. SmallVector Ops(NumIntermediates); if (NumIntermediates == NumParts) { // If the register was not expanded, truncate or copy the value, // as appropriate. for (unsigned i = 0; i != NumParts; ++i) Ops[i] = getCopyFromParts(DAG, DL, &Parts[i], 1, PartVT, IntermediateVT, V, InChain, CallConv); } else if (NumParts > 0) { // If the intermediate type was expanded, build the intermediate // operands from the parts. 
assert(NumParts % NumIntermediates == 0 && "Must expand into a divisible number of parts!"); unsigned Factor = NumParts / NumIntermediates; for (unsigned i = 0; i != NumIntermediates; ++i) Ops[i] = getCopyFromParts(DAG, DL, &Parts[i * Factor], Factor, PartVT, IntermediateVT, V, InChain, CallConv); } // Build a vector with BUILD_VECTOR or CONCAT_VECTORS from the // intermediate operands. EVT BuiltVectorTy = IntermediateVT.isVector() ? EVT::getVectorVT( *DAG.getContext(), IntermediateVT.getScalarType(), IntermediateVT.getVectorElementCount() * NumParts) : EVT::getVectorVT(*DAG.getContext(), IntermediateVT.getScalarType(), NumIntermediates); Val = DAG.getNode(IntermediateVT.isVector() ? ISD::CONCAT_VECTORS : ISD::BUILD_VECTOR, DL, BuiltVectorTy, Ops); } // There is now one part, held in Val. Correct it to match ValueVT. EVT PartEVT = Val.getValueType(); if (PartEVT == ValueVT) return Val; if (PartEVT.isVector()) { // Vector/Vector bitcast. if (ValueVT.getSizeInBits() == PartEVT.getSizeInBits()) return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); // If the parts vector has more elements than the value vector, then we // have a vector widening case (e.g. <2 x float> -> <4 x float>). // Extract the elements we want. if (PartEVT.getVectorElementCount() != ValueVT.getVectorElementCount()) { assert((PartEVT.getVectorElementCount().getKnownMinValue() > ValueVT.getVectorElementCount().getKnownMinValue()) && (PartEVT.getVectorElementCount().isScalable() == ValueVT.getVectorElementCount().isScalable()) && "Cannot narrow, it would be a lossy transformation"); PartEVT = EVT::getVectorVT(*DAG.getContext(), PartEVT.getVectorElementType(), ValueVT.getVectorElementCount()); Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, PartEVT, Val, DAG.getVectorIdxConstant(0, DL)); if (PartEVT == ValueVT) return Val; if (PartEVT.isInteger() && ValueVT.isFloatingPoint()) return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); // Vector/Vector bitcast (e.g. <2 x bfloat> -> <2 x half>). if (ValueVT.getSizeInBits() == PartEVT.getSizeInBits()) return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); } // Promoted vector extract return DAG.getAnyExtOrTrunc(Val, DL, ValueVT); } // Trivial bitcast if the types are the same size and the destination // vector type is legal. if (PartEVT.getSizeInBits() == ValueVT.getSizeInBits() && TLI.isTypeLegal(ValueVT)) return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); if (ValueVT.getVectorNumElements() != 1) { // Certain ABIs require that vectors are passed as integers. For vectors // are the same size, this is an obvious bitcast. if (ValueVT.getSizeInBits() == PartEVT.getSizeInBits()) { return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); } else if (ValueVT.bitsLT(PartEVT)) { const uint64_t ValueSize = ValueVT.getFixedSizeInBits(); EVT IntermediateType = EVT::getIntegerVT(*DAG.getContext(), ValueSize); // Drop the extra bits. 
Val = DAG.getNode(ISD::TRUNCATE, DL, IntermediateType, Val); return DAG.getBitcast(ValueVT, Val); } diagnosePossiblyInvalidConstraint( *DAG.getContext(), V, "non-trivial scalar-to-vector conversion"); return DAG.getUNDEF(ValueVT); } // Handle cases such as i8 -> <1 x i1> EVT ValueSVT = ValueVT.getVectorElementType(); if (ValueVT.getVectorNumElements() == 1 && ValueSVT != PartEVT) { unsigned ValueSize = ValueSVT.getSizeInBits(); if (ValueSize == PartEVT.getSizeInBits()) { Val = DAG.getNode(ISD::BITCAST, DL, ValueSVT, Val); } else if (ValueSVT.isFloatingPoint() && PartEVT.isInteger()) { // It's possible a scalar floating point type gets softened to integer and // then promoted to a larger integer. If PartEVT is the larger integer // we need to truncate it and then bitcast to the FP type. assert(ValueSVT.bitsLT(PartEVT) && "Unexpected types"); EVT IntermediateType = EVT::getIntegerVT(*DAG.getContext(), ValueSize); Val = DAG.getNode(ISD::TRUNCATE, DL, IntermediateType, Val); Val = DAG.getBitcast(ValueSVT, Val); } else { Val = ValueVT.isFloatingPoint() ? DAG.getFPExtendOrRound(Val, DL, ValueSVT) : DAG.getAnyExtOrTrunc(Val, DL, ValueSVT); } } return DAG.getBuildVector(ValueVT, DL, Val); } static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &dl, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, const Value *V, std::optional CallConv); /// getCopyToParts - Create a series of nodes that contain the specified value /// split into legal parts. If the parts contain more bits than Val, then, for /// integers, ExtendKind can be used to specify how to generate the extra bits. static void getCopyToParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, const Value *V, std::optional CallConv = std::nullopt, ISD::NodeType ExtendKind = ISD::ANY_EXTEND) { // Let the target split the parts if it wants to const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (TLI.splitValueIntoRegisterParts(DAG, DL, Val, Parts, NumParts, PartVT, CallConv)) return; EVT ValueVT = Val.getValueType(); // Handle the vector case separately. if (ValueVT.isVector()) return getCopyToPartsVector(DAG, DL, Val, Parts, NumParts, PartVT, V, CallConv); unsigned OrigNumParts = NumParts; assert(DAG.getTargetLoweringInfo().isTypeLegal(PartVT) && "Copying to an illegal type!"); if (NumParts == 0) return; assert(!ValueVT.isVector() && "Vector case handled elsewhere"); EVT PartEVT = PartVT; if (PartEVT == ValueVT) { assert(NumParts == 1 && "No-op copy with multiple parts!"); Parts[0] = Val; return; } unsigned PartBits = PartVT.getSizeInBits(); if (NumParts * PartBits > ValueVT.getSizeInBits()) { // If the parts cover more bits than the value has, promote the value. if (PartVT.isFloatingPoint() && ValueVT.isFloatingPoint()) { assert(NumParts == 1 && "Do not know what to promote to!"); Val = DAG.getNode(ISD::FP_EXTEND, DL, PartVT, Val); } else { if (ValueVT.isFloatingPoint()) { // FP values need to be bitcast, then extended if they are being put // into a larger container. ValueVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits()); Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); } assert((PartVT.isInteger() || PartVT == MVT::x86mmx) && ValueVT.isInteger() && "Unknown mismatch!"); ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits); Val = DAG.getNode(ExtendKind, DL, ValueVT, Val); if (PartVT == MVT::x86mmx) Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val); } } else if (PartBits == ValueVT.getSizeInBits()) { // Different types of the same size. 
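// Editor note (illustrative, not part of this file): the BITCAST used here for same-sized part
// and value types is a pure reinterpretation of the bits, never a value conversion. In C++
// terms it corresponds to memcpy-style type punning. A standalone sketch, assuming IEEE-754
// binary32 for float (helper name is illustrative only):

#include <cassert>
#include <cstdint>
#include <cstring>

// Reinterpret a float's bits as a 32-bit integer: the moral equivalent of
// ISD::BITCAST between two 32-bit types, leaving the bit pattern unchanged.
static uint32_t bitcast_f32_to_i32(float f) {
  uint32_t bits;
  static_assert(sizeof(bits) == sizeof(f), "same-size types only");
  std::memcpy(&bits, &f, sizeof(bits));
  return bits;
}

int main() {
  assert(bitcast_f32_to_i32(1.0f) == 0x3F800000u);
  assert(bitcast_f32_to_i32(-0.0f) == 0x80000000u);
  return 0;
}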
assert(NumParts == 1 && PartEVT != ValueVT); Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val); } else if (NumParts * PartBits < ValueVT.getSizeInBits()) { // If the parts cover less bits than value has, truncate the value. assert((PartVT.isInteger() || PartVT == MVT::x86mmx) && ValueVT.isInteger() && "Unknown mismatch!"); ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits); Val = DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val); if (PartVT == MVT::x86mmx) Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val); } // The value may have changed - recompute ValueVT. ValueVT = Val.getValueType(); assert(NumParts * PartBits == ValueVT.getSizeInBits() && "Failed to tile the value with PartVT!"); if (NumParts == 1) { if (PartEVT != ValueVT) { diagnosePossiblyInvalidConstraint(*DAG.getContext(), V, "scalar-to-vector conversion failed"); Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val); } Parts[0] = Val; return; } // Expand the value into multiple parts. if (NumParts & (NumParts - 1)) { // The number of parts is not a power of 2. Split off and copy the tail. assert(PartVT.isInteger() && ValueVT.isInteger() && "Do not know what to expand to!"); unsigned RoundParts = llvm::bit_floor(NumParts); unsigned RoundBits = RoundParts * PartBits; unsigned OddParts = NumParts - RoundParts; SDValue OddVal = DAG.getNode(ISD::SRL, DL, ValueVT, Val, DAG.getShiftAmountConstant(RoundBits, ValueVT, DL)); getCopyToParts(DAG, DL, OddVal, Parts + RoundParts, OddParts, PartVT, V, CallConv); if (DAG.getDataLayout().isBigEndian()) // The odd parts were reversed by getCopyToParts - unreverse them. std::reverse(Parts + RoundParts, Parts + NumParts); NumParts = RoundParts; ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits); Val = DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val); } // The number of parts is a power of 2. Repeatedly bisect the value using // EXTRACT_ELEMENT. Parts[0] = DAG.getNode(ISD::BITCAST, DL, EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits()), Val); for (unsigned StepSize = NumParts; StepSize > 1; StepSize /= 2) { for (unsigned i = 0; i < NumParts; i += StepSize) { unsigned ThisBits = StepSize * PartBits / 2; EVT ThisVT = EVT::getIntegerVT(*DAG.getContext(), ThisBits); SDValue &Part0 = Parts[i]; SDValue &Part1 = Parts[i+StepSize/2]; Part1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, ThisVT, Part0, DAG.getIntPtrConstant(1, DL)); Part0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, ThisVT, Part0, DAG.getIntPtrConstant(0, DL)); if (ThisBits == PartBits && ThisVT != PartVT) { Part0 = DAG.getNode(ISD::BITCAST, DL, PartVT, Part0); Part1 = DAG.getNode(ISD::BITCAST, DL, PartVT, Part1); } } } if (DAG.getDataLayout().isBigEndian()) std::reverse(Parts, Parts + OrigNumParts); } static SDValue widenVectorToPartType(SelectionDAG &DAG, SDValue Val, const SDLoc &DL, EVT PartVT) { if (!PartVT.isVector()) return SDValue(); EVT ValueVT = Val.getValueType(); EVT PartEVT = PartVT.getVectorElementType(); EVT ValueEVT = ValueVT.getVectorElementType(); ElementCount PartNumElts = PartVT.getVectorElementCount(); ElementCount ValueNumElts = ValueVT.getVectorElementCount(); // We only support widening vectors with equivalent element types and // fixed/scalable properties. If a target needs to widen a fixed-length type // to a scalable one, it should be possible to use INSERT_SUBVECTOR below. if (ElementCount::isKnownLE(PartNumElts, ValueNumElts) || PartNumElts.isScalable() != ValueNumElts.isScalable()) return SDValue(); // Have a try for bf16 because some targets share its ABI with fp16. 
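// Editor note (illustrative, not part of this file): the bf16 special case that follows works
// because bf16 and IEEE half (f16) are both 16-bit formats that some ABIs assign to the same
// register class, so only a lane re-label (bitcast) is needed, not a value conversion. bf16 is
// simply the top half of an IEEE-754 binary32 pattern, whereas f16 uses a different layout.
// A standalone C++ illustration of the two encodings of 1.0 (assumes IEEE-754 float):

#include <cassert>
#include <cstdint>
#include <cstring>

// bf16 encoding: take the high 16 bits of the binary32 pattern.
static uint16_t bf16_bits(float f) {
  uint32_t w;
  std::memcpy(&w, &f, sizeof(w));
  return static_cast<uint16_t>(w >> 16);
}

int main() {
  assert(bf16_bits(1.0f) == 0x3F80);  // bf16 encoding of 1.0
  const uint16_t half_one = 0x3C00;   // IEEE f16 encoding of 1.0, for contrast
  assert(bf16_bits(1.0f) != half_one);
  return 0;
}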
if (ValueEVT == MVT::bf16 && PartEVT == MVT::f16) { assert(DAG.getTargetLoweringInfo().isTypeLegal(PartVT) && "Cannot widen to illegal type"); Val = DAG.getNode(ISD::BITCAST, DL, ValueVT.changeVectorElementType(MVT::f16), Val); } else if (PartEVT != ValueEVT) { return SDValue(); } // Widening a scalable vector to another scalable vector is done by inserting // the vector into a larger undef one. if (PartNumElts.isScalable()) return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, PartVT, DAG.getUNDEF(PartVT), Val, DAG.getVectorIdxConstant(0, DL)); // Vector widening case, e.g. <2 x float> -> <4 x float>. Shuffle in // undef elements. SmallVector Ops; DAG.ExtractVectorElements(Val, Ops); SDValue EltUndef = DAG.getUNDEF(PartEVT); Ops.append((PartNumElts - ValueNumElts).getFixedValue(), EltUndef); // FIXME: Use CONCAT for 2x -> 4x. return DAG.getBuildVector(PartVT, DL, Ops); } /// getCopyToPartsVector - Create a series of nodes that contain the specified /// value split into legal parts. static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, const Value *V, std::optional CallConv) { EVT ValueVT = Val.getValueType(); assert(ValueVT.isVector() && "Not a vector"); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); const bool IsABIRegCopy = CallConv.has_value(); if (NumParts == 1) { EVT PartEVT = PartVT; if (PartEVT == ValueVT) { // Nothing to do. } else if (PartVT.getSizeInBits() == ValueVT.getSizeInBits()) { // Bitconvert vector->vector case. Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val); } else if (SDValue Widened = widenVectorToPartType(DAG, Val, DL, PartVT)) { Val = Widened; } else if (PartVT.isVector() && PartEVT.getVectorElementType().bitsGE( ValueVT.getVectorElementType()) && PartEVT.getVectorElementCount() == ValueVT.getVectorElementCount()) { // Promoted vector extract Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT); } else if (PartEVT.isVector() && PartEVT.getVectorElementType() != ValueVT.getVectorElementType() && TLI.getTypeAction(*DAG.getContext(), ValueVT) == TargetLowering::TypeWidenVector) { // Combination of widening and promotion. EVT WidenVT = EVT::getVectorVT(*DAG.getContext(), ValueVT.getVectorElementType(), PartVT.getVectorElementCount()); SDValue Widened = widenVectorToPartType(DAG, Val, DL, WidenVT); Val = DAG.getAnyExtOrTrunc(Widened, DL, PartVT); } else { // Don't extract an integer from a float vector. This can happen if the // FP type gets softened to integer and then promoted. The promotion // prevents it from being picked up by the earlier bitcast case. if (ValueVT.getVectorElementCount().isScalar() && (!ValueVT.isFloatingPoint() || !PartVT.isInteger())) { // If we reach this condition and PartVT is FP, this means that // ValueVT is also FP and both have a different size, otherwise we // would have bitcasted them. Producing an EXTRACT_VECTOR_ELT here // would be invalid since that would mean the smaller FP type has to // be extended to the larger one. 
if (PartVT.isFloatingPoint()) { Val = DAG.getBitcast(ValueVT.getScalarType(), Val); Val = DAG.getNode(ISD::FP_EXTEND, DL, PartVT, Val); } else Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, PartVT, Val, DAG.getVectorIdxConstant(0, DL)); } else { uint64_t ValueSize = ValueVT.getFixedSizeInBits(); assert(PartVT.getFixedSizeInBits() > ValueSize && "lossy conversion of vector to scalar type"); EVT IntermediateType = EVT::getIntegerVT(*DAG.getContext(), ValueSize); Val = DAG.getBitcast(IntermediateType, Val); Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT); } } assert(Val.getValueType() == PartVT && "Unexpected vector part value type"); Parts[0] = Val; return; } // Handle a multi-element vector. EVT IntermediateVT; MVT RegisterVT; unsigned NumIntermediates; unsigned NumRegs; if (IsABIRegCopy) { NumRegs = TLI.getVectorTypeBreakdownForCallingConv( *DAG.getContext(), *CallConv, ValueVT, IntermediateVT, NumIntermediates, RegisterVT); } else { NumRegs = TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT, NumIntermediates, RegisterVT); } assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!"); NumParts = NumRegs; // Silence a compiler warning. assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!"); assert(IntermediateVT.isScalableVector() == ValueVT.isScalableVector() && "Mixing scalable and fixed vectors when copying in parts"); std::optional DestEltCnt; if (IntermediateVT.isVector()) DestEltCnt = IntermediateVT.getVectorElementCount() * NumIntermediates; else DestEltCnt = ElementCount::getFixed(NumIntermediates); EVT BuiltVectorTy = EVT::getVectorVT( *DAG.getContext(), IntermediateVT.getScalarType(), *DestEltCnt); if (ValueVT == BuiltVectorTy) { // Nothing to do. } else if (ValueVT.getSizeInBits() == BuiltVectorTy.getSizeInBits()) { // Bitconvert vector->vector case. Val = DAG.getNode(ISD::BITCAST, DL, BuiltVectorTy, Val); } else { if (BuiltVectorTy.getVectorElementType().bitsGT( ValueVT.getVectorElementType())) { // Integer promotion. ValueVT = EVT::getVectorVT(*DAG.getContext(), BuiltVectorTy.getVectorElementType(), ValueVT.getVectorElementCount()); Val = DAG.getNode(ISD::ANY_EXTEND, DL, ValueVT, Val); } if (SDValue Widened = widenVectorToPartType(DAG, Val, DL, BuiltVectorTy)) { Val = Widened; } } assert(Val.getValueType() == BuiltVectorTy && "Unexpected vector value type"); // Split the vector into intermediate operands. SmallVector Ops(NumIntermediates); for (unsigned i = 0; i != NumIntermediates; ++i) { if (IntermediateVT.isVector()) { // This does something sensible for scalable vectors - see the // definition of EXTRACT_SUBVECTOR for further details. unsigned IntermediateNumElts = IntermediateVT.getVectorMinNumElements(); Ops[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, IntermediateVT, Val, DAG.getVectorIdxConstant(i * IntermediateNumElts, DL)); } else { Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, IntermediateVT, Val, DAG.getVectorIdxConstant(i, DL)); } } // Split the intermediate operands into legal parts. if (NumParts == NumIntermediates) { // If the register was not expanded, promote or copy the value, // as appropriate. for (unsigned i = 0; i != NumParts; ++i) getCopyToParts(DAG, DL, Ops[i], &Parts[i], 1, PartVT, V, CallConv); } else if (NumParts > 0) { // If the intermediate type was expanded, split each the value into // legal parts. 
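// Editor note (illustrative, not part of this file): the loop that follows distributes the
// intermediate operands over the register parts. With NumParts registers and NumIntermediates
// intermediates, each intermediate owns Factor = NumParts / NumIntermediates consecutive
// parts, i.e. the slice starting at &Parts[i * Factor]. A small standalone model of that
// indexing in plain C++ (helper name is illustrative only):

#include <cassert>
#include <cstddef>
#include <utility>
#include <vector>

// Map intermediate i to the half-open range of register parts it fills.
static std::vector<std::pair<size_t, size_t>>
part_ranges(size_t num_parts, size_t num_intermediates) {
  assert(num_intermediates != 0 && num_parts % num_intermediates == 0);
  size_t factor = num_parts / num_intermediates;
  std::vector<std::pair<size_t, size_t>> ranges;
  for (size_t i = 0; i != num_intermediates; ++i)
    ranges.push_back({i * factor, i * factor + factor}); // [begin, end)
  return ranges;
}

int main() {
  // e.g. 8 register parts holding 4 intermediate sub-vectors: factor == 2.
  auto r = part_ranges(8, 4);
  assert(r.size() == 4 && r[3].first == 6 && r[3].second == 8);
  return 0;
}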
assert(NumIntermediates != 0 && "division by zero"); assert(NumParts % NumIntermediates == 0 && "Must expand into a divisible number of parts!"); unsigned Factor = NumParts / NumIntermediates; for (unsigned i = 0; i != NumIntermediates; ++i) getCopyToParts(DAG, DL, Ops[i], &Parts[i * Factor], Factor, PartVT, V, CallConv); } } RegsForValue::RegsForValue(const SmallVector ®s, MVT regvt, EVT valuevt, std::optional CC) : ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs), RegCount(1, regs.size()), CallConv(CC) {} RegsForValue::RegsForValue(LLVMContext &Context, const TargetLowering &TLI, const DataLayout &DL, unsigned Reg, Type *Ty, std::optional CC) { ComputeValueVTs(TLI, DL, Ty, ValueVTs); CallConv = CC; for (EVT ValueVT : ValueVTs) { unsigned NumRegs = isABIMangled() ? TLI.getNumRegistersForCallingConv(Context, *CC, ValueVT) : TLI.getNumRegisters(Context, ValueVT); MVT RegisterVT = isABIMangled() ? TLI.getRegisterTypeForCallingConv(Context, *CC, ValueVT) : TLI.getRegisterType(Context, ValueVT); for (unsigned i = 0; i != NumRegs; ++i) Regs.push_back(Reg + i); RegVTs.push_back(RegisterVT); RegCount.push_back(NumRegs); Reg += NumRegs; } } SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, FunctionLoweringInfo &FuncInfo, const SDLoc &dl, SDValue &Chain, SDValue *Glue, const Value *V) const { // A Value with type {} or [0 x %t] needs no registers. if (ValueVTs.empty()) return SDValue(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); // Assemble the legal parts into the final values. SmallVector Values(ValueVTs.size()); SmallVector Parts; for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) { // Copy the legal parts from the registers. EVT ValueVT = ValueVTs[Value]; unsigned NumRegs = RegCount[Value]; MVT RegisterVT = isABIMangled() ? TLI.getRegisterTypeForCallingConv( *DAG.getContext(), *CallConv, RegVTs[Value]) : RegVTs[Value]; Parts.resize(NumRegs); for (unsigned i = 0; i != NumRegs; ++i) { SDValue P; if (!Glue) { P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT); } else { P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT, *Glue); *Glue = P.getValue(2); } Chain = P.getValue(1); Parts[i] = P; // If the source register was virtual and if we know something about it, // add an assert node. if (!Register::isVirtualRegister(Regs[Part + i]) || !RegisterVT.isInteger()) continue; const FunctionLoweringInfo::LiveOutInfo *LOI = FuncInfo.GetLiveOutRegInfo(Regs[Part+i]); if (!LOI) continue; unsigned RegSize = RegisterVT.getScalarSizeInBits(); unsigned NumSignBits = LOI->NumSignBits; unsigned NumZeroBits = LOI->Known.countMinLeadingZeros(); if (NumZeroBits == RegSize) { // The current value is a zero. // Explicitly express that as it would be easier for // optimizations to kick in. Parts[i] = DAG.getConstant(0, dl, RegisterVT); continue; } // FIXME: We capture more information than the dag can represent. For // now, just use the tightest assertzext/assertsext possible. bool isSExt; EVT FromVT(MVT::Other); if (NumZeroBits) { FromVT = EVT::getIntegerVT(*DAG.getContext(), RegSize - NumZeroBits); isSExt = false; } else if (NumSignBits > 1) { FromVT = EVT::getIntegerVT(*DAG.getContext(), RegSize - NumSignBits + 1); isSExt = true; } else { continue; } // Add an assertion node. assert(FromVT != MVT::Other); Parts[i] = DAG.getNode(isSExt ? 
ISD::AssertSext : ISD::AssertZext, dl, RegisterVT, P, DAG.getValueType(FromVT)); } Values[Value] = getCopyFromParts(DAG, dl, Parts.begin(), NumRegs, RegisterVT, ValueVT, V, Chain, CallConv); Part += NumRegs; Parts.clear(); } return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(ValueVTs), Values); } void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, const SDLoc &dl, SDValue &Chain, SDValue *Glue, const Value *V, ISD::NodeType PreferredExtendType) const { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); ISD::NodeType ExtendKind = PreferredExtendType; // Get the list of the values's legal parts. unsigned NumRegs = Regs.size(); SmallVector Parts(NumRegs); for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) { unsigned NumParts = RegCount[Value]; MVT RegisterVT = isABIMangled() ? TLI.getRegisterTypeForCallingConv( *DAG.getContext(), *CallConv, RegVTs[Value]) : RegVTs[Value]; if (ExtendKind == ISD::ANY_EXTEND && TLI.isZExtFree(Val, RegisterVT)) ExtendKind = ISD::ZERO_EXTEND; getCopyToParts(DAG, dl, Val.getValue(Val.getResNo() + Value), &Parts[Part], NumParts, RegisterVT, V, CallConv, ExtendKind); Part += NumParts; } // Copy the parts into the registers. SmallVector Chains(NumRegs); for (unsigned i = 0; i != NumRegs; ++i) { SDValue Part; if (!Glue) { Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i]); } else { Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i], *Glue); *Glue = Part.getValue(1); } Chains[i] = Part.getValue(0); } if (NumRegs == 1 || Glue) // If NumRegs > 1 && Glue is used then the use of the last CopyToReg is // flagged to it. That is the CopyToReg nodes and the user are considered // a single scheduling unit. If we create a TokenFactor and return it as // chain, then the TokenFactor is both a predecessor (operand) of the // user as well as a successor (the TF operands are flagged to the user). // c1, f1 = CopyToReg // c2, f2 = CopyToReg // c3 = TokenFactor c1, c2 // ... // = op c3, ..., f2 Chain = Chains[NumRegs-1]; else Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains); } void RegsForValue::AddInlineAsmOperands(InlineAsm::Kind Code, bool HasMatching, unsigned MatchingIdx, const SDLoc &dl, SelectionDAG &DAG, std::vector &Ops) const { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); InlineAsm::Flag Flag(Code, Regs.size()); if (HasMatching) Flag.setMatchingOp(MatchingIdx); else if (!Regs.empty() && Register::isVirtualRegister(Regs.front())) { // Put the register class of the virtual registers in the flag word. That // way, later passes can recompute register class constraints for inline // assembly as well as normal instructions. // Don't do this for tied operands that can use the regclass information // from the def. const MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo(); const TargetRegisterClass *RC = MRI.getRegClass(Regs.front()); Flag.setRegClass(RC->getID()); } SDValue Res = DAG.getTargetConstant(Flag, dl, MVT::i32); Ops.push_back(Res); if (Code == InlineAsm::Kind::Clobber) { // Clobbers should always have a 1:1 mapping with registers, and may // reference registers that have illegal (e.g. vector) types. Hence, we // shouldn't try to apply any sort of splitting logic to them. 
assert(Regs.size() == RegVTs.size() && Regs.size() == ValueVTs.size() && "No 1:1 mapping from clobbers to regs?"); Register SP = TLI.getStackPointerRegisterToSaveRestore(); (void)SP; for (unsigned I = 0, E = ValueVTs.size(); I != E; ++I) { Ops.push_back(DAG.getRegister(Regs[I], RegVTs[I])); assert( (Regs[I] != SP || DAG.getMachineFunction().getFrameInfo().hasOpaqueSPAdjustment()) && "If we clobbered the stack pointer, MFI should know about it."); } return; } for (unsigned Value = 0, Reg = 0, e = ValueVTs.size(); Value != e; ++Value) { MVT RegisterVT = RegVTs[Value]; unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVTs[Value], RegisterVT); for (unsigned i = 0; i != NumRegs; ++i) { assert(Reg < Regs.size() && "Mismatch in # registers expected"); unsigned TheReg = Regs[Reg++]; Ops.push_back(DAG.getRegister(TheReg, RegisterVT)); } } } SmallVector, 4> RegsForValue::getRegsAndSizes() const { SmallVector, 4> OutVec; unsigned I = 0; for (auto CountAndVT : zip_first(RegCount, RegVTs)) { unsigned RegCount = std::get<0>(CountAndVT); MVT RegisterVT = std::get<1>(CountAndVT); TypeSize RegisterSize = RegisterVT.getSizeInBits(); for (unsigned E = I + RegCount; I != E; ++I) OutVec.push_back(std::make_pair(Regs[I], RegisterSize)); } return OutVec; } void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis *aa, AssumptionCache *ac, const TargetLibraryInfo *li) { AA = aa; AC = ac; GFI = gfi; LibInfo = li; Context = DAG.getContext(); LPadToCallSiteMap.clear(); SL->init(DAG.getTargetLoweringInfo(), TM, DAG.getDataLayout()); AssignmentTrackingEnabled = isAssignmentTrackingEnabled( *DAG.getMachineFunction().getFunction().getParent()); } void SelectionDAGBuilder::clear() { NodeMap.clear(); UnusedArgNodeMap.clear(); PendingLoads.clear(); PendingExports.clear(); PendingConstrainedFP.clear(); PendingConstrainedFPStrict.clear(); CurInst = nullptr; HasTailCall = false; SDNodeOrder = LowestSDNodeOrder; StatepointLowering.clear(); } void SelectionDAGBuilder::clearDanglingDebugInfo() { DanglingDebugInfoMap.clear(); } // Update DAG root to include dependencies on Pending chains. SDValue SelectionDAGBuilder::updateRoot(SmallVectorImpl &Pending) { SDValue Root = DAG.getRoot(); if (Pending.empty()) return Root; // Add current root to PendingChains, unless we already indirectly // depend on it. if (Root.getOpcode() != ISD::EntryToken) { unsigned i = 0, e = Pending.size(); for (; i != e; ++i) { assert(Pending[i].getNode()->getNumOperands() > 1); if (Pending[i].getNode()->getOperand(0) == Root) break; // Don't add the root if we already indirectly depend on it. } if (i == e) Pending.push_back(Root); } if (Pending.size() == 1) Root = Pending[0]; else Root = DAG.getTokenFactor(getCurSDLoc(), Pending); DAG.setRoot(Root); Pending.clear(); return Root; } SDValue SelectionDAGBuilder::getMemoryRoot() { return updateRoot(PendingLoads); } SDValue SelectionDAGBuilder::getRoot() { // Chain up all pending constrained intrinsics together with all // pending loads, by simply appending them to PendingLoads and // then calling getMemoryRoot(). 
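// Editor note (illustrative, not part of this file): the three root accessors differ only in
// which pending chains they hand to updateRoot(): getMemoryRoot() folds PendingLoads,
// getRoot() first appends both constrained-FP lists to PendingLoads, and getControlRoot()
// folds PendingExports plus the strict constrained-FP list. updateRoot() itself returns the
// lone pending chain if there is exactly one, otherwise it builds a TokenFactor. A toy model
// of that merge in plain C++ (strings stand in for SDValue chains; not LLVM code):

#include <cassert>
#include <string>
#include <vector>

static std::string update_root_model(std::string root,
                                     std::vector<std::string> pending) {
  if (pending.empty())
    return root;
  if (root != "EntryToken")
    pending.push_back(root); // the real code skips this if root is already a dependency
  if (pending.size() == 1)
    return pending[0];
  std::string tf = "TokenFactor(";
  for (size_t i = 0; i != pending.size(); ++i)
    tf += (i ? ", " : "") + pending[i];
  return tf + ")";
}

int main() {
  assert(update_root_model("EntryToken", {"load1"}) == "load1");
  assert(update_root_model("store1", {"load1", "load2"}) ==
         "TokenFactor(load1, load2, store1)");
  return 0;
}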
PendingLoads.reserve(PendingLoads.size() + PendingConstrainedFP.size() + PendingConstrainedFPStrict.size()); PendingLoads.append(PendingConstrainedFP.begin(), PendingConstrainedFP.end()); PendingLoads.append(PendingConstrainedFPStrict.begin(), PendingConstrainedFPStrict.end()); PendingConstrainedFP.clear(); PendingConstrainedFPStrict.clear(); return getMemoryRoot(); } SDValue SelectionDAGBuilder::getControlRoot() { // We need to emit pending fpexcept.strict constrained intrinsics, // so append them to the PendingExports list. PendingExports.append(PendingConstrainedFPStrict.begin(), PendingConstrainedFPStrict.end()); PendingConstrainedFPStrict.clear(); return updateRoot(PendingExports); } void SelectionDAGBuilder::handleDebugDeclare(Value *Address, DILocalVariable *Variable, DIExpression *Expression, DebugLoc DL) { assert(Variable && "Missing variable"); // Check if address has undef value. if (!Address || isa(Address) || (Address->use_empty() && !isa(Address))) { LLVM_DEBUG( dbgs() << "dbg_declare: Dropping debug info (bad/undef/unused-arg address)\n"); return; } bool IsParameter = Variable->isParameter() || isa(Address); SDValue &N = NodeMap[Address]; if (!N.getNode() && isa(Address)) // Check unused arguments map. N = UnusedArgNodeMap[Address]; SDDbgValue *SDV; if (N.getNode()) { if (const BitCastInst *BCI = dyn_cast(Address)) Address = BCI->getOperand(0); // Parameters are handled specially. auto *FINode = dyn_cast(N.getNode()); if (IsParameter && FINode) { // Byval parameter. We have a frame index at this point. SDV = DAG.getFrameIndexDbgValue(Variable, Expression, FINode->getIndex(), /*IsIndirect*/ true, DL, SDNodeOrder); } else if (isa(Address)) { // Address is an argument, so try to emit its dbg value using // virtual register info from the FuncInfo.ValueMap. EmitFuncArgumentDbgValue(Address, Variable, Expression, DL, FuncArgumentDbgValueKind::Declare, N); return; } else { SDV = DAG.getDbgValue(Variable, Expression, N.getNode(), N.getResNo(), true, DL, SDNodeOrder); } DAG.AddDbgValue(SDV, IsParameter); } else { // If Address is an argument then try to emit its dbg value using // virtual register info from the FuncInfo.ValueMap. if (!EmitFuncArgumentDbgValue(Address, Variable, Expression, DL, FuncArgumentDbgValueKind::Declare, N)) { LLVM_DEBUG(dbgs() << "dbg_declare: Dropping debug info" << " (could not emit func-arg dbg_value)\n"); } } return; } void SelectionDAGBuilder::visitDbgInfo(const Instruction &I) { // Add SDDbgValue nodes for any var locs here. Do so before updating // SDNodeOrder, as this mapping is {Inst -> Locs BEFORE Inst}. if (FunctionVarLocs const *FnVarLocs = DAG.getFunctionVarLocs()) { // Add SDDbgValue nodes for any var locs here. Do so before updating // SDNodeOrder, as this mapping is {Inst -> Locs BEFORE Inst}. 
for (auto It = FnVarLocs->locs_begin(&I), End = FnVarLocs->locs_end(&I); It != End; ++It) { auto *Var = FnVarLocs->getDILocalVariable(It->VariableID); dropDanglingDebugInfo(Var, It->Expr); if (It->Values.isKillLocation(It->Expr)) { handleKillDebugValue(Var, It->Expr, It->DL, SDNodeOrder); continue; } SmallVector Values(It->Values.location_ops()); if (!handleDebugValue(Values, Var, It->Expr, It->DL, SDNodeOrder, It->Values.hasArgList())) { SmallVector Vals; for (Value *V : It->Values.location_ops()) Vals.push_back(V); addDanglingDebugInfo(Vals, FnVarLocs->getDILocalVariable(It->VariableID), It->Expr, Vals.size() > 1, It->DL, SDNodeOrder); } } } // We must skip DbgVariableRecords if they've already been processed above as // we have just emitted the debug values resulting from assignment tracking // analysis, making any existing DbgVariableRecords redundant (and probably // less correct). We still need to process DbgLabelRecords. This does sink // DbgLabelRecords to the bottom of the group of debug records. That sholdn't // be important as it does so deterministcally and ordering between // DbgLabelRecords and DbgVariableRecords is immaterial (other than for MIR/IR // printing). bool SkipDbgVariableRecords = DAG.getFunctionVarLocs(); // Is there is any debug-info attached to this instruction, in the form of // DbgRecord non-instruction debug-info records. for (DbgRecord &DR : I.getDbgRecordRange()) { if (DbgLabelRecord *DLR = dyn_cast(&DR)) { assert(DLR->getLabel() && "Missing label"); SDDbgLabel *SDV = DAG.getDbgLabel(DLR->getLabel(), DLR->getDebugLoc(), SDNodeOrder); DAG.AddDbgLabel(SDV); continue; } if (SkipDbgVariableRecords) continue; DbgVariableRecord &DVR = cast(DR); DILocalVariable *Variable = DVR.getVariable(); DIExpression *Expression = DVR.getExpression(); dropDanglingDebugInfo(Variable, Expression); if (DVR.getType() == DbgVariableRecord::LocationType::Declare) { if (FuncInfo.PreprocessedDVRDeclares.contains(&DVR)) continue; LLVM_DEBUG(dbgs() << "SelectionDAG visiting dbg_declare: " << DVR << "\n"); handleDebugDeclare(DVR.getVariableLocationOp(0), Variable, Expression, DVR.getDebugLoc()); continue; } // A DbgVariableRecord with no locations is a kill location. SmallVector Values(DVR.location_ops()); if (Values.empty()) { handleKillDebugValue(Variable, Expression, DVR.getDebugLoc(), SDNodeOrder); continue; } // A DbgVariableRecord with an undef or absent location is also a kill // location. if (llvm::any_of(Values, [](Value *V) { return !V || isa(V); })) { handleKillDebugValue(Variable, Expression, DVR.getDebugLoc(), SDNodeOrder); continue; } bool IsVariadic = DVR.hasArgList(); if (!handleDebugValue(Values, Variable, Expression, DVR.getDebugLoc(), SDNodeOrder, IsVariadic)) { addDanglingDebugInfo(Values, Variable, Expression, IsVariadic, DVR.getDebugLoc(), SDNodeOrder); } } } void SelectionDAGBuilder::visit(const Instruction &I) { visitDbgInfo(I); // Set up outgoing PHI node register values before emitting the terminator. if (I.isTerminator()) { HandlePHINodesInSuccessorBlocks(I.getParent()); } // Increase the SDNodeOrder if dealing with a non-debug instruction. if (!isa(I)) ++SDNodeOrder; CurInst = &I; // Set inserted listener only if required. 
bool NodeInserted = false; std::unique_ptr InsertedListener; MDNode *PCSectionsMD = I.getMetadata(LLVMContext::MD_pcsections); MDNode *MMRA = I.getMetadata(LLVMContext::MD_mmra); if (PCSectionsMD || MMRA) { InsertedListener = std::make_unique( DAG, [&](SDNode *) { NodeInserted = true; }); } visit(I.getOpcode(), I); if (!I.isTerminator() && !HasTailCall && !isa(I)) // statepoints handle their exports internally CopyToExportRegsIfNeeded(&I); // Handle metadata. if (PCSectionsMD || MMRA) { auto It = NodeMap.find(&I); if (It != NodeMap.end()) { if (PCSectionsMD) DAG.addPCSections(It->second.getNode(), PCSectionsMD); if (MMRA) DAG.addMMRAMetadata(It->second.getNode(), MMRA); } else if (NodeInserted) { // This should not happen; if it does, don't let it go unnoticed so we can // fix it. Relevant visit*() function is probably missing a setValue(). errs() << "warning: loosing !pcsections and/or !mmra metadata [" << I.getModule()->getName() << "]\n"; LLVM_DEBUG(I.dump()); assert(false); } } CurInst = nullptr; } void SelectionDAGBuilder::visitPHI(const PHINode &) { llvm_unreachable("SelectionDAGBuilder shouldn't visit PHI nodes!"); } void SelectionDAGBuilder::visit(unsigned Opcode, const User &I) { // Note: this doesn't use InstVisitor, because it has to work with // ConstantExpr's in addition to instructions. switch (Opcode) { default: llvm_unreachable("Unknown instruction type encountered!"); // Build the switch statement using the Instruction.def file. #define HANDLE_INST(NUM, OPCODE, CLASS) \ case Instruction::OPCODE: visit##OPCODE((const CLASS&)I); break; #include "llvm/IR/Instruction.def" } } static bool handleDanglingVariadicDebugInfo(SelectionDAG &DAG, DILocalVariable *Variable, DebugLoc DL, unsigned Order, SmallVectorImpl &Values, DIExpression *Expression) { // For variadic dbg_values we will now insert an undef. // FIXME: We can potentially recover these! SmallVector Locs; for (const Value *V : Values) { auto *Undef = UndefValue::get(V->getType()); Locs.push_back(SDDbgOperand::fromConst(Undef)); } SDDbgValue *SDV = DAG.getDbgValueList(Variable, Expression, Locs, {}, /*IsIndirect=*/false, DL, Order, /*IsVariadic=*/true); DAG.AddDbgValue(SDV, /*isParameter=*/false); return true; } void SelectionDAGBuilder::addDanglingDebugInfo(SmallVectorImpl &Values, DILocalVariable *Var, DIExpression *Expr, bool IsVariadic, DebugLoc DL, unsigned Order) { if (IsVariadic) { handleDanglingVariadicDebugInfo(DAG, Var, DL, Order, Values, Expr); return; } // TODO: Dangling debug info will eventually either be resolved or produce // an Undef DBG_VALUE. However in the resolution case, a gap may appear // between the original dbg.value location and its resolved DBG_VALUE, // which we should ideally fill with an extra Undef DBG_VALUE. assert(Values.size() == 1); DanglingDebugInfoMap[Values[0]].emplace_back(Var, Expr, DL, Order); } void SelectionDAGBuilder::dropDanglingDebugInfo(const DILocalVariable *Variable, const DIExpression *Expr) { auto isMatchingDbgValue = [&](DanglingDebugInfo &DDI) { DIVariable *DanglingVariable = DDI.getVariable(); DIExpression *DanglingExpr = DDI.getExpression(); if (DanglingVariable == Variable && Expr->fragmentsOverlap(DanglingExpr)) { LLVM_DEBUG(dbgs() << "Dropping dangling debug info for " << printDDI(nullptr, DDI) << "\n"); return true; } return false; }; for (auto &DDIMI : DanglingDebugInfoMap) { DanglingDebugInfoVector &DDIV = DDIMI.second; // If debug info is to be dropped, run it through final checks to see // whether it can be salvaged. 
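// Editor's sketch (illustrative only, not part of the original file): the
// visit(unsigned, const User &) switch above is generated with the
// Instruction.def "X-macro" -- the caller defines HANDLE_INST and then
// includes the .def file, which expands to one case per opcode. A reduced,
// self-contained version of the same pattern with hypothetical names
// (SKETCH_OPS, sketchOpName), kept under #if 0:
#if 0
#define SKETCH_OPS(X) X(Add) X(Sub) X(Mul)

enum class SketchOp {
#define X(NAME) NAME,
  SKETCH_OPS(X)
#undef X
};

static const char *sketchOpName(SketchOp Op) {
  switch (Op) {
#define X(NAME)                                                                \
  case SketchOp::NAME:                                                         \
    return #NAME;
    SKETCH_OPS(X)
#undef X
  }
  return "unknown";
}
#endif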
for (auto &DDI : DDIV) if (isMatchingDbgValue(DDI)) salvageUnresolvedDbgValue(DDIMI.first, DDI); erase_if(DDIV, isMatchingDbgValue); } } // resolveDanglingDebugInfo - if we saw an earlier dbg_value referring to V, // generate the debug data structures now that we've seen its definition. void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V, SDValue Val) { auto DanglingDbgInfoIt = DanglingDebugInfoMap.find(V); if (DanglingDbgInfoIt == DanglingDebugInfoMap.end()) return; DanglingDebugInfoVector &DDIV = DanglingDbgInfoIt->second; for (auto &DDI : DDIV) { DebugLoc DL = DDI.getDebugLoc(); unsigned ValSDNodeOrder = Val.getNode()->getIROrder(); unsigned DbgSDNodeOrder = DDI.getSDNodeOrder(); DILocalVariable *Variable = DDI.getVariable(); DIExpression *Expr = DDI.getExpression(); assert(Variable->isValidLocationForIntrinsic(DL) && "Expected inlined-at fields to agree"); SDDbgValue *SDV; if (Val.getNode()) { // FIXME: I doubt that it is correct to resolve a dangling DbgValue as a // FuncArgumentDbgValue (it would be hoisted to the function entry, and if // we couldn't resolve it directly when examining the DbgValue intrinsic // in the first place we should not be more successful here). Unless we // have some test case that prove this to be correct we should avoid // calling EmitFuncArgumentDbgValue here. if (!EmitFuncArgumentDbgValue(V, Variable, Expr, DL, FuncArgumentDbgValueKind::Value, Val)) { LLVM_DEBUG(dbgs() << "Resolve dangling debug info for " << printDDI(V, DDI) << "\n"); LLVM_DEBUG(dbgs() << " By mapping to:\n "; Val.dump()); // Increase the SDNodeOrder for the DbgValue here to make sure it is // inserted after the definition of Val when emitting the instructions // after ISel. An alternative could be to teach // ScheduleDAGSDNodes::EmitSchedule to delay the insertion properly. LLVM_DEBUG(if (ValSDNodeOrder > DbgSDNodeOrder) dbgs() << "changing SDNodeOrder from " << DbgSDNodeOrder << " to " << ValSDNodeOrder << "\n"); SDV = getDbgValue(Val, Variable, Expr, DL, std::max(DbgSDNodeOrder, ValSDNodeOrder)); DAG.AddDbgValue(SDV, false); } else LLVM_DEBUG(dbgs() << "Resolved dangling debug info for " << printDDI(V, DDI) << " in EmitFuncArgumentDbgValue\n"); } else { LLVM_DEBUG(dbgs() << "Dropping debug info for " << printDDI(V, DDI) << "\n"); auto Undef = UndefValue::get(V->getType()); auto SDV = DAG.getConstantDbgValue(Variable, Expr, Undef, DL, DbgSDNodeOrder); DAG.AddDbgValue(SDV, false); } } DDIV.clear(); } void SelectionDAGBuilder::salvageUnresolvedDbgValue(const Value *V, DanglingDebugInfo &DDI) { // TODO: For the variadic implementation, instead of only checking the fail // state of `handleDebugValue`, we need know specifically which values were // invalid, so that we attempt to salvage only those values when processing // a DIArgList. const Value *OrigV = V; DILocalVariable *Var = DDI.getVariable(); DIExpression *Expr = DDI.getExpression(); DebugLoc DL = DDI.getDebugLoc(); unsigned SDOrder = DDI.getSDNodeOrder(); // Currently we consider only dbg.value intrinsics -- we tell the salvager // that DW_OP_stack_value is desired. bool StackValue = true; // Can this Value can be encoded without any further work? if (handleDebugValue(V, Var, Expr, DL, SDOrder, /*IsVariadic=*/false)) return; // Attempt to salvage back through as many instructions as possible. Bail if // a non-instruction is seen, such as a constant expression or global // variable. FIXME: Further work could recover those too. 
while (isa(V)) { const Instruction &VAsInst = *cast(V); // Temporary "0", awaiting real implementation. SmallVector Ops; SmallVector AdditionalValues; V = salvageDebugInfoImpl(const_cast(VAsInst), Expr->getNumLocationOperands(), Ops, AdditionalValues); // If we cannot salvage any further, and haven't yet found a suitable debug // expression, bail out. if (!V) break; // TODO: If AdditionalValues isn't empty, then the salvage can only be // represented with a DBG_VALUE_LIST, so we give up. When we have support // here for variadic dbg_values, remove that condition. if (!AdditionalValues.empty()) break; // New value and expr now represent this debuginfo. Expr = DIExpression::appendOpsToArg(Expr, Ops, 0, StackValue); // Some kind of simplification occurred: check whether the operand of the // salvaged debug expression can be encoded in this DAG. if (handleDebugValue(V, Var, Expr, DL, SDOrder, /*IsVariadic=*/false)) { LLVM_DEBUG( dbgs() << "Salvaged debug location info for:\n " << *Var << "\n" << *OrigV << "\nBy stripping back to:\n " << *V << "\n"); return; } } // This was the final opportunity to salvage this debug information, and it // couldn't be done. Place an undef DBG_VALUE at this location to terminate // any earlier variable location. assert(OrigV && "V shouldn't be null"); auto *Undef = UndefValue::get(OrigV->getType()); auto *SDV = DAG.getConstantDbgValue(Var, Expr, Undef, DL, SDNodeOrder); DAG.AddDbgValue(SDV, false); LLVM_DEBUG(dbgs() << "Dropping debug value info for:\n " << printDDI(OrigV, DDI) << "\n"); } void SelectionDAGBuilder::handleKillDebugValue(DILocalVariable *Var, DIExpression *Expr, DebugLoc DbgLoc, unsigned Order) { Value *Poison = PoisonValue::get(Type::getInt1Ty(*Context)); DIExpression *NewExpr = const_cast(DIExpression::convertToUndefExpression(Expr)); handleDebugValue(Poison, Var, NewExpr, DbgLoc, Order, /*IsVariadic*/ false); } bool SelectionDAGBuilder::handleDebugValue(ArrayRef Values, DILocalVariable *Var, DIExpression *Expr, DebugLoc DbgLoc, unsigned Order, bool IsVariadic) { if (Values.empty()) return true; // Filter EntryValue locations out early. if (visitEntryValueDbgValue(Values, Var, Expr, DbgLoc)) return true; SmallVector LocationOps; SmallVector Dependencies; for (const Value *V : Values) { // Constant value. if (isa(V) || isa(V) || isa(V) || isa(V)) { LocationOps.emplace_back(SDDbgOperand::fromConst(V)); continue; } // Look through IntToPtr constants. if (auto *CE = dyn_cast(V)) if (CE->getOpcode() == Instruction::IntToPtr) { LocationOps.emplace_back(SDDbgOperand::fromConst(CE->getOperand(0))); continue; } // If the Value is a frame index, we can create a FrameIndex debug value // without relying on the DAG at all. if (const AllocaInst *AI = dyn_cast(V)) { auto SI = FuncInfo.StaticAllocaMap.find(AI); if (SI != FuncInfo.StaticAllocaMap.end()) { LocationOps.emplace_back(SDDbgOperand::fromFrameIdx(SI->second)); continue; } } // Do not use getValue() in here; we don't want to generate code at // this point if it hasn't been done yet. SDValue N = NodeMap[V]; if (!N.getNode() && isa(V)) // Check unused arguments map. N = UnusedArgNodeMap[V]; if (N.getNode()) { // Only emit func arg dbg value for non-variadic dbg.values for now. if (!IsVariadic && EmitFuncArgumentDbgValue(V, Var, Expr, DbgLoc, FuncArgumentDbgValueKind::Value, N)) return true; if (auto *FISDN = dyn_cast(N.getNode())) { // Construct a FrameIndexDbgValue for FrameIndexSDNodes so we can // describe stack slot locations. // // Consider "int x = 0; int *px = &x;". 
There are two kinds of // interesting debug values here after optimization: // // dbg.value(i32* %px, !"int *px", !DIExpression()), and // dbg.value(i32* %px, !"int x", !DIExpression(DW_OP_deref)) // // Both describe the direct values of their associated variables. Dependencies.push_back(N.getNode()); LocationOps.emplace_back(SDDbgOperand::fromFrameIdx(FISDN->getIndex())); continue; } LocationOps.emplace_back( SDDbgOperand::fromNode(N.getNode(), N.getResNo())); continue; } const TargetLowering &TLI = DAG.getTargetLoweringInfo(); // Special rules apply for the first dbg.values of parameter variables in a // function. Identify them by the fact they reference Argument Values, that // they're parameters, and they are parameters of the current function. We // need to let them dangle until they get an SDNode. bool IsParamOfFunc = isa(V) && Var->isParameter() && !DbgLoc.getInlinedAt(); if (IsParamOfFunc) return false; // The value is not used in this block yet (or it would have an SDNode). // We still want the value to appear for the user if possible -- if it has // an associated VReg, we can refer to that instead. auto VMI = FuncInfo.ValueMap.find(V); if (VMI != FuncInfo.ValueMap.end()) { unsigned Reg = VMI->second; // If this is a PHI node, it may be split up into several MI PHI nodes // (in FunctionLoweringInfo::set). RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), Reg, V->getType(), std::nullopt); if (RFV.occupiesMultipleRegs()) { // FIXME: We could potentially support variadic dbg_values here. if (IsVariadic) return false; unsigned Offset = 0; unsigned BitsToDescribe = 0; if (auto VarSize = Var->getSizeInBits()) BitsToDescribe = *VarSize; if (auto Fragment = Expr->getFragmentInfo()) BitsToDescribe = Fragment->SizeInBits; for (const auto &RegAndSize : RFV.getRegsAndSizes()) { // Bail out if all bits are described already. if (Offset >= BitsToDescribe) break; // TODO: handle scalable vectors. unsigned RegisterSize = RegAndSize.second; unsigned FragmentSize = (Offset + RegisterSize > BitsToDescribe) ? BitsToDescribe - Offset : RegisterSize; auto FragmentExpr = DIExpression::createFragmentExpression( Expr, Offset, FragmentSize); if (!FragmentExpr) continue; SDDbgValue *SDV = DAG.getVRegDbgValue( Var, *FragmentExpr, RegAndSize.first, false, DbgLoc, Order); DAG.AddDbgValue(SDV, false); Offset += RegisterSize; } return true; } // We can use simple vreg locations for variadic dbg_values as well. LocationOps.emplace_back(SDDbgOperand::fromVReg(Reg)); continue; } // We failed to create a SDDbgOperand for V. return false; } // We have created a SDDbgOperand for each Value in Values. assert(!LocationOps.empty()); SDDbgValue *SDV = DAG.getDbgValueList(Var, Expr, LocationOps, Dependencies, /*IsIndirect=*/false, DbgLoc, Order, IsVariadic); DAG.AddDbgValue(SDV, /*isParameter=*/false); return true; } void SelectionDAGBuilder::resolveOrClearDbgInfo() { // Try to fixup any remaining dangling debug info -- and drop it if we can't. for (auto &Pair : DanglingDebugInfoMap) for (auto &DDI : Pair.second) salvageUnresolvedDbgValue(const_cast(Pair.first), DDI); clearDanglingDebugInfo(); } /// getCopyFromRegs - If there was virtual register allocated for the value V /// emit CopyFromReg of the specified type Ty. Return empty SDValue() otherwise. 
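// Editor's sketch (illustrative only, not part of the original file): when a
// value carrying a dbg.value is split across several registers, the fragment
// bookkeeping in handleDebugValue above reduces to the arithmetic below --
// each register describes the next RegisterSize bits of the variable,
// clamped to whatever is left. Helper name and return shape are
// hypothetical; kept under #if 0.
#if 0
// Returns (FragmentOffsetInBits, FragmentSizeInBits) pairs, one per register
// that actually describes part of the variable.
static SmallVector<std::pair<unsigned, unsigned>, 4>
planDbgFragmentsSketch(ArrayRef<unsigned> RegSizesInBits,
                       unsigned VarSizeInBits) {
  SmallVector<std::pair<unsigned, unsigned>, 4> Fragments;
  unsigned Offset = 0;
  for (unsigned RegSize : RegSizesInBits) {
    if (Offset >= VarSizeInBits)
      break;                                    // all bits already described
    unsigned Size = std::min(RegSize, VarSizeInBits - Offset);
    Fragments.push_back({Offset, Size});
    Offset += RegSize;                          // matches the loop above
  }
  return Fragments;
}
// E.g. a 96-bit variable held in two 64-bit registers yields the fragments
// (0, 64) and (64, 32).
#endif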
SDValue SelectionDAGBuilder::getCopyFromRegs(const Value *V, Type *Ty) { DenseMap::iterator It = FuncInfo.ValueMap.find(V); SDValue Result; if (It != FuncInfo.ValueMap.end()) { Register InReg = It->second; RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(), DAG.getDataLayout(), InReg, Ty, std::nullopt); // This is not an ABI copy. SDValue Chain = DAG.getEntryNode(); Result = RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V); resolveDanglingDebugInfo(V, Result); } return Result; } /// getValue - Return an SDValue for the given Value. SDValue SelectionDAGBuilder::getValue(const Value *V) { // If we already have an SDValue for this value, use it. It's important // to do this first, so that we don't create a CopyFromReg if we already // have a regular SDValue. SDValue &N = NodeMap[V]; if (N.getNode()) return N; // If there's a virtual register allocated and initialized for this // value, use it. if (SDValue copyFromReg = getCopyFromRegs(V, V->getType())) return copyFromReg; // Otherwise create a new SDValue and remember it. SDValue Val = getValueImpl(V); NodeMap[V] = Val; resolveDanglingDebugInfo(V, Val); return Val; } /// getNonRegisterValue - Return an SDValue for the given Value, but /// don't look in FuncInfo.ValueMap for a virtual register. SDValue SelectionDAGBuilder::getNonRegisterValue(const Value *V) { // If we already have an SDValue for this value, use it. SDValue &N = NodeMap[V]; if (N.getNode()) { if (isIntOrFPConstant(N)) { // Remove the debug location from the node as the node is about to be used // in a location which may differ from the original debug location. This // is relevant to Constant and ConstantFP nodes because they can appear // as constant expressions inside PHI nodes. N->setDebugLoc(DebugLoc()); } return N; } // Otherwise create a new SDValue and remember it. SDValue Val = getValueImpl(V); NodeMap[V] = Val; resolveDanglingDebugInfo(V, Val); return Val; } /// getValueImpl - Helper function for getValue and getNonRegisterValue. /// Create an SDValue for the given value. SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (const Constant *C = dyn_cast(V)) { EVT VT = TLI.getValueType(DAG.getDataLayout(), V->getType(), true); if (const ConstantInt *CI = dyn_cast(C)) return DAG.getConstant(*CI, getCurSDLoc(), VT); if (const GlobalValue *GV = dyn_cast(C)) return DAG.getGlobalAddress(GV, getCurSDLoc(), VT); if (const ConstantPtrAuth *CPA = dyn_cast(C)) { return DAG.getNode(ISD::PtrAuthGlobalAddress, getCurSDLoc(), VT, getValue(CPA->getPointer()), getValue(CPA->getKey()), getValue(CPA->getAddrDiscriminator()), getValue(CPA->getDiscriminator())); } if (isa(C)) { unsigned AS = V->getType()->getPointerAddressSpace(); return DAG.getConstant(0, getCurSDLoc(), TLI.getPointerTy(DAG.getDataLayout(), AS)); } if (match(C, m_VScale())) return DAG.getVScale(getCurSDLoc(), VT, APInt(VT.getSizeInBits(), 1)); if (const ConstantFP *CFP = dyn_cast(C)) return DAG.getConstantFP(*CFP, getCurSDLoc(), VT); if (isa(C) && !V->getType()->isAggregateType()) return DAG.getUNDEF(VT); if (const ConstantExpr *CE = dyn_cast(C)) { visit(CE->getOpcode(), *CE); SDValue N1 = NodeMap[V]; assert(N1.getNode() && "visit didn't populate the NodeMap!"); return N1; } if (isa(C) || isa(C)) { SmallVector Constants; for (const Use &U : C->operands()) { SDNode *Val = getValue(U).getNode(); // If the operand is an empty aggregate, there are no values. 
if (!Val) continue; // Add each leaf value from the operand to the Constants list // to form a flattened list of all the values. for (unsigned i = 0, e = Val->getNumValues(); i != e; ++i) Constants.push_back(SDValue(Val, i)); } return DAG.getMergeValues(Constants, getCurSDLoc()); } if (const ConstantDataSequential *CDS = dyn_cast(C)) { SmallVector Ops; for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) { SDNode *Val = getValue(CDS->getElementAsConstant(i)).getNode(); // Add each leaf value from the operand to the Constants list // to form a flattened list of all the values. for (unsigned i = 0, e = Val->getNumValues(); i != e; ++i) Ops.push_back(SDValue(Val, i)); } if (isa(CDS->getType())) return DAG.getMergeValues(Ops, getCurSDLoc()); return NodeMap[V] = DAG.getBuildVector(VT, getCurSDLoc(), Ops); } if (C->getType()->isStructTy() || C->getType()->isArrayTy()) { assert((isa(C) || isa(C)) && "Unknown struct or array constant!"); SmallVector ValueVTs; ComputeValueVTs(TLI, DAG.getDataLayout(), C->getType(), ValueVTs); unsigned NumElts = ValueVTs.size(); if (NumElts == 0) return SDValue(); // empty struct SmallVector Constants(NumElts); for (unsigned i = 0; i != NumElts; ++i) { EVT EltVT = ValueVTs[i]; if (isa(C)) Constants[i] = DAG.getUNDEF(EltVT); else if (EltVT.isFloatingPoint()) Constants[i] = DAG.getConstantFP(0, getCurSDLoc(), EltVT); else Constants[i] = DAG.getConstant(0, getCurSDLoc(), EltVT); } return DAG.getMergeValues(Constants, getCurSDLoc()); } if (const BlockAddress *BA = dyn_cast(C)) return DAG.getBlockAddress(BA, VT); if (const auto *Equiv = dyn_cast(C)) return getValue(Equiv->getGlobalValue()); if (const auto *NC = dyn_cast(C)) return getValue(NC->getGlobalValue()); if (VT == MVT::aarch64svcount) { assert(C->isNullValue() && "Can only zero this target type!"); return DAG.getNode(ISD::BITCAST, getCurSDLoc(), VT, DAG.getConstant(0, getCurSDLoc(), MVT::nxv16i1)); } VectorType *VecTy = cast(V->getType()); // Now that we know the number and type of the elements, get that number of // elements into the Ops array based on what kind of constant it is. if (const ConstantVector *CV = dyn_cast(C)) { SmallVector Ops; unsigned NumElements = cast(VecTy)->getNumElements(); for (unsigned i = 0; i != NumElements; ++i) Ops.push_back(getValue(CV->getOperand(i))); return NodeMap[V] = DAG.getBuildVector(VT, getCurSDLoc(), Ops); } if (isa(C)) { EVT EltVT = TLI.getValueType(DAG.getDataLayout(), VecTy->getElementType()); SDValue Op; if (EltVT.isFloatingPoint()) Op = DAG.getConstantFP(0, getCurSDLoc(), EltVT); else Op = DAG.getConstant(0, getCurSDLoc(), EltVT); return NodeMap[V] = DAG.getSplat(VT, getCurSDLoc(), Op); } llvm_unreachable("Unknown vector constant"); } // If this is a static alloca, generate it as the frameindex instead of // computation. if (const AllocaInst *AI = dyn_cast(V)) { DenseMap::iterator SI = FuncInfo.StaticAllocaMap.find(AI); if (SI != FuncInfo.StaticAllocaMap.end()) return DAG.getFrameIndex( SI->second, TLI.getValueType(DAG.getDataLayout(), AI->getType())); } // If this is an instruction which fast-isel has deferred, select it now. 
if (const Instruction *Inst = dyn_cast(V)) { Register InReg = FuncInfo.InitializeRegForValue(Inst); RegsForValue RFV(*DAG.getContext(), TLI, DAG.getDataLayout(), InReg, Inst->getType(), std::nullopt); SDValue Chain = DAG.getEntryNode(); return RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V); } if (const MetadataAsValue *MD = dyn_cast(V)) return DAG.getMDNode(cast(MD->getMetadata())); if (const auto *BB = dyn_cast(V)) return DAG.getBasicBlock(FuncInfo.MBBMap[BB]); llvm_unreachable("Can't get register for value!"); } void SelectionDAGBuilder::visitCatchPad(const CatchPadInst &I) { auto Pers = classifyEHPersonality(FuncInfo.Fn->getPersonalityFn()); bool IsMSVCCXX = Pers == EHPersonality::MSVC_CXX; bool IsCoreCLR = Pers == EHPersonality::CoreCLR; bool IsSEH = isAsynchronousEHPersonality(Pers); MachineBasicBlock *CatchPadMBB = FuncInfo.MBB; if (!IsSEH) CatchPadMBB->setIsEHScopeEntry(); // In MSVC C++ and CoreCLR, catchblocks are funclets and need prologues. if (IsMSVCCXX || IsCoreCLR) CatchPadMBB->setIsEHFuncletEntry(); } void SelectionDAGBuilder::visitCatchRet(const CatchReturnInst &I) { // Update machine-CFG edge. MachineBasicBlock *TargetMBB = FuncInfo.MBBMap[I.getSuccessor()]; FuncInfo.MBB->addSuccessor(TargetMBB); TargetMBB->setIsEHCatchretTarget(true); DAG.getMachineFunction().setHasEHCatchret(true); auto Pers = classifyEHPersonality(FuncInfo.Fn->getPersonalityFn()); bool IsSEH = isAsynchronousEHPersonality(Pers); if (IsSEH) { // If this is not a fall-through branch or optimizations are switched off, // emit the branch. if (TargetMBB != NextBlock(FuncInfo.MBB) || TM.getOptLevel() == CodeGenOptLevel::None) DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, getControlRoot(), DAG.getBasicBlock(TargetMBB))); return; } // Figure out the funclet membership for the catchret's successor. // This will be used by the FuncletLayout pass to determine how to order the // BB's. // A 'catchret' returns to the outer scope's color. Value *ParentPad = I.getCatchSwitchParentPad(); const BasicBlock *SuccessorColor; if (isa(ParentPad)) SuccessorColor = &FuncInfo.Fn->getEntryBlock(); else SuccessorColor = cast(ParentPad)->getParent(); assert(SuccessorColor && "No parent funclet for catchret!"); MachineBasicBlock *SuccessorColorMBB = FuncInfo.MBBMap[SuccessorColor]; assert(SuccessorColorMBB && "No MBB for SuccessorColor!"); // Create the terminator node. SDValue Ret = DAG.getNode(ISD::CATCHRET, getCurSDLoc(), MVT::Other, getControlRoot(), DAG.getBasicBlock(TargetMBB), DAG.getBasicBlock(SuccessorColorMBB)); DAG.setRoot(Ret); } void SelectionDAGBuilder::visitCleanupPad(const CleanupPadInst &CPI) { // Don't emit any special code for the cleanuppad instruction. It just marks // the start of an EH scope/funclet. FuncInfo.MBB->setIsEHScopeEntry(); auto Pers = classifyEHPersonality(FuncInfo.Fn->getPersonalityFn()); if (Pers != EHPersonality::Wasm_CXX) { FuncInfo.MBB->setIsEHFuncletEntry(); FuncInfo.MBB->setIsCleanupFuncletEntry(); } } // In wasm EH, even though a catchpad may not catch an exception if a tag does // not match, it is OK to add only the first unwind destination catchpad to the // successors, because there will be at least one invoke instruction within the // catch scope that points to the next unwind destination, if one exists, so // CFGSort cannot mess up with BB sorting order. 
// (All catchpads with 'catch (type)' clauses have a 'llvm.rethrow' intrinsic // call within them, and catchpads only consisting of 'catch (...)' have a // '__cxa_end_catch' call within them, both of which generate invokes in case // the next unwind destination exists, i.e., the next unwind destination is not // the caller.) // // Having at most one EH pad successor is also simpler and helps later // transformations. // // For example, // current: // invoke void @foo to ... unwind label %catch.dispatch // catch.dispatch: // %0 = catchswitch within ... [label %catch.start] unwind label %next // catch.start: // ... // ... in this BB or some other child BB dominated by this BB there will be an // invoke that points to 'next' BB as an unwind destination // // next: ; We don't need to add this to 'current' BB's successor // ... static void findWasmUnwindDestinations( FunctionLoweringInfo &FuncInfo, const BasicBlock *EHPadBB, BranchProbability Prob, SmallVectorImpl> &UnwindDests) { while (EHPadBB) { const Instruction *Pad = EHPadBB->getFirstNonPHI(); if (isa(Pad)) { // Stop on cleanup pads. UnwindDests.emplace_back(FuncInfo.MBBMap[EHPadBB], Prob); UnwindDests.back().first->setIsEHScopeEntry(); break; } else if (const auto *CatchSwitch = dyn_cast(Pad)) { // Add the catchpad handlers to the possible destinations. We don't // continue to the unwind destination of the catchswitch for wasm. for (const BasicBlock *CatchPadBB : CatchSwitch->handlers()) { UnwindDests.emplace_back(FuncInfo.MBBMap[CatchPadBB], Prob); UnwindDests.back().first->setIsEHScopeEntry(); } break; } else { continue; } } } /// When an invoke or a cleanupret unwinds to the next EH pad, there are /// many places it could ultimately go. In the IR, we have a single unwind /// destination, but in the machine CFG, we enumerate all the possible blocks. /// This function skips over imaginary basic blocks that hold catchswitch /// instructions, and finds all the "real" machine /// basic block destinations. As those destinations may not be successors of /// EHPadBB, here we also calculate the edge probability to those destinations. /// The passed-in Prob is the edge probability to EHPadBB. static void findUnwindDestinations( FunctionLoweringInfo &FuncInfo, const BasicBlock *EHPadBB, BranchProbability Prob, SmallVectorImpl> &UnwindDests) { EHPersonality Personality = classifyEHPersonality(FuncInfo.Fn->getPersonalityFn()); bool IsMSVCCXX = Personality == EHPersonality::MSVC_CXX; bool IsCoreCLR = Personality == EHPersonality::CoreCLR; bool IsWasmCXX = Personality == EHPersonality::Wasm_CXX; bool IsSEH = isAsynchronousEHPersonality(Personality); if (IsWasmCXX) { findWasmUnwindDestinations(FuncInfo, EHPadBB, Prob, UnwindDests); assert(UnwindDests.size() <= 1 && "There should be at most one unwind destination for wasm"); return; } while (EHPadBB) { const Instruction *Pad = EHPadBB->getFirstNonPHI(); BasicBlock *NewEHPadBB = nullptr; if (isa(Pad)) { // Stop on landingpads. They are not funclets. UnwindDests.emplace_back(FuncInfo.MBBMap[EHPadBB], Prob); break; } else if (isa(Pad)) { // Stop on cleanup pads. Cleanups are always funclet entries for all known // personalities. UnwindDests.emplace_back(FuncInfo.MBBMap[EHPadBB], Prob); UnwindDests.back().first->setIsEHScopeEntry(); UnwindDests.back().first->setIsEHFuncletEntry(); break; } else if (const auto *CatchSwitch = dyn_cast(Pad)) { // Add the catchpad handlers to the possible destinations. 
for (const BasicBlock *CatchPadBB : CatchSwitch->handlers()) { UnwindDests.emplace_back(FuncInfo.MBBMap[CatchPadBB], Prob); // For MSVC++ and the CLR, catchblocks are funclets and need prologues. if (IsMSVCCXX || IsCoreCLR) UnwindDests.back().first->setIsEHFuncletEntry(); if (!IsSEH) UnwindDests.back().first->setIsEHScopeEntry(); } NewEHPadBB = CatchSwitch->getUnwindDest(); } else { continue; } BranchProbabilityInfo *BPI = FuncInfo.BPI; if (BPI && NewEHPadBB) Prob *= BPI->getEdgeProbability(EHPadBB, NewEHPadBB); EHPadBB = NewEHPadBB; } } void SelectionDAGBuilder::visitCleanupRet(const CleanupReturnInst &I) { // Update successor info. SmallVector, 1> UnwindDests; auto UnwindDest = I.getUnwindDest(); BranchProbabilityInfo *BPI = FuncInfo.BPI; BranchProbability UnwindDestProb = (BPI && UnwindDest) ? BPI->getEdgeProbability(FuncInfo.MBB->getBasicBlock(), UnwindDest) : BranchProbability::getZero(); findUnwindDestinations(FuncInfo, UnwindDest, UnwindDestProb, UnwindDests); for (auto &UnwindDest : UnwindDests) { UnwindDest.first->setIsEHPad(); addSuccessorWithProb(FuncInfo.MBB, UnwindDest.first, UnwindDest.second); } FuncInfo.MBB->normalizeSuccProbs(); // Create the terminator node. - SDValue Ret = - DAG.getNode(ISD::CLEANUPRET, getCurSDLoc(), MVT::Other, getControlRoot()); + MachineBasicBlock *CleanupPadMBB = + FuncInfo.MBBMap[I.getCleanupPad()->getParent()]; + SDValue Ret = DAG.getNode(ISD::CLEANUPRET, getCurSDLoc(), MVT::Other, + getControlRoot(), DAG.getBasicBlock(CleanupPadMBB)); DAG.setRoot(Ret); } void SelectionDAGBuilder::visitCatchSwitch(const CatchSwitchInst &CSI) { report_fatal_error("visitCatchSwitch not yet implemented!"); } void SelectionDAGBuilder::visitRet(const ReturnInst &I) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); auto &DL = DAG.getDataLayout(); SDValue Chain = getControlRoot(); SmallVector Outs; SmallVector OutVals; // Calls to @llvm.experimental.deoptimize don't generate a return value, so // lower // // %val = call @llvm.experimental.deoptimize() // ret %val // // differently. if (I.getParent()->getTerminatingDeoptimizeCall()) { LowerDeoptimizingReturn(); return; } if (!FuncInfo.CanLowerReturn) { unsigned DemoteReg = FuncInfo.DemoteRegister; const Function *F = I.getParent()->getParent(); // Emit a store of the return value through the virtual register. // Leave Outs empty so that LowerReturn won't try to load return // registers the usual way. SmallVector PtrValueVTs; ComputeValueVTs(TLI, DL, PointerType::get(F->getContext(), DAG.getDataLayout().getAllocaAddrSpace()), PtrValueVTs); SDValue RetPtr = DAG.getCopyFromReg(Chain, getCurSDLoc(), DemoteReg, PtrValueVTs[0]); SDValue RetOp = getValue(I.getOperand(0)); SmallVector ValueVTs, MemVTs; SmallVector Offsets; ComputeValueVTs(TLI, DL, I.getOperand(0)->getType(), ValueVTs, &MemVTs, &Offsets, 0); unsigned NumValues = ValueVTs.size(); SmallVector Chains(NumValues); Align BaseAlign = DL.getPrefTypeAlign(I.getOperand(0)->getType()); for (unsigned i = 0; i != NumValues; ++i) { // An aggregate return value cannot wrap around the address space, so // offsets to its parts don't wrap either. SDValue Ptr = DAG.getObjectPtrOffset(getCurSDLoc(), RetPtr, TypeSize::getFixed(Offsets[i])); SDValue Val = RetOp.getValue(RetOp.getResNo() + i); if (MemVTs[i] != ValueVTs[i]) Val = DAG.getPtrExtOrTrunc(Val, getCurSDLoc(), MemVTs[i]); Chains[i] = DAG.getStore( Chain, getCurSDLoc(), Val, // FIXME: better loc info would be nice. 
Ptr, MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()), commonAlignment(BaseAlign, Offsets[i])); } Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, Chains); } else if (I.getNumOperands() != 0) { SmallVector ValueVTs; ComputeValueVTs(TLI, DL, I.getOperand(0)->getType(), ValueVTs); unsigned NumValues = ValueVTs.size(); if (NumValues) { SDValue RetOp = getValue(I.getOperand(0)); const Function *F = I.getParent()->getParent(); bool NeedsRegBlock = TLI.functionArgumentNeedsConsecutiveRegisters( I.getOperand(0)->getType(), F->getCallingConv(), /*IsVarArg*/ false, DL); ISD::NodeType ExtendKind = ISD::ANY_EXTEND; if (F->getAttributes().hasRetAttr(Attribute::SExt)) ExtendKind = ISD::SIGN_EXTEND; else if (F->getAttributes().hasRetAttr(Attribute::ZExt)) ExtendKind = ISD::ZERO_EXTEND; LLVMContext &Context = F->getContext(); bool RetInReg = F->getAttributes().hasRetAttr(Attribute::InReg); for (unsigned j = 0; j != NumValues; ++j) { EVT VT = ValueVTs[j]; if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) VT = TLI.getTypeForExtReturn(Context, VT, ExtendKind); CallingConv::ID CC = F->getCallingConv(); unsigned NumParts = TLI.getNumRegistersForCallingConv(Context, CC, VT); MVT PartVT = TLI.getRegisterTypeForCallingConv(Context, CC, VT); SmallVector Parts(NumParts); getCopyToParts(DAG, getCurSDLoc(), SDValue(RetOp.getNode(), RetOp.getResNo() + j), &Parts[0], NumParts, PartVT, &I, CC, ExtendKind); // 'inreg' on function refers to return value ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy(); if (RetInReg) Flags.setInReg(); if (I.getOperand(0)->getType()->isPointerTy()) { Flags.setPointer(); Flags.setPointerAddrSpace( cast(I.getOperand(0)->getType())->getAddressSpace()); } if (NeedsRegBlock) { Flags.setInConsecutiveRegs(); if (j == NumValues - 1) Flags.setInConsecutiveRegsLast(); } // Propagate extension type if any if (ExtendKind == ISD::SIGN_EXTEND) Flags.setSExt(); else if (ExtendKind == ISD::ZERO_EXTEND) Flags.setZExt(); for (unsigned i = 0; i < NumParts; ++i) { Outs.push_back(ISD::OutputArg(Flags, Parts[i].getValueType().getSimpleVT(), VT, /*isfixed=*/true, 0, 0)); OutVals.push_back(Parts[i]); } } } } // Push in swifterror virtual register as the last element of Outs. This makes // sure swifterror virtual register will be returned in the swifterror // physical register. const Function *F = I.getParent()->getParent(); if (TLI.supportSwiftError() && F->getAttributes().hasAttrSomewhere(Attribute::SwiftError)) { assert(SwiftError.getFunctionArg() && "Need a swift error argument"); ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy(); Flags.setSwiftError(); Outs.push_back(ISD::OutputArg( Flags, /*vt=*/TLI.getPointerTy(DL), /*argvt=*/EVT(TLI.getPointerTy(DL)), /*isfixed=*/true, /*origidx=*/1, /*partOffs=*/0)); // Create SDNode for the swifterror virtual register. OutVals.push_back( DAG.getRegister(SwiftError.getOrCreateVRegUseAt( &I, FuncInfo.MBB, SwiftError.getFunctionArg()), EVT(TLI.getPointerTy(DL)))); } bool isVarArg = DAG.getMachineFunction().getFunction().isVarArg(); CallingConv::ID CallConv = DAG.getMachineFunction().getFunction().getCallingConv(); Chain = DAG.getTargetLoweringInfo().LowerReturn( Chain, CallConv, isVarArg, Outs, OutVals, getCurSDLoc(), DAG); // Verify that the target's LowerReturn behaved as expected. assert(Chain.getNode() && Chain.getValueType() == MVT::Other && "LowerReturn didn't return a valid chain!"); // Update the DAG with the new chain value resulting from return lowering. 
DAG.setRoot(Chain); } /// CopyToExportRegsIfNeeded - If the given value has virtual registers /// created for it, emit nodes to copy the value into the virtual /// registers. void SelectionDAGBuilder::CopyToExportRegsIfNeeded(const Value *V) { // Skip empty types if (V->getType()->isEmptyTy()) return; DenseMap::iterator VMI = FuncInfo.ValueMap.find(V); if (VMI != FuncInfo.ValueMap.end()) { assert((!V->use_empty() || isa(V)) && "Unused value assigned virtual registers!"); CopyValueToVirtualRegister(V, VMI->second); } } /// ExportFromCurrentBlock - If this condition isn't known to be exported from /// the current basic block, add it to ValueMap now so that we'll get a /// CopyTo/FromReg. void SelectionDAGBuilder::ExportFromCurrentBlock(const Value *V) { // No need to export constants. if (!isa(V) && !isa(V)) return; // Already exported? if (FuncInfo.isExportedInst(V)) return; Register Reg = FuncInfo.InitializeRegForValue(V); CopyValueToVirtualRegister(V, Reg); } bool SelectionDAGBuilder::isExportableFromCurrentBlock(const Value *V, const BasicBlock *FromBB) { // The operands of the setcc have to be in this block. We don't know // how to export them from some other block. if (const Instruction *VI = dyn_cast(V)) { // Can export from current BB. if (VI->getParent() == FromBB) return true; // Is already exported, noop. return FuncInfo.isExportedInst(V); } // If this is an argument, we can export it if the BB is the entry block or // if it is already exported. if (isa(V)) { if (FromBB->isEntryBlock()) return true; // Otherwise, can only export this if it is already exported. return FuncInfo.isExportedInst(V); } // Otherwise, constants can always be exported. return true; } /// Return branch probability calculated by BranchProbabilityInfo for IR blocks. BranchProbability SelectionDAGBuilder::getEdgeProbability(const MachineBasicBlock *Src, const MachineBasicBlock *Dst) const { BranchProbabilityInfo *BPI = FuncInfo.BPI; const BasicBlock *SrcBB = Src->getBasicBlock(); const BasicBlock *DstBB = Dst->getBasicBlock(); if (!BPI) { // If BPI is not available, set the default probability as 1 / N, where N is // the number of successors. auto SuccSize = std::max(succ_size(SrcBB), 1); return BranchProbability(1, SuccSize); } return BPI->getEdgeProbability(SrcBB, DstBB); } void SelectionDAGBuilder::addSuccessorWithProb(MachineBasicBlock *Src, MachineBasicBlock *Dst, BranchProbability Prob) { if (!FuncInfo.BPI) Src->addSuccessorWithoutProb(Dst); else { if (Prob.isUnknown()) Prob = getEdgeProbability(Src, Dst); Src->addSuccessor(Dst, Prob); } } static bool InBlock(const Value *V, const BasicBlock *BB) { if (const Instruction *I = dyn_cast(V)) return I->getParent() == BB; return true; } /// EmitBranchForMergedCondition - Helper method for FindMergedConditions. /// This function emits a branch and is used at the leaves of an OR or an /// AND operator tree. void SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond, MachineBasicBlock *TBB, MachineBasicBlock *FBB, MachineBasicBlock *CurBB, MachineBasicBlock *SwitchBB, BranchProbability TProb, BranchProbability FProb, bool InvertCond) { const BasicBlock *BB = CurBB->getBasicBlock(); // If the leaf of the tree is a comparison, merge the condition into // the caseblock. if (const CmpInst *BOp = dyn_cast(Cond)) { // The operands of the cmp have to be in this block. We don't know // how to export them from some other block. If this is the first block // of the sequence, no exporting is needed. 
if (CurBB == SwitchBB || (isExportableFromCurrentBlock(BOp->getOperand(0), BB) && isExportableFromCurrentBlock(BOp->getOperand(1), BB))) { ISD::CondCode Condition; if (const ICmpInst *IC = dyn_cast(Cond)) { ICmpInst::Predicate Pred = InvertCond ? IC->getInversePredicate() : IC->getPredicate(); Condition = getICmpCondCode(Pred); } else { const FCmpInst *FC = cast(Cond); FCmpInst::Predicate Pred = InvertCond ? FC->getInversePredicate() : FC->getPredicate(); Condition = getFCmpCondCode(Pred); if (TM.Options.NoNaNsFPMath) Condition = getFCmpCodeWithoutNaN(Condition); } CaseBlock CB(Condition, BOp->getOperand(0), BOp->getOperand(1), nullptr, TBB, FBB, CurBB, getCurSDLoc(), TProb, FProb); SL->SwitchCases.push_back(CB); return; } } // Create a CaseBlock record representing this branch. ISD::CondCode Opc = InvertCond ? ISD::SETNE : ISD::SETEQ; CaseBlock CB(Opc, Cond, ConstantInt::getTrue(*DAG.getContext()), nullptr, TBB, FBB, CurBB, getCurSDLoc(), TProb, FProb); SL->SwitchCases.push_back(CB); } // Collect dependencies on V recursively. This is used for the cost analysis in // `shouldKeepJumpConditionsTogether`. static bool collectInstructionDeps( SmallMapVector *Deps, const Value *V, SmallMapVector *Necessary = nullptr, unsigned Depth = 0) { // Return false if we have an incomplete count. if (Depth >= SelectionDAG::MaxRecursionDepth) return false; auto *I = dyn_cast(V); if (I == nullptr) return true; if (Necessary != nullptr) { // This instruction is necessary for the other side of the condition so // don't count it. if (Necessary->contains(I)) return true; } // Already added this dep. if (!Deps->try_emplace(I, false).second) return true; for (unsigned OpIdx = 0, E = I->getNumOperands(); OpIdx < E; ++OpIdx) if (!collectInstructionDeps(Deps, I->getOperand(OpIdx), Necessary, Depth + 1)) return false; return true; } bool SelectionDAGBuilder::shouldKeepJumpConditionsTogether( const FunctionLoweringInfo &FuncInfo, const BranchInst &I, Instruction::BinaryOps Opc, const Value *Lhs, const Value *Rhs, TargetLoweringBase::CondMergingParams Params) const { if (I.getNumSuccessors() != 2) return false; if (!I.isConditional()) return false; if (Params.BaseCost < 0) return false; // Baseline cost. InstructionCost CostThresh = Params.BaseCost; BranchProbabilityInfo *BPI = nullptr; if (Params.LikelyBias || Params.UnlikelyBias) BPI = FuncInfo.BPI; if (BPI != nullptr) { // See if we are either likely to get an early out or compute both lhs/rhs // of the condition. BasicBlock *IfFalse = I.getSuccessor(0); BasicBlock *IfTrue = I.getSuccessor(1); std::optional Likely; if (BPI->isEdgeHot(I.getParent(), IfTrue)) Likely = true; else if (BPI->isEdgeHot(I.getParent(), IfFalse)) Likely = false; if (Likely) { if (Opc == (*Likely ? Instruction::And : Instruction::Or)) // Its likely we will have to compute both lhs and rhs of condition CostThresh += Params.LikelyBias; else { if (Params.UnlikelyBias < 0) return false; // Its likely we will get an early out. CostThresh -= Params.UnlikelyBias; } } } if (CostThresh <= 0) return false; // Collect "all" instructions that lhs condition is dependent on. // Use map for stable iteration (to avoid non-determanism of iteration of // SmallPtrSet). The `bool` value is just a dummy. SmallMapVector LhsDeps, RhsDeps; collectInstructionDeps(&LhsDeps, Lhs); // Collect "all" instructions that rhs condition is dependent on AND are // dependencies of lhs. This gives us an estimate on which instructions we // stand to save by splitting the condition. 
if (!collectInstructionDeps(&RhsDeps, Rhs, &LhsDeps)) return false; // Add the compare instruction itself unless its a dependency on the LHS. if (const auto *RhsI = dyn_cast(Rhs)) if (!LhsDeps.contains(RhsI)) RhsDeps.try_emplace(RhsI, false); const auto &TLI = DAG.getTargetLoweringInfo(); const auto &TTI = TLI.getTargetMachine().getTargetTransformInfo(*I.getFunction()); InstructionCost CostOfIncluding = 0; // See if this instruction will need to computed independently of whether RHS // is. Value *BrCond = I.getCondition(); auto ShouldCountInsn = [&RhsDeps, &BrCond](const Instruction *Ins) { for (const auto *U : Ins->users()) { // If user is independent of RHS calculation we don't need to count it. if (auto *UIns = dyn_cast(U)) if (UIns != BrCond && !RhsDeps.contains(UIns)) return false; } return true; }; // Prune instructions from RHS Deps that are dependencies of unrelated // instructions. The value (SelectionDAG::MaxRecursionDepth) is fairly // arbitrary and just meant to cap the how much time we spend in the pruning // loop. Its highly unlikely to come into affect. const unsigned MaxPruneIters = SelectionDAG::MaxRecursionDepth; // Stop after a certain point. No incorrectness from including too many // instructions. for (unsigned PruneIters = 0; PruneIters < MaxPruneIters; ++PruneIters) { const Instruction *ToDrop = nullptr; for (const auto &InsPair : RhsDeps) { if (!ShouldCountInsn(InsPair.first)) { ToDrop = InsPair.first; break; } } if (ToDrop == nullptr) break; RhsDeps.erase(ToDrop); } for (const auto &InsPair : RhsDeps) { // Finally accumulate latency that we can only attribute to computing the // RHS condition. Use latency because we are essentially trying to calculate // the cost of the dependency chain. // Possible TODO: We could try to estimate ILP and make this more precise. CostOfIncluding += TTI.getInstructionCost(InsPair.first, TargetTransformInfo::TCK_Latency); if (CostOfIncluding > CostThresh) return false; } return true; } void SelectionDAGBuilder::FindMergedConditions(const Value *Cond, MachineBasicBlock *TBB, MachineBasicBlock *FBB, MachineBasicBlock *CurBB, MachineBasicBlock *SwitchBB, Instruction::BinaryOps Opc, BranchProbability TProb, BranchProbability FProb, bool InvertCond) { // Skip over not part of the tree and remember to invert op and operands at // next level. Value *NotCond; if (match(Cond, m_OneUse(m_Not(m_Value(NotCond)))) && InBlock(NotCond, CurBB->getBasicBlock())) { FindMergedConditions(NotCond, TBB, FBB, CurBB, SwitchBB, Opc, TProb, FProb, !InvertCond); return; } const Instruction *BOp = dyn_cast(Cond); const Value *BOpOp0, *BOpOp1; // Compute the effective opcode for Cond, taking into account whether it needs // to be inverted, e.g. // and (not (or A, B)), C // gets lowered as // and (and (not A, not B), C) Instruction::BinaryOps BOpc = (Instruction::BinaryOps)0; if (BOp) { BOpc = match(BOp, m_LogicalAnd(m_Value(BOpOp0), m_Value(BOpOp1))) ? Instruction::And : (match(BOp, m_LogicalOr(m_Value(BOpOp0), m_Value(BOpOp1))) ? Instruction::Or : (Instruction::BinaryOps)0); if (InvertCond) { if (BOpc == Instruction::And) BOpc = Instruction::Or; else if (BOpc == Instruction::Or) BOpc = Instruction::And; } } // If this node is not part of the or/and tree, emit it as a branch. // Note that all nodes in the tree should have same opcode. 
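// Editor's sketch (illustrative only, not part of the original file): the
// decision made by shouldKeepJumpConditionsTogether above, condensed. Start
// from the base cost threshold, bias it by whether an early out is likely,
// then charge the latency of every instruction that only the RHS condition
// needs; the two jump conditions are merged only if that extra latency stays
// under the threshold. Parameter names are hypothetical; kept under #if 0.
#if 0
static bool keepJumpConditionsTogetherSketch(
    InstructionCost BaseCost, std::optional<bool> LikelyComputeBothSides,
    int LikelyBias, int UnlikelyBias,
    ArrayRef<InstructionCost> RhsOnlyLatencies) {
  InstructionCost Threshold = BaseCost;
  if (LikelyComputeBothSides)
    Threshold += *LikelyComputeBothSides ? LikelyBias : -UnlikelyBias;
  if (Threshold <= 0)
    return false;
  InstructionCost RhsCost = 0;
  for (InstructionCost Latency : RhsOnlyLatencies) {
    RhsCost += Latency;
    if (RhsCost > Threshold)
      return false;                // merging would cost more than it saves
  }
  return true;
}
#endif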
bool BOpIsInOrAndTree = BOpc && BOpc == Opc && BOp->hasOneUse(); if (!BOpIsInOrAndTree || BOp->getParent() != CurBB->getBasicBlock() || !InBlock(BOpOp0, CurBB->getBasicBlock()) || !InBlock(BOpOp1, CurBB->getBasicBlock())) { EmitBranchForMergedCondition(Cond, TBB, FBB, CurBB, SwitchBB, TProb, FProb, InvertCond); return; } // Create TmpBB after CurBB. MachineFunction::iterator BBI(CurBB); MachineFunction &MF = DAG.getMachineFunction(); MachineBasicBlock *TmpBB = MF.CreateMachineBasicBlock(CurBB->getBasicBlock()); CurBB->getParent()->insert(++BBI, TmpBB); if (Opc == Instruction::Or) { // Codegen X | Y as: // BB1: // jmp_if_X TBB // jmp TmpBB // TmpBB: // jmp_if_Y TBB // jmp FBB // // We have flexibility in setting Prob for BB1 and Prob for TmpBB. // The requirement is that // TrueProb for BB1 + (FalseProb for BB1 * TrueProb for TmpBB) // = TrueProb for original BB. // Assuming the original probabilities are A and B, one choice is to set // BB1's probabilities to A/2 and A/2+B, and set TmpBB's probabilities to // A/(1+B) and 2B/(1+B). This choice assumes that // TrueProb for BB1 == FalseProb for BB1 * TrueProb for TmpBB. // Another choice is to assume TrueProb for BB1 equals to TrueProb for // TmpBB, but the math is more complicated. auto NewTrueProb = TProb / 2; auto NewFalseProb = TProb / 2 + FProb; // Emit the LHS condition. FindMergedConditions(BOpOp0, TBB, TmpBB, CurBB, SwitchBB, Opc, NewTrueProb, NewFalseProb, InvertCond); // Normalize A/2 and B to get A/(1+B) and 2B/(1+B). SmallVector Probs{TProb / 2, FProb}; BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end()); // Emit the RHS condition into TmpBB. FindMergedConditions(BOpOp1, TBB, FBB, TmpBB, SwitchBB, Opc, Probs[0], Probs[1], InvertCond); } else { assert(Opc == Instruction::And && "Unknown merge op!"); // Codegen X & Y as: // BB1: // jmp_if_X TmpBB // jmp FBB // TmpBB: // jmp_if_Y TBB // jmp FBB // // This requires creation of TmpBB after CurBB. // We have flexibility in setting Prob for BB1 and Prob for TmpBB. // The requirement is that // FalseProb for BB1 + (TrueProb for BB1 * FalseProb for TmpBB) // = FalseProb for original BB. // Assuming the original probabilities are A and B, one choice is to set // BB1's probabilities to A+B/2 and B/2, and set TmpBB's probabilities to // 2A/(1+A) and B/(1+A). This choice assumes that FalseProb for BB1 == // TrueProb for BB1 * FalseProb for TmpBB. auto NewTrueProb = TProb + FProb / 2; auto NewFalseProb = FProb / 2; // Emit the LHS condition. FindMergedConditions(BOpOp0, TmpBB, FBB, CurBB, SwitchBB, Opc, NewTrueProb, NewFalseProb, InvertCond); // Normalize A and B/2 to get 2A/(1+A) and B/(1+A). SmallVector Probs{TProb, FProb / 2}; BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end()); // Emit the RHS condition into TmpBB. FindMergedConditions(BOpOp1, TBB, FBB, TmpBB, SwitchBB, Opc, Probs[0], Probs[1], InvertCond); } } /// If the set of cases should be emitted as a series of branches, return true. /// If we should emit this as a bunch of and/or'd together conditions, return /// false. bool SelectionDAGBuilder::ShouldEmitAsBranches(const std::vector &Cases) { if (Cases.size() != 2) return true; // If this is two comparisons of the same values or'd or and'd together, they // will get folded into a single comparison, so don't emit two blocks. 
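// Editor's note (illustrative only): a worked instance of the probability
// bookkeeping for the "X | Y" case above. Take original edge probabilities
// A = TrueProb = 3/8 and B = FalseProb = 5/8. BB1 gets A/2 = 3/16 (true) and
// A/2 + B = 13/16 (false); TmpBB gets {A/2, B} = {3/16, 10/16} normalized,
// i.e. 3/13 (true) and 10/13 (false), matching A/(1+B) and 2B/(1+B). The
// required identity then holds:
//   TrueProb(BB1) + FalseProb(BB1) * TrueProb(TmpBB)
//     = 3/16 + 13/16 * 3/13 = 3/16 + 3/16 = 3/8 = A.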
if ((Cases[0].CmpLHS == Cases[1].CmpLHS && Cases[0].CmpRHS == Cases[1].CmpRHS) || (Cases[0].CmpRHS == Cases[1].CmpLHS && Cases[0].CmpLHS == Cases[1].CmpRHS)) { return false; } // Handle: (X != null) | (Y != null) --> (X|Y) != 0 // Handle: (X == null) & (Y == null) --> (X|Y) == 0 if (Cases[0].CmpRHS == Cases[1].CmpRHS && Cases[0].CC == Cases[1].CC && isa(Cases[0].CmpRHS) && cast(Cases[0].CmpRHS)->isNullValue()) { if (Cases[0].CC == ISD::SETEQ && Cases[0].TrueBB == Cases[1].ThisBB) return false; if (Cases[0].CC == ISD::SETNE && Cases[0].FalseBB == Cases[1].ThisBB) return false; } return true; } void SelectionDAGBuilder::visitBr(const BranchInst &I) { MachineBasicBlock *BrMBB = FuncInfo.MBB; // Update machine-CFG edges. MachineBasicBlock *Succ0MBB = FuncInfo.MBBMap[I.getSuccessor(0)]; if (I.isUnconditional()) { // Update machine-CFG edges. BrMBB->addSuccessor(Succ0MBB); // If this is not a fall-through branch or optimizations are switched off, // emit the branch. if (Succ0MBB != NextBlock(BrMBB) || TM.getOptLevel() == CodeGenOptLevel::None) { auto Br = DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, getControlRoot(), DAG.getBasicBlock(Succ0MBB)); setValue(&I, Br); DAG.setRoot(Br); } return; } // If this condition is one of the special cases we handle, do special stuff // now. const Value *CondVal = I.getCondition(); MachineBasicBlock *Succ1MBB = FuncInfo.MBBMap[I.getSuccessor(1)]; // If this is a series of conditions that are or'd or and'd together, emit // this as a sequence of branches instead of setcc's with and/or operations. // As long as jumps are not expensive (exceptions for multi-use logic ops, // unpredictable branches, and vector extracts because those jumps are likely // expensive for any target), this should improve performance. // For example, instead of something like: // cmp A, B // C = seteq // cmp D, E // F = setle // or C, F // jnz foo // Emit: // cmp A, B // je foo // cmp D, E // jle foo const Instruction *BOp = dyn_cast(CondVal); if (!DAG.getTargetLoweringInfo().isJumpExpensive() && BOp && BOp->hasOneUse() && !I.hasMetadata(LLVMContext::MD_unpredictable)) { Value *Vec; const Value *BOp0, *BOp1; Instruction::BinaryOps Opcode = (Instruction::BinaryOps)0; if (match(BOp, m_LogicalAnd(m_Value(BOp0), m_Value(BOp1)))) Opcode = Instruction::And; else if (match(BOp, m_LogicalOr(m_Value(BOp0), m_Value(BOp1)))) Opcode = Instruction::Or; if (Opcode && !(match(BOp0, m_ExtractElt(m_Value(Vec), m_Value())) && match(BOp1, m_ExtractElt(m_Specific(Vec), m_Value()))) && !shouldKeepJumpConditionsTogether( FuncInfo, I, Opcode, BOp0, BOp1, DAG.getTargetLoweringInfo().getJumpConditionMergingParams( Opcode, BOp0, BOp1))) { FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB, Opcode, getEdgeProbability(BrMBB, Succ0MBB), getEdgeProbability(BrMBB, Succ1MBB), /*InvertCond=*/false); // If the compares in later blocks need to use values not currently // exported from this block, export them now. This block should always // be the first entry. assert(SL->SwitchCases[0].ThisBB == BrMBB && "Unexpected lowering!"); // Allow some cases to be rejected. if (ShouldEmitAsBranches(SL->SwitchCases)) { for (unsigned i = 1, e = SL->SwitchCases.size(); i != e; ++i) { ExportFromCurrentBlock(SL->SwitchCases[i].CmpLHS); ExportFromCurrentBlock(SL->SwitchCases[i].CmpRHS); } // Emit the branch for this block. visitSwitchCase(SL->SwitchCases[0], BrMBB); SL->SwitchCases.erase(SL->SwitchCases.begin()); return; } // Okay, we decided not to do this, remove any inserted MBB's and clear // SwitchCases. 
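// Editor's note (illustrative only): the null-check folds named in
// ShouldEmitAsBranches above hold because a bitwise OR of two values is zero
// exactly when both operands are zero. Hence (X == null) & (Y == null) is
// equivalent to (X | Y) == 0, and by negation (X != null) | (Y != null) is
// equivalent to (X | Y) != 0, so either pair collapses into a single compare.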
for (unsigned i = 1, e = SL->SwitchCases.size(); i != e; ++i) FuncInfo.MF->erase(SL->SwitchCases[i].ThisBB); SL->SwitchCases.clear(); } } // Create a CaseBlock record representing this branch. CaseBlock CB(ISD::SETEQ, CondVal, ConstantInt::getTrue(*DAG.getContext()), nullptr, Succ0MBB, Succ1MBB, BrMBB, getCurSDLoc()); // Use visitSwitchCase to actually insert the fast branch sequence for this // cond branch. visitSwitchCase(CB, BrMBB); } /// visitSwitchCase - Emits the necessary code to represent a single node in /// the binary search tree resulting from lowering a switch instruction. void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB, MachineBasicBlock *SwitchBB) { SDValue Cond; SDValue CondLHS = getValue(CB.CmpLHS); SDLoc dl = CB.DL; if (CB.CC == ISD::SETTRUE) { // Branch or fall through to TrueBB. addSuccessorWithProb(SwitchBB, CB.TrueBB, CB.TrueProb); SwitchBB->normalizeSuccProbs(); if (CB.TrueBB != NextBlock(SwitchBB)) { DAG.setRoot(DAG.getNode(ISD::BR, dl, MVT::Other, getControlRoot(), DAG.getBasicBlock(CB.TrueBB))); } return; } auto &TLI = DAG.getTargetLoweringInfo(); EVT MemVT = TLI.getMemValueType(DAG.getDataLayout(), CB.CmpLHS->getType()); // Build the setcc now. if (!CB.CmpMHS) { // Fold "(X == true)" to X and "(X == false)" to !X to // handle common cases produced by branch lowering. if (CB.CmpRHS == ConstantInt::getTrue(*DAG.getContext()) && CB.CC == ISD::SETEQ) Cond = CondLHS; else if (CB.CmpRHS == ConstantInt::getFalse(*DAG.getContext()) && CB.CC == ISD::SETEQ) { SDValue True = DAG.getConstant(1, dl, CondLHS.getValueType()); Cond = DAG.getNode(ISD::XOR, dl, CondLHS.getValueType(), CondLHS, True); } else { SDValue CondRHS = getValue(CB.CmpRHS); // If a pointer's DAG type is larger than its memory type then the DAG // values are zero-extended. This breaks signed comparisons so truncate // back to the underlying type before doing the compare. if (CondLHS.getValueType() != MemVT) { CondLHS = DAG.getPtrExtOrTrunc(CondLHS, getCurSDLoc(), MemVT); CondRHS = DAG.getPtrExtOrTrunc(CondRHS, getCurSDLoc(), MemVT); } Cond = DAG.getSetCC(dl, MVT::i1, CondLHS, CondRHS, CB.CC); } } else { assert(CB.CC == ISD::SETLE && "Can handle only LE ranges now"); const APInt& Low = cast(CB.CmpLHS)->getValue(); const APInt& High = cast(CB.CmpRHS)->getValue(); SDValue CmpOp = getValue(CB.CmpMHS); EVT VT = CmpOp.getValueType(); if (cast(CB.CmpLHS)->isMinValue(true)) { Cond = DAG.getSetCC(dl, MVT::i1, CmpOp, DAG.getConstant(High, dl, VT), ISD::SETLE); } else { SDValue SUB = DAG.getNode(ISD::SUB, dl, VT, CmpOp, DAG.getConstant(Low, dl, VT)); Cond = DAG.getSetCC(dl, MVT::i1, SUB, DAG.getConstant(High-Low, dl, VT), ISD::SETULE); } } // Update successor info addSuccessorWithProb(SwitchBB, CB.TrueBB, CB.TrueProb); // TrueBB and FalseBB are always different unless the incoming IR is // degenerate. This only happens when running llc on weird IR. if (CB.TrueBB != CB.FalseBB) addSuccessorWithProb(SwitchBB, CB.FalseBB, CB.FalseProb); SwitchBB->normalizeSuccProbs(); // If the lhs block is the next block, invert the condition so that we can // fall through to the lhs instead of the rhs block. if (CB.TrueBB == NextBlock(SwitchBB)) { std::swap(CB.TrueBB, CB.FalseBB); SDValue True = DAG.getConstant(1, dl, Cond.getValueType()); Cond = DAG.getNode(ISD::XOR, dl, Cond.getValueType(), Cond, True); } SDValue BrCond = DAG.getNode(ISD::BRCOND, dl, MVT::Other, getControlRoot(), Cond, DAG.getBasicBlock(CB.TrueBB)); setValue(CurInst, BrCond); // Insert the false branch. 
  // Do this even if it's a fall through branch,
  // this makes it easier to do DAG optimizations which require inverting
  // the branch condition.
  BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond,
                       DAG.getBasicBlock(CB.FalseBB));

  DAG.setRoot(BrCond);
}

/// visitJumpTable - Emit JumpTable node in the current MBB
void SelectionDAGBuilder::visitJumpTable(SwitchCG::JumpTable &JT) {
  // Emit the code for the jump table
  assert(JT.SL && "Should set SDLoc for SelectionDAG!");
  assert(JT.Reg != -1U && "Should lower JT Header first!");
  EVT PTy = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
  SDValue Index = DAG.getCopyFromReg(getControlRoot(), *JT.SL, JT.Reg, PTy);
  SDValue Table = DAG.getJumpTable(JT.JTI, PTy);
  SDValue BrJumpTable = DAG.getNode(ISD::BR_JT, *JT.SL, MVT::Other,
                                    Index.getValue(1), Table, Index);
  DAG.setRoot(BrJumpTable);
}

/// visitJumpTableHeader - This function emits necessary code to produce index
/// in the JumpTable from switch case.
void SelectionDAGBuilder::visitJumpTableHeader(SwitchCG::JumpTable &JT,
                                               JumpTableHeader &JTH,
                                               MachineBasicBlock *SwitchBB) {
  assert(JT.SL && "Should set SDLoc for SelectionDAG!");
  const SDLoc &dl = *JT.SL;

  // Subtract the lowest switch case value from the value being switched on.
  SDValue SwitchOp = getValue(JTH.SValue);
  EVT VT = SwitchOp.getValueType();
  SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, SwitchOp,
                            DAG.getConstant(JTH.First, dl, VT));

  // The SDNode we just created, which holds the value being switched on minus
  // the smallest case value, needs to be copied to a virtual register so it
  // can be used as an index into the jump table in a subsequent basic block.
  // This value may be smaller or larger than the target's pointer type, and
  // therefore require extension or truncating.
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  SwitchOp =
      DAG.getZExtOrTrunc(Sub, dl, TLI.getPointerTy(DAG.getDataLayout()));

  unsigned JumpTableReg =
      FuncInfo.CreateReg(TLI.getPointerTy(DAG.getDataLayout()));
  SDValue CopyTo =
      DAG.getCopyToReg(getControlRoot(), dl, JumpTableReg, SwitchOp);
  JT.Reg = JumpTableReg;

  if (!JTH.FallthroughUnreachable) {
    // Emit the range check for the jump table, and branch to the default block
    // for the switch statement if the value being switched on exceeds the
    // largest case in the switch.
    SDValue CMP = DAG.getSetCC(
        dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
                                   Sub.getValueType()),
        Sub, DAG.getConstant(JTH.Last - JTH.First, dl, VT), ISD::SETUGT);

    SDValue BrCond = DAG.getNode(ISD::BRCOND, dl, MVT::Other, CopyTo, CMP,
                                 DAG.getBasicBlock(JT.Default));

    // Avoid emitting unnecessary branches to the next block.
    if (JT.MBB != NextBlock(SwitchBB))
      BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond,
                           DAG.getBasicBlock(JT.MBB));

    DAG.setRoot(BrCond);
  } else {
    // Avoid emitting unnecessary branches to the next block.
    if (JT.MBB != NextBlock(SwitchBB))
      DAG.setRoot(DAG.getNode(ISD::BR, dl, MVT::Other, CopyTo,
                              DAG.getBasicBlock(JT.MBB)));
    else
      DAG.setRoot(CopyTo);
  }
}

/// Create a LOAD_STACK_GUARD node, and let it carry the target specific global
/// variable if there exists one.
static SDValue getLoadStackGuard(SelectionDAG &DAG, const SDLoc &DL, SDValue &Chain) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout()); EVT PtrMemTy = TLI.getPointerMemTy(DAG.getDataLayout()); MachineFunction &MF = DAG.getMachineFunction(); Value *Global = TLI.getSDagStackGuard(*MF.getFunction().getParent()); MachineSDNode *Node = DAG.getMachineNode(TargetOpcode::LOAD_STACK_GUARD, DL, PtrTy, Chain); if (Global) { MachinePointerInfo MPInfo(Global); auto Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant | MachineMemOperand::MODereferenceable; MachineMemOperand *MemRef = MF.getMachineMemOperand( MPInfo, Flags, LocationSize::precise(PtrTy.getSizeInBits() / 8), DAG.getEVTAlign(PtrTy)); DAG.setNodeMemRefs(Node, {MemRef}); } if (PtrTy != PtrMemTy) return DAG.getPtrExtOrTrunc(SDValue(Node, 0), DL, PtrMemTy); return SDValue(Node, 0); } /// Codegen a new tail for a stack protector check ParentMBB which has had its /// tail spliced into a stack protector check success bb. /// /// For a high level explanation of how this fits into the stack protector /// generation see the comment on the declaration of class /// StackProtectorDescriptor. void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD, MachineBasicBlock *ParentBB) { // First create the loads to the guard/stack slot for the comparison. const TargetLowering &TLI = DAG.getTargetLoweringInfo(); EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout()); EVT PtrMemTy = TLI.getPointerMemTy(DAG.getDataLayout()); MachineFrameInfo &MFI = ParentBB->getParent()->getFrameInfo(); int FI = MFI.getStackProtectorIndex(); SDValue Guard; SDLoc dl = getCurSDLoc(); SDValue StackSlotPtr = DAG.getFrameIndex(FI, PtrTy); const Module &M = *ParentBB->getParent()->getFunction().getParent(); Align Align = DAG.getDataLayout().getPrefTypeAlign(PointerType::get(M.getContext(), 0)); // Generate code to load the content of the guard slot. SDValue GuardVal = DAG.getLoad( PtrMemTy, dl, DAG.getEntryNode(), StackSlotPtr, MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), Align, MachineMemOperand::MOVolatile); if (TLI.useStackGuardXorFP()) GuardVal = TLI.emitStackGuardXorFP(DAG, GuardVal, dl); // Retrieve guard check function, nullptr if instrumentation is inlined. if (const Function *GuardCheckFn = TLI.getSSPStackGuardCheck(M)) { // The target provides a guard check function to validate the guard value. // Generate a call to that function with the content of the guard slot as // argument. FunctionType *FnTy = GuardCheckFn->getFunctionType(); assert(FnTy->getNumParams() == 1 && "Invalid function signature"); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; Entry.Node = GuardVal; Entry.Ty = FnTy->getParamType(0); if (GuardCheckFn->hasParamAttribute(0, Attribute::AttrKind::InReg)) Entry.IsInReg = true; Args.push_back(Entry); TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(getCurSDLoc()) .setChain(DAG.getEntryNode()) .setCallee(GuardCheckFn->getCallingConv(), FnTy->getReturnType(), getValue(GuardCheckFn), std::move(Args)); std::pair Result = TLI.LowerCallTo(CLI); DAG.setRoot(Result.second); return; } // If useLoadStackGuardNode returns true, generate LOAD_STACK_GUARD. // Otherwise, emit a volatile load to retrieve the stack guard value. 
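  // Sketch of the check emitted below (illustrative pseudo-IR, assuming the
  // common __stack_chk_guard global; the actual guard source is whatever
  // LOAD_STACK_GUARD / TLI.getSDagStackGuard provides):
  //
  //   %guard = load volatile ptr, ptr @__stack_chk_guard
  //   %slot  = load volatile ptr, ptr %StackProtectorSlot
  //   br i1 (%guard != %slot), label %FailureMBB, label %SuccessMBB
  //
  // where FailureMBB ends up calling __stack_chk_fail
  // (see visitSPDescriptorFailure).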
SDValue Chain = DAG.getEntryNode(); if (TLI.useLoadStackGuardNode()) { Guard = getLoadStackGuard(DAG, dl, Chain); } else { const Value *IRGuard = TLI.getSDagStackGuard(M); SDValue GuardPtr = getValue(IRGuard); Guard = DAG.getLoad(PtrMemTy, dl, Chain, GuardPtr, MachinePointerInfo(IRGuard, 0), Align, MachineMemOperand::MOVolatile); } // Perform the comparison via a getsetcc. SDValue Cmp = DAG.getSetCC(dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), Guard.getValueType()), Guard, GuardVal, ISD::SETNE); // If the guard/stackslot do not equal, branch to failure MBB. SDValue BrCond = DAG.getNode(ISD::BRCOND, dl, MVT::Other, GuardVal.getOperand(0), Cmp, DAG.getBasicBlock(SPD.getFailureMBB())); // Otherwise branch to success MBB. SDValue Br = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond, DAG.getBasicBlock(SPD.getSuccessMBB())); DAG.setRoot(Br); } /// Codegen the failure basic block for a stack protector check. /// /// A failure stack protector machine basic block consists simply of a call to /// __stack_chk_fail(). /// /// For a high level explanation of how this fits into the stack protector /// generation see the comment on the declaration of class /// StackProtectorDescriptor. void SelectionDAGBuilder::visitSPDescriptorFailure(StackProtectorDescriptor &SPD) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); TargetLowering::MakeLibCallOptions CallOptions; CallOptions.setDiscardResult(true); SDValue Chain = TLI.makeLibCall(DAG, RTLIB::STACKPROTECTOR_CHECK_FAIL, MVT::isVoid, std::nullopt, CallOptions, getCurSDLoc()) .second; // On PS4/PS5, the "return address" must still be within the calling // function, even if it's at the very end, so emit an explicit TRAP here. // Passing 'true' for doesNotReturn above won't generate the trap for us. if (TM.getTargetTriple().isPS()) Chain = DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, Chain); // WebAssembly needs an unreachable instruction after a non-returning call, // because the function return type can be different from __stack_chk_fail's // return type (void). if (TM.getTargetTriple().isWasm()) Chain = DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, Chain); DAG.setRoot(Chain); } /// visitBitTestHeader - This function emits necessary code to produce value /// suitable for "bit tests" void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B, MachineBasicBlock *SwitchBB) { SDLoc dl = getCurSDLoc(); // Subtract the minimum value. SDValue SwitchOp = getValue(B.SValue); EVT VT = SwitchOp.getValueType(); SDValue RangeSub = DAG.getNode(ISD::SUB, dl, VT, SwitchOp, DAG.getConstant(B.First, dl, VT)); // Determine the type of the test operands. const TargetLowering &TLI = DAG.getTargetLoweringInfo(); bool UsePtrType = false; if (!TLI.isTypeLegal(VT)) { UsePtrType = true; } else { for (unsigned i = 0, e = B.Cases.size(); i != e; ++i) if (!isUIntN(VT.getSizeInBits(), B.Cases[i].Mask)) { // Switch table case range are encoded into series of masks. // Just use pointer type, it's guaranteed to fit. 
UsePtrType = true; break; } } SDValue Sub = RangeSub; if (UsePtrType) { VT = TLI.getPointerTy(DAG.getDataLayout()); Sub = DAG.getZExtOrTrunc(Sub, dl, VT); } B.RegVT = VT.getSimpleVT(); B.Reg = FuncInfo.CreateReg(B.RegVT); SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), dl, B.Reg, Sub); MachineBasicBlock* MBB = B.Cases[0].ThisBB; if (!B.FallthroughUnreachable) addSuccessorWithProb(SwitchBB, B.Default, B.DefaultProb); addSuccessorWithProb(SwitchBB, MBB, B.Prob); SwitchBB->normalizeSuccProbs(); SDValue Root = CopyTo; if (!B.FallthroughUnreachable) { // Conditional branch to the default block. SDValue RangeCmp = DAG.getSetCC(dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RangeSub.getValueType()), RangeSub, DAG.getConstant(B.Range, dl, RangeSub.getValueType()), ISD::SETUGT); Root = DAG.getNode(ISD::BRCOND, dl, MVT::Other, Root, RangeCmp, DAG.getBasicBlock(B.Default)); } // Avoid emitting unnecessary branches to the next block. if (MBB != NextBlock(SwitchBB)) Root = DAG.getNode(ISD::BR, dl, MVT::Other, Root, DAG.getBasicBlock(MBB)); DAG.setRoot(Root); } /// visitBitTestCase - this function produces one "bit test" void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB, MachineBasicBlock* NextMBB, BranchProbability BranchProbToNext, unsigned Reg, BitTestCase &B, MachineBasicBlock *SwitchBB) { SDLoc dl = getCurSDLoc(); MVT VT = BB.RegVT; SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), dl, Reg, VT); SDValue Cmp; unsigned PopCount = llvm::popcount(B.Mask); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (PopCount == 1) { // Testing for a single bit; just compare the shift count with what it // would need to be to shift a 1 bit in that position. Cmp = DAG.getSetCC( dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT), ShiftOp, DAG.getConstant(llvm::countr_zero(B.Mask), dl, VT), ISD::SETEQ); } else if (PopCount == BB.Range) { // There is only one zero bit in the range, test for it directly. Cmp = DAG.getSetCC( dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT), ShiftOp, DAG.getConstant(llvm::countr_one(B.Mask), dl, VT), ISD::SETNE); } else { // Make desired shift SDValue SwitchVal = DAG.getNode(ISD::SHL, dl, VT, DAG.getConstant(1, dl, VT), ShiftOp); // Emit bit tests and jumps SDValue AndOp = DAG.getNode(ISD::AND, dl, VT, SwitchVal, DAG.getConstant(B.Mask, dl, VT)); Cmp = DAG.getSetCC( dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT), AndOp, DAG.getConstant(0, dl, VT), ISD::SETNE); } // The branch probability from SwitchBB to B.TargetBB is B.ExtraProb. addSuccessorWithProb(SwitchBB, B.TargetBB, B.ExtraProb); // The branch probability from SwitchBB to NextMBB is BranchProbToNext. addSuccessorWithProb(SwitchBB, NextMBB, BranchProbToNext); // It is not guaranteed that the sum of B.ExtraProb and BranchProbToNext is // one as they are relative probabilities (and thus work more like weights), // and hence we need to normalize them to let the sum of them become one. SwitchBB->normalizeSuccProbs(); SDValue BrAnd = DAG.getNode(ISD::BRCOND, dl, MVT::Other, getControlRoot(), Cmp, DAG.getBasicBlock(B.TargetBB)); // Avoid emitting unnecessary branches to the next block. if (NextMBB != NextBlock(SwitchBB)) BrAnd = DAG.getNode(ISD::BR, dl, MVT::Other, BrAnd, DAG.getBasicBlock(NextMBB)); DAG.setRoot(BrAnd); } void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) { MachineBasicBlock *InvokeMBB = FuncInfo.MBB; // Retrieve successors. Look through artificial IR level blocks like // catchswitch for successors. 
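  // Note: for an invoke, IR successor 0 is the normal destination and IR
  // successor 1 is the exception destination (an EH pad).  The EH pad may be
  // an artificial block such as a catchswitch, so the actual unwind targets
  // are recomputed below with findUnwindDestinations rather than taken from
  // the successor list verbatim.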
MachineBasicBlock *Return = FuncInfo.MBBMap[I.getSuccessor(0)]; const BasicBlock *EHPadBB = I.getSuccessor(1); MachineBasicBlock *EHPadMBB = FuncInfo.MBBMap[EHPadBB]; // Deopt and ptrauth bundles are lowered in helper functions, and we don't // have to do anything here to lower funclet bundles. assert(!I.hasOperandBundlesOtherThan( {LLVMContext::OB_deopt, LLVMContext::OB_gc_transition, LLVMContext::OB_gc_live, LLVMContext::OB_funclet, LLVMContext::OB_cfguardtarget, LLVMContext::OB_ptrauth, LLVMContext::OB_clang_arc_attachedcall}) && "Cannot lower invokes with arbitrary operand bundles yet!"); const Value *Callee(I.getCalledOperand()); const Function *Fn = dyn_cast(Callee); if (isa(Callee)) visitInlineAsm(I, EHPadBB); else if (Fn && Fn->isIntrinsic()) { switch (Fn->getIntrinsicID()) { default: llvm_unreachable("Cannot invoke this intrinsic"); case Intrinsic::donothing: // Ignore invokes to @llvm.donothing: jump directly to the next BB. case Intrinsic::seh_try_begin: case Intrinsic::seh_scope_begin: case Intrinsic::seh_try_end: case Intrinsic::seh_scope_end: if (EHPadMBB) // a block referenced by EH table // so dtor-funclet not removed by opts EHPadMBB->setMachineBlockAddressTaken(); break; case Intrinsic::experimental_patchpoint_void: case Intrinsic::experimental_patchpoint: visitPatchpoint(I, EHPadBB); break; case Intrinsic::experimental_gc_statepoint: LowerStatepoint(cast(I), EHPadBB); break; case Intrinsic::wasm_rethrow: { // This is usually done in visitTargetIntrinsic, but this intrinsic is // special because it can be invoked, so we manually lower it to a DAG // node here. SmallVector Ops; Ops.push_back(getControlRoot()); // inchain for the terminator node const TargetLowering &TLI = DAG.getTargetLoweringInfo(); Ops.push_back( DAG.getTargetConstant(Intrinsic::wasm_rethrow, getCurSDLoc(), TLI.getPointerTy(DAG.getDataLayout()))); SDVTList VTs = DAG.getVTList(ArrayRef({MVT::Other})); // outchain DAG.setRoot(DAG.getNode(ISD::INTRINSIC_VOID, getCurSDLoc(), VTs, Ops)); break; } } } else if (I.hasDeoptState()) { // Currently we do not lower any intrinsic calls with deopt operand bundles. // Eventually we will support lowering the @llvm.experimental.deoptimize // intrinsic, and right now there are no plans to support other intrinsics // with deopt state. LowerCallSiteWithDeoptBundle(&I, getValue(Callee), EHPadBB); } else if (I.countOperandBundlesOfType(LLVMContext::OB_ptrauth)) { LowerCallSiteWithPtrAuthBundle(cast(I), EHPadBB); } else { LowerCallTo(I, getValue(Callee), false, false, EHPadBB); } // If the value of the invoke is used outside of its defining block, make it // available as a virtual register. // We already took care of the exported value for the statepoint instruction // during call to the LowerStatepoint. if (!isa(I)) { CopyToExportRegsIfNeeded(&I); } SmallVector, 1> UnwindDests; BranchProbabilityInfo *BPI = FuncInfo.BPI; BranchProbability EHPadBBProb = BPI ? BPI->getEdgeProbability(InvokeMBB->getBasicBlock(), EHPadBB) : BranchProbability::getZero(); findUnwindDestinations(FuncInfo, EHPadBB, EHPadBBProb, UnwindDests); // Update successor info. addSuccessorWithProb(InvokeMBB, Return); for (auto &UnwindDest : UnwindDests) { UnwindDest.first->setIsEHPad(); addSuccessorWithProb(InvokeMBB, UnwindDest.first, UnwindDest.second); } InvokeMBB->normalizeSuccProbs(); // Drop into normal successor. 
DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, getControlRoot(), DAG.getBasicBlock(Return))); } void SelectionDAGBuilder::visitCallBr(const CallBrInst &I) { MachineBasicBlock *CallBrMBB = FuncInfo.MBB; // Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't // have to do anything here to lower funclet bundles. assert(!I.hasOperandBundlesOtherThan( {LLVMContext::OB_deopt, LLVMContext::OB_funclet}) && "Cannot lower callbrs with arbitrary operand bundles yet!"); assert(I.isInlineAsm() && "Only know how to handle inlineasm callbr"); visitInlineAsm(I); CopyToExportRegsIfNeeded(&I); // Retrieve successors. SmallPtrSet Dests; Dests.insert(I.getDefaultDest()); MachineBasicBlock *Return = FuncInfo.MBBMap[I.getDefaultDest()]; // Update successor info. addSuccessorWithProb(CallBrMBB, Return, BranchProbability::getOne()); for (unsigned i = 0, e = I.getNumIndirectDests(); i < e; ++i) { BasicBlock *Dest = I.getIndirectDest(i); MachineBasicBlock *Target = FuncInfo.MBBMap[Dest]; Target->setIsInlineAsmBrIndirectTarget(); Target->setMachineBlockAddressTaken(); Target->setLabelMustBeEmitted(); // Don't add duplicate machine successors. if (Dests.insert(Dest).second) addSuccessorWithProb(CallBrMBB, Target, BranchProbability::getZero()); } CallBrMBB->normalizeSuccProbs(); // Drop into default successor. DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, getControlRoot(), DAG.getBasicBlock(Return))); } void SelectionDAGBuilder::visitResume(const ResumeInst &RI) { llvm_unreachable("SelectionDAGBuilder shouldn't visit resume instructions!"); } void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) { assert(FuncInfo.MBB->isEHPad() && "Call to landingpad not in landing pad!"); // If there aren't registers to copy the values into (e.g., during SjLj // exceptions), then don't bother to create these DAG nodes. const TargetLowering &TLI = DAG.getTargetLoweringInfo(); const Constant *PersonalityFn = FuncInfo.Fn->getPersonalityFn(); if (TLI.getExceptionPointerRegister(PersonalityFn) == 0 && TLI.getExceptionSelectorRegister(PersonalityFn) == 0) return; // If landingpad's return type is token type, we don't create DAG nodes // for its exception pointer and selector value. The extraction of exception // pointer or selector value from token type landingpads is not currently // supported. if (LP.getType()->isTokenTy()) return; SmallVector ValueVTs; SDLoc dl = getCurSDLoc(); ComputeValueVTs(TLI, DAG.getDataLayout(), LP.getType(), ValueVTs); assert(ValueVTs.size() == 2 && "Only two-valued landingpads are supported"); // Get the two live-in registers as SDValues. The physregs have already been // copied into virtual registers. SDValue Ops[2]; if (FuncInfo.ExceptionPointerVirtReg) { Ops[0] = DAG.getZExtOrTrunc( DAG.getCopyFromReg(DAG.getEntryNode(), dl, FuncInfo.ExceptionPointerVirtReg, TLI.getPointerTy(DAG.getDataLayout())), dl, ValueVTs[0]); } else { Ops[0] = DAG.getConstant(0, dl, TLI.getPointerTy(DAG.getDataLayout())); } Ops[1] = DAG.getZExtOrTrunc( DAG.getCopyFromReg(DAG.getEntryNode(), dl, FuncInfo.ExceptionSelectorVirtReg, TLI.getPointerTy(DAG.getDataLayout())), dl, ValueVTs[1]); // Merge into one. SDValue Res = DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(ValueVTs), Ops); setValue(&LP, Res); } void SelectionDAGBuilder::UpdateSplitBlock(MachineBasicBlock *First, MachineBasicBlock *Last) { // Update JTCases. for (JumpTableBlock &JTB : SL->JTCases) if (JTB.first.HeaderBB == First) JTB.first.HeaderBB = Last; // Update BitTestCases. 
for (BitTestBlock &BTB : SL->BitTestCases) if (BTB.Parent == First) BTB.Parent = Last; } void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) { MachineBasicBlock *IndirectBrMBB = FuncInfo.MBB; // Update machine-CFG edges with unique successors. SmallSet Done; for (unsigned i = 0, e = I.getNumSuccessors(); i != e; ++i) { BasicBlock *BB = I.getSuccessor(i); bool Inserted = Done.insert(BB).second; if (!Inserted) continue; MachineBasicBlock *Succ = FuncInfo.MBBMap[BB]; addSuccessorWithProb(IndirectBrMBB, Succ); } IndirectBrMBB->normalizeSuccProbs(); DAG.setRoot(DAG.getNode(ISD::BRIND, getCurSDLoc(), MVT::Other, getControlRoot(), getValue(I.getAddress()))); } void SelectionDAGBuilder::visitUnreachable(const UnreachableInst &I) { if (!DAG.getTarget().Options.TrapUnreachable) return; // We may be able to ignore unreachable behind a noreturn call. if (const CallInst *Call = dyn_cast_or_null(I.getPrevNode()); Call && Call->doesNotReturn()) { if (DAG.getTarget().Options.NoTrapAfterNoreturn) return; // Do not emit an additional trap instruction. if (Call->isNonContinuableTrap()) return; } DAG.setRoot(DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, DAG.getRoot())); } void SelectionDAGBuilder::visitUnary(const User &I, unsigned Opcode) { SDNodeFlags Flags; if (auto *FPOp = dyn_cast(&I)) Flags.copyFMF(*FPOp); SDValue Op = getValue(I.getOperand(0)); SDValue UnNodeValue = DAG.getNode(Opcode, getCurSDLoc(), Op.getValueType(), Op, Flags); setValue(&I, UnNodeValue); } void SelectionDAGBuilder::visitBinary(const User &I, unsigned Opcode) { SDNodeFlags Flags; if (auto *OFBinOp = dyn_cast(&I)) { Flags.setNoSignedWrap(OFBinOp->hasNoSignedWrap()); Flags.setNoUnsignedWrap(OFBinOp->hasNoUnsignedWrap()); } if (auto *ExactOp = dyn_cast(&I)) Flags.setExact(ExactOp->isExact()); if (auto *DisjointOp = dyn_cast(&I)) Flags.setDisjoint(DisjointOp->isDisjoint()); if (auto *FPOp = dyn_cast(&I)) Flags.copyFMF(*FPOp); SDValue Op1 = getValue(I.getOperand(0)); SDValue Op2 = getValue(I.getOperand(1)); SDValue BinNodeValue = DAG.getNode(Opcode, getCurSDLoc(), Op1.getValueType(), Op1, Op2, Flags); setValue(&I, BinNodeValue); } void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) { SDValue Op1 = getValue(I.getOperand(0)); SDValue Op2 = getValue(I.getOperand(1)); EVT ShiftTy = DAG.getTargetLoweringInfo().getShiftAmountTy( Op1.getValueType(), DAG.getDataLayout()); // Coerce the shift amount to the right type if we can. This exposes the // truncate or zext to optimization early. 
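  // Illustrative example (hypothetical target): for "shl i64 %v, %amt" on a
  // target whose shift-amount type is i8, %amt is truncated to i8 here, so
  // the truncate is visible to later DAG combines instead of only being
  // introduced during legalization.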
if (!I.getType()->isVectorTy() && Op2.getValueType() != ShiftTy) { assert(ShiftTy.getSizeInBits() >= Log2_32_Ceil(Op1.getValueSizeInBits()) && "Unexpected shift type"); Op2 = DAG.getZExtOrTrunc(Op2, getCurSDLoc(), ShiftTy); } bool nuw = false; bool nsw = false; bool exact = false; if (Opcode == ISD::SRL || Opcode == ISD::SRA || Opcode == ISD::SHL) { if (const OverflowingBinaryOperator *OFBinOp = dyn_cast(&I)) { nuw = OFBinOp->hasNoUnsignedWrap(); nsw = OFBinOp->hasNoSignedWrap(); } if (const PossiblyExactOperator *ExactOp = dyn_cast(&I)) exact = ExactOp->isExact(); } SDNodeFlags Flags; Flags.setExact(exact); Flags.setNoSignedWrap(nsw); Flags.setNoUnsignedWrap(nuw); SDValue Res = DAG.getNode(Opcode, getCurSDLoc(), Op1.getValueType(), Op1, Op2, Flags); setValue(&I, Res); } void SelectionDAGBuilder::visitSDiv(const User &I) { SDValue Op1 = getValue(I.getOperand(0)); SDValue Op2 = getValue(I.getOperand(1)); SDNodeFlags Flags; Flags.setExact(isa(&I) && cast(&I)->isExact()); setValue(&I, DAG.getNode(ISD::SDIV, getCurSDLoc(), Op1.getValueType(), Op1, Op2, Flags)); } void SelectionDAGBuilder::visitICmp(const ICmpInst &I) { ICmpInst::Predicate predicate = I.getPredicate(); SDValue Op1 = getValue(I.getOperand(0)); SDValue Op2 = getValue(I.getOperand(1)); ISD::CondCode Opcode = getICmpCondCode(predicate); auto &TLI = DAG.getTargetLoweringInfo(); EVT MemVT = TLI.getMemValueType(DAG.getDataLayout(), I.getOperand(0)->getType()); // If a pointer's DAG type is larger than its memory type then the DAG values // are zero-extended. This breaks signed comparisons so truncate back to the // underlying type before doing the compare. if (Op1.getValueType() != MemVT) { Op1 = DAG.getPtrExtOrTrunc(Op1, getCurSDLoc(), MemVT); Op2 = DAG.getPtrExtOrTrunc(Op2, getCurSDLoc(), MemVT); } EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), I.getType()); setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Opcode)); } void SelectionDAGBuilder::visitFCmp(const FCmpInst &I) { FCmpInst::Predicate predicate = I.getPredicate(); SDValue Op1 = getValue(I.getOperand(0)); SDValue Op2 = getValue(I.getOperand(1)); ISD::CondCode Condition = getFCmpCondCode(predicate); auto *FPMO = cast(&I); if (FPMO->hasNoNaNs() || TM.Options.NoNaNsFPMath) Condition = getFCmpCodeWithoutNaN(Condition); SDNodeFlags Flags; Flags.copyFMF(*FPMO); SelectionDAG::FlagInserter FlagsInserter(DAG, Flags); EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), I.getType()); setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Condition)); } // Check if the condition of the select has one use or two users that are both // selects with the same condition. static bool hasOnlySelectUsers(const Value *Cond) { return llvm::all_of(Cond->users(), [](const Value *V) { return isa(V); }); } void SelectionDAGBuilder::visitSelect(const User &I) { SmallVector ValueVTs; ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), I.getType(), ValueVTs); unsigned NumValues = ValueVTs.size(); if (NumValues == 0) return; SmallVector Values(NumValues); SDValue Cond = getValue(I.getOperand(0)); SDValue LHSVal = getValue(I.getOperand(1)); SDValue RHSVal = getValue(I.getOperand(2)); SmallVector BaseOps(1, Cond); ISD::NodeType OpCode = Cond.getValueType().isVector() ? 
ISD::VSELECT : ISD::SELECT; bool IsUnaryAbs = false; bool Negate = false; SDNodeFlags Flags; if (auto *FPOp = dyn_cast(&I)) Flags.copyFMF(*FPOp); Flags.setUnpredictable( cast(I).getMetadata(LLVMContext::MD_unpredictable)); // Min/max matching is only viable if all output VTs are the same. if (all_equal(ValueVTs)) { EVT VT = ValueVTs[0]; LLVMContext &Ctx = *DAG.getContext(); auto &TLI = DAG.getTargetLoweringInfo(); // We care about the legality of the operation after it has been type // legalized. while (TLI.getTypeAction(Ctx, VT) != TargetLoweringBase::TypeLegal) VT = TLI.getTypeToTransformTo(Ctx, VT); // If the vselect is legal, assume we want to leave this as a vector setcc + // vselect. Otherwise, if this is going to be scalarized, we want to see if // min/max is legal on the scalar type. bool UseScalarMinMax = VT.isVector() && !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT); // ValueTracking's select pattern matching does not account for -0.0, // so we can't lower to FMINIMUM/FMAXIMUM because those nodes specify that // -0.0 is less than +0.0. Value *LHS, *RHS; auto SPR = matchSelectPattern(const_cast(&I), LHS, RHS); ISD::NodeType Opc = ISD::DELETED_NODE; switch (SPR.Flavor) { case SPF_UMAX: Opc = ISD::UMAX; break; case SPF_UMIN: Opc = ISD::UMIN; break; case SPF_SMAX: Opc = ISD::SMAX; break; case SPF_SMIN: Opc = ISD::SMIN; break; case SPF_FMINNUM: switch (SPR.NaNBehavior) { case SPNB_NA: llvm_unreachable("No NaN behavior for FP op?"); case SPNB_RETURNS_NAN: break; case SPNB_RETURNS_OTHER: Opc = ISD::FMINNUM; break; case SPNB_RETURNS_ANY: if (TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT) || (UseScalarMinMax && TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT.getScalarType()))) Opc = ISD::FMINNUM; break; } break; case SPF_FMAXNUM: switch (SPR.NaNBehavior) { case SPNB_NA: llvm_unreachable("No NaN behavior for FP op?"); case SPNB_RETURNS_NAN: break; case SPNB_RETURNS_OTHER: Opc = ISD::FMAXNUM; break; case SPNB_RETURNS_ANY: if (TLI.isOperationLegalOrCustom(ISD::FMAXNUM, VT) || (UseScalarMinMax && TLI.isOperationLegalOrCustom(ISD::FMAXNUM, VT.getScalarType()))) Opc = ISD::FMAXNUM; break; } break; case SPF_NABS: Negate = true; [[fallthrough]]; case SPF_ABS: IsUnaryAbs = true; Opc = ISD::ABS; break; default: break; } if (!IsUnaryAbs && Opc != ISD::DELETED_NODE && (TLI.isOperationLegalOrCustomOrPromote(Opc, VT) || (UseScalarMinMax && TLI.isOperationLegalOrCustom(Opc, VT.getScalarType()))) && // If the underlying comparison instruction is used by any other // instruction, the consumed instructions won't be destroyed, so it is // not profitable to convert to a min/max. 
hasOnlySelectUsers(cast(I).getCondition())) { OpCode = Opc; LHSVal = getValue(LHS); RHSVal = getValue(RHS); BaseOps.clear(); } if (IsUnaryAbs) { OpCode = Opc; LHSVal = getValue(LHS); BaseOps.clear(); } } if (IsUnaryAbs) { for (unsigned i = 0; i != NumValues; ++i) { SDLoc dl = getCurSDLoc(); EVT VT = LHSVal.getNode()->getValueType(LHSVal.getResNo() + i); Values[i] = DAG.getNode(OpCode, dl, VT, LHSVal.getValue(LHSVal.getResNo() + i)); if (Negate) Values[i] = DAG.getNegative(Values[i], dl, VT); } } else { for (unsigned i = 0; i != NumValues; ++i) { SmallVector Ops(BaseOps.begin(), BaseOps.end()); Ops.push_back(SDValue(LHSVal.getNode(), LHSVal.getResNo() + i)); Ops.push_back(SDValue(RHSVal.getNode(), RHSVal.getResNo() + i)); Values[i] = DAG.getNode( OpCode, getCurSDLoc(), LHSVal.getNode()->getValueType(LHSVal.getResNo() + i), Ops, Flags); } } setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(), DAG.getVTList(ValueVTs), Values)); } void SelectionDAGBuilder::visitTrunc(const User &I) { // TruncInst cannot be a no-op cast because sizeof(src) > sizeof(dest). SDValue N = getValue(I.getOperand(0)); EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), I.getType()); setValue(&I, DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), DestVT, N)); } void SelectionDAGBuilder::visitZExt(const User &I) { // ZExt cannot be a no-op cast because sizeof(src) < sizeof(dest). // ZExt also can't be a cast to bool for same reason. So, nothing much to do SDValue N = getValue(I.getOperand(0)); auto &TLI = DAG.getTargetLoweringInfo(); EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType()); SDNodeFlags Flags; if (auto *PNI = dyn_cast(&I)) Flags.setNonNeg(PNI->hasNonNeg()); // Eagerly use nonneg information to canonicalize towards sign_extend if // that is the target's preference. // TODO: Let the target do this later. if (Flags.hasNonNeg() && TLI.isSExtCheaperThanZExt(N.getValueType(), DestVT)) { setValue(&I, DAG.getNode(ISD::SIGN_EXTEND, getCurSDLoc(), DestVT, N)); return; } setValue(&I, DAG.getNode(ISD::ZERO_EXTEND, getCurSDLoc(), DestVT, N, Flags)); } void SelectionDAGBuilder::visitSExt(const User &I) { // SExt cannot be a no-op cast because sizeof(src) < sizeof(dest). // SExt also can't be a cast to bool for same reason. 
  // So, nothing much to do
  SDValue N = getValue(I.getOperand(0));
  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
                                                        I.getType());
  setValue(&I, DAG.getNode(ISD::SIGN_EXTEND, getCurSDLoc(), DestVT, N));
}

void SelectionDAGBuilder::visitFPTrunc(const User &I) {
  // FPTrunc is never a no-op cast, no need to check
  SDValue N = getValue(I.getOperand(0));
  SDLoc dl = getCurSDLoc();
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
  setValue(&I, DAG.getNode(ISD::FP_ROUND, dl, DestVT, N,
                           DAG.getTargetConstant(
                               0, dl, TLI.getPointerTy(DAG.getDataLayout()))));
}

void SelectionDAGBuilder::visitFPExt(const User &I) {
  // FPExt is never a no-op cast, no need to check
  SDValue N = getValue(I.getOperand(0));
  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
                                                        I.getType());
  setValue(&I, DAG.getNode(ISD::FP_EXTEND, getCurSDLoc(), DestVT, N));
}

void SelectionDAGBuilder::visitFPToUI(const User &I) {
  // FPToUI is never a no-op cast, no need to check
  SDValue N = getValue(I.getOperand(0));
  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
                                                        I.getType());
  setValue(&I, DAG.getNode(ISD::FP_TO_UINT, getCurSDLoc(), DestVT, N));
}

void SelectionDAGBuilder::visitFPToSI(const User &I) {
  // FPToSI is never a no-op cast, no need to check
  SDValue N = getValue(I.getOperand(0));
  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
                                                        I.getType());
  setValue(&I, DAG.getNode(ISD::FP_TO_SINT, getCurSDLoc(), DestVT, N));
}

void SelectionDAGBuilder::visitUIToFP(const User &I) {
  // UIToFP is never a no-op cast, no need to check
  SDValue N = getValue(I.getOperand(0));
  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
                                                        I.getType());
  SDNodeFlags Flags;
  if (auto *PNI = dyn_cast<PossiblyNonNegInst>(&I))
    Flags.setNonNeg(PNI->hasNonNeg());

  setValue(&I, DAG.getNode(ISD::UINT_TO_FP, getCurSDLoc(), DestVT, N, Flags));
}

void SelectionDAGBuilder::visitSIToFP(const User &I) {
  // SIToFP is never a no-op cast, no need to check
  SDValue N = getValue(I.getOperand(0));
  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
                                                        I.getType());
  setValue(&I, DAG.getNode(ISD::SINT_TO_FP, getCurSDLoc(), DestVT, N));
}

void SelectionDAGBuilder::visitPtrToInt(const User &I) {
  // What to do depends on the size of the integer and the size of the pointer.
  // We can either truncate, zero extend, or no-op, accordingly.
  SDValue N = getValue(I.getOperand(0));
  auto &TLI = DAG.getTargetLoweringInfo();
  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
                                                        I.getType());
  EVT PtrMemVT =
      TLI.getMemValueType(DAG.getDataLayout(), I.getOperand(0)->getType());
  N = DAG.getPtrExtOrTrunc(N, getCurSDLoc(), PtrMemVT);
  N = DAG.getZExtOrTrunc(N, getCurSDLoc(), DestVT);
  setValue(&I, N);
}

void SelectionDAGBuilder::visitIntToPtr(const User &I) {
  // What to do depends on the size of the integer and the size of the pointer.
  // We can either truncate, zero extend, or no-op, accordingly.
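  // Illustrative example (assuming 64-bit pointers): "inttoptr i32 %x to ptr"
  // zero-extends %x, "inttoptr i128 %x to ptr" truncates it, and an i64
  // source needs no conversion at all.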
SDValue N = getValue(I.getOperand(0)); auto &TLI = DAG.getTargetLoweringInfo(); EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType()); EVT PtrMemVT = TLI.getMemValueType(DAG.getDataLayout(), I.getType()); N = DAG.getZExtOrTrunc(N, getCurSDLoc(), PtrMemVT); N = DAG.getPtrExtOrTrunc(N, getCurSDLoc(), DestVT); setValue(&I, N); } void SelectionDAGBuilder::visitBitCast(const User &I) { SDValue N = getValue(I.getOperand(0)); SDLoc dl = getCurSDLoc(); EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), I.getType()); // BitCast assures us that source and destination are the same size so this is // either a BITCAST or a no-op. if (DestVT != N.getValueType()) setValue(&I, DAG.getNode(ISD::BITCAST, dl, DestVT, N)); // convert types. // Check if the original LLVM IR Operand was a ConstantInt, because getValue() // might fold any kind of constant expression to an integer constant and that // is not what we are looking for. Only recognize a bitcast of a genuine // constant integer as an opaque constant. else if(ConstantInt *C = dyn_cast(I.getOperand(0))) setValue(&I, DAG.getConstant(C->getValue(), dl, DestVT, /*isTarget=*/false, /*isOpaque*/true)); else setValue(&I, N); // noop cast. } void SelectionDAGBuilder::visitAddrSpaceCast(const User &I) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); const Value *SV = I.getOperand(0); SDValue N = getValue(SV); EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType()); unsigned SrcAS = SV->getType()->getPointerAddressSpace(); unsigned DestAS = I.getType()->getPointerAddressSpace(); if (!TM.isNoopAddrSpaceCast(SrcAS, DestAS)) N = DAG.getAddrSpaceCast(getCurSDLoc(), DestVT, N, SrcAS, DestAS); setValue(&I, N); } void SelectionDAGBuilder::visitInsertElement(const User &I) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue InVec = getValue(I.getOperand(0)); SDValue InVal = getValue(I.getOperand(1)); SDValue InIdx = DAG.getZExtOrTrunc(getValue(I.getOperand(2)), getCurSDLoc(), TLI.getVectorIdxTy(DAG.getDataLayout())); setValue(&I, DAG.getNode(ISD::INSERT_VECTOR_ELT, getCurSDLoc(), TLI.getValueType(DAG.getDataLayout(), I.getType()), InVec, InVal, InIdx)); } void SelectionDAGBuilder::visitExtractElement(const User &I) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue InVec = getValue(I.getOperand(0)); SDValue InIdx = DAG.getZExtOrTrunc(getValue(I.getOperand(1)), getCurSDLoc(), TLI.getVectorIdxTy(DAG.getDataLayout())); setValue(&I, DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurSDLoc(), TLI.getValueType(DAG.getDataLayout(), I.getType()), InVec, InIdx)); } void SelectionDAGBuilder::visitShuffleVector(const User &I) { SDValue Src1 = getValue(I.getOperand(0)); SDValue Src2 = getValue(I.getOperand(1)); ArrayRef Mask; if (auto *SVI = dyn_cast(&I)) Mask = SVI->getShuffleMask(); else Mask = cast(I).getShuffleMask(); SDLoc DL = getCurSDLoc(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); EVT SrcVT = Src1.getValueType(); if (all_of(Mask, [](int Elem) { return Elem == 0; }) && VT.isScalableVector()) { // Canonical splat form of first element of first input vector. SDValue FirstElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, SrcVT.getScalarType(), Src1, DAG.getVectorIdxConstant(0, DL)); setValue(&I, DAG.getNode(ISD::SPLAT_VECTOR, DL, VT, FirstElt)); return; } // For now, we only handle splats for scalable vectors. 
// The DAGCombiner will perform a BUILD_VECTOR -> SPLAT_VECTOR transformation // for targets that support a SPLAT_VECTOR for non-scalable vector types. assert(!VT.isScalableVector() && "Unsupported scalable vector shuffle"); unsigned SrcNumElts = SrcVT.getVectorNumElements(); unsigned MaskNumElts = Mask.size(); if (SrcNumElts == MaskNumElts) { setValue(&I, DAG.getVectorShuffle(VT, DL, Src1, Src2, Mask)); return; } // Normalize the shuffle vector since mask and vector length don't match. if (SrcNumElts < MaskNumElts) { // Mask is longer than the source vectors. We can use concatenate vector to // make the mask and vectors lengths match. if (MaskNumElts % SrcNumElts == 0) { // Mask length is a multiple of the source vector length. // Check if the shuffle is some kind of concatenation of the input // vectors. unsigned NumConcat = MaskNumElts / SrcNumElts; bool IsConcat = true; SmallVector ConcatSrcs(NumConcat, -1); for (unsigned i = 0; i != MaskNumElts; ++i) { int Idx = Mask[i]; if (Idx < 0) continue; // Ensure the indices in each SrcVT sized piece are sequential and that // the same source is used for the whole piece. if ((Idx % SrcNumElts != (i % SrcNumElts)) || (ConcatSrcs[i / SrcNumElts] >= 0 && ConcatSrcs[i / SrcNumElts] != (int)(Idx / SrcNumElts))) { IsConcat = false; break; } // Remember which source this index came from. ConcatSrcs[i / SrcNumElts] = Idx / SrcNumElts; } // The shuffle is concatenating multiple vectors together. Just emit // a CONCAT_VECTORS operation. if (IsConcat) { SmallVector ConcatOps; for (auto Src : ConcatSrcs) { if (Src < 0) ConcatOps.push_back(DAG.getUNDEF(SrcVT)); else if (Src == 0) ConcatOps.push_back(Src1); else ConcatOps.push_back(Src2); } setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps)); return; } } unsigned PaddedMaskNumElts = alignTo(MaskNumElts, SrcNumElts); unsigned NumConcat = PaddedMaskNumElts / SrcNumElts; EVT PaddedVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(), PaddedMaskNumElts); // Pad both vectors with undefs to make them the same length as the mask. SDValue UndefVal = DAG.getUNDEF(SrcVT); SmallVector MOps1(NumConcat, UndefVal); SmallVector MOps2(NumConcat, UndefVal); MOps1[0] = Src1; MOps2[0] = Src2; Src1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, PaddedVT, MOps1); Src2 = DAG.getNode(ISD::CONCAT_VECTORS, DL, PaddedVT, MOps2); // Readjust mask for new input vector length. SmallVector MappedOps(PaddedMaskNumElts, -1); for (unsigned i = 0; i != MaskNumElts; ++i) { int Idx = Mask[i]; if (Idx >= (int)SrcNumElts) Idx -= SrcNumElts - PaddedMaskNumElts; MappedOps[i] = Idx; } SDValue Result = DAG.getVectorShuffle(PaddedVT, DL, Src1, Src2, MappedOps); // If the concatenated vector was padded, extract a subvector with the // correct number of elements. if (MaskNumElts != PaddedMaskNumElts) Result = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Result, DAG.getVectorIdxConstant(0, DL)); setValue(&I, Result); return; } if (SrcNumElts > MaskNumElts) { // Analyze the access pattern of the vector to see if we can extract // two subvectors and do the shuffle. int StartIdx[2] = { -1, -1 }; // StartIdx to extract from bool CanExtract = true; for (int Idx : Mask) { unsigned Input = 0; if (Idx < 0) continue; if (Idx >= (int)SrcNumElts) { Input = 1; Idx -= SrcNumElts; } // If all the indices come from the same MaskNumElts sized portion of // the sources we can use extract. Also make sure the extract wouldn't // extract past the end of the source. 
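      // Worked example (illustrative): with SrcNumElts = 8 and MaskNumElts = 4,
      // a mask of <4, 5, 6, 7> keeps every index inside the second 4-element
      // chunk of the first source, so StartIdx[0] becomes 4 and the shuffle
      // turns into an EXTRACT_SUBVECTOR at index 4 followed by an
      // identity-mask shuffle.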
int NewStartIdx = alignDown(Idx, MaskNumElts); if (NewStartIdx + MaskNumElts > SrcNumElts || (StartIdx[Input] >= 0 && StartIdx[Input] != NewStartIdx)) CanExtract = false; // Make sure we always update StartIdx as we use it to track if all // elements are undef. StartIdx[Input] = NewStartIdx; } if (StartIdx[0] < 0 && StartIdx[1] < 0) { setValue(&I, DAG.getUNDEF(VT)); // Vectors are not used. return; } if (CanExtract) { // Extract appropriate subvector and generate a vector shuffle for (unsigned Input = 0; Input < 2; ++Input) { SDValue &Src = Input == 0 ? Src1 : Src2; if (StartIdx[Input] < 0) Src = DAG.getUNDEF(VT); else { Src = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Src, DAG.getVectorIdxConstant(StartIdx[Input], DL)); } } // Calculate new mask. SmallVector MappedOps(Mask); for (int &Idx : MappedOps) { if (Idx >= (int)SrcNumElts) Idx -= SrcNumElts + StartIdx[1] - MaskNumElts; else if (Idx >= 0) Idx -= StartIdx[0]; } setValue(&I, DAG.getVectorShuffle(VT, DL, Src1, Src2, MappedOps)); return; } } // We can't use either concat vectors or extract subvectors so fall back to // replacing the shuffle with extract and build vector. // to insert and build vector. EVT EltVT = VT.getVectorElementType(); SmallVector Ops; for (int Idx : Mask) { SDValue Res; if (Idx < 0) { Res = DAG.getUNDEF(EltVT); } else { SDValue &Src = Idx < (int)SrcNumElts ? Src1 : Src2; if (Idx >= (int)SrcNumElts) Idx -= SrcNumElts; Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src, DAG.getVectorIdxConstant(Idx, DL)); } Ops.push_back(Res); } setValue(&I, DAG.getBuildVector(VT, DL, Ops)); } void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) { ArrayRef Indices = I.getIndices(); const Value *Op0 = I.getOperand(0); const Value *Op1 = I.getOperand(1); Type *AggTy = I.getType(); Type *ValTy = Op1->getType(); bool IntoUndef = isa(Op0); bool FromUndef = isa(Op1); unsigned LinearIndex = ComputeLinearIndex(AggTy, Indices); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SmallVector AggValueVTs; ComputeValueVTs(TLI, DAG.getDataLayout(), AggTy, AggValueVTs); SmallVector ValValueVTs; ComputeValueVTs(TLI, DAG.getDataLayout(), ValTy, ValValueVTs); unsigned NumAggValues = AggValueVTs.size(); unsigned NumValValues = ValValueVTs.size(); SmallVector Values(NumAggValues); // Ignore an insertvalue that produces an empty object if (!NumAggValues) { setValue(&I, DAG.getUNDEF(MVT(MVT::Other))); return; } SDValue Agg = getValue(Op0); unsigned i = 0; // Copy the beginning value(s) from the original aggregate. for (; i != LinearIndex; ++i) Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) : SDValue(Agg.getNode(), Agg.getResNo() + i); // Copy values from the inserted value(s). if (NumValValues) { SDValue Val = getValue(Op1); for (; i != LinearIndex + NumValValues; ++i) Values[i] = FromUndef ? DAG.getUNDEF(AggValueVTs[i]) : SDValue(Val.getNode(), Val.getResNo() + i - LinearIndex); } // Copy remaining value(s) from the original aggregate. for (; i != NumAggValues; ++i) Values[i] = IntoUndef ? 
DAG.getUNDEF(AggValueVTs[i]) : SDValue(Agg.getNode(), Agg.getResNo() + i); setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(), DAG.getVTList(AggValueVTs), Values)); } void SelectionDAGBuilder::visitExtractValue(const ExtractValueInst &I) { ArrayRef Indices = I.getIndices(); const Value *Op0 = I.getOperand(0); Type *AggTy = Op0->getType(); Type *ValTy = I.getType(); bool OutOfUndef = isa(Op0); unsigned LinearIndex = ComputeLinearIndex(AggTy, Indices); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SmallVector ValValueVTs; ComputeValueVTs(TLI, DAG.getDataLayout(), ValTy, ValValueVTs); unsigned NumValValues = ValValueVTs.size(); // Ignore a extractvalue that produces an empty object if (!NumValValues) { setValue(&I, DAG.getUNDEF(MVT(MVT::Other))); return; } SmallVector Values(NumValValues); SDValue Agg = getValue(Op0); // Copy out the selected value(s). for (unsigned i = LinearIndex; i != LinearIndex + NumValValues; ++i) Values[i - LinearIndex] = OutOfUndef ? DAG.getUNDEF(Agg.getNode()->getValueType(Agg.getResNo() + i)) : SDValue(Agg.getNode(), Agg.getResNo() + i); setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(), DAG.getVTList(ValValueVTs), Values)); } void SelectionDAGBuilder::visitGetElementPtr(const User &I) { Value *Op0 = I.getOperand(0); // Note that the pointer operand may be a vector of pointers. Take the scalar // element which holds a pointer. unsigned AS = Op0->getType()->getScalarType()->getPointerAddressSpace(); SDValue N = getValue(Op0); SDLoc dl = getCurSDLoc(); auto &TLI = DAG.getTargetLoweringInfo(); // Normalize Vector GEP - all scalar operands should be converted to the // splat vector. bool IsVectorGEP = I.getType()->isVectorTy(); ElementCount VectorElementCount = IsVectorGEP ? cast(I.getType())->getElementCount() : ElementCount::getFixed(0); if (IsVectorGEP && !N.getValueType().isVector()) { LLVMContext &Context = *DAG.getContext(); EVT VT = EVT::getVectorVT(Context, N.getValueType(), VectorElementCount); N = DAG.getSplat(VT, dl, N); } for (gep_type_iterator GTI = gep_type_begin(&I), E = gep_type_end(&I); GTI != E; ++GTI) { const Value *Idx = GTI.getOperand(); if (StructType *StTy = GTI.getStructTypeOrNull()) { unsigned Field = cast(Idx)->getUniqueInteger().getZExtValue(); if (Field) { // N = N + Offset uint64_t Offset = DAG.getDataLayout().getStructLayout(StTy)->getElementOffset(Field); // In an inbounds GEP with an offset that is nonnegative even when // interpreted as signed, assume there is no unsigned overflow. SDNodeFlags Flags; if (int64_t(Offset) >= 0 && cast(I).isInBounds()) Flags.setNoUnsignedWrap(true); N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N, DAG.getConstant(Offset, dl, N.getValueType()), Flags); } } else { // IdxSize is the width of the arithmetic according to IR semantics. // In SelectionDAG, we may prefer to do arithmetic in a wider bitwidth // (and fix up the result later). unsigned IdxSize = DAG.getDataLayout().getIndexSizeInBits(AS); MVT IdxTy = MVT::getIntegerVT(IdxSize); TypeSize ElementSize = GTI.getSequentialElementStride(DAG.getDataLayout()); // We intentionally mask away the high bits here; ElementSize may not // fit in IdxTy. APInt ElementMul(IdxSize, ElementSize.getKnownMinValue()); bool ElementScalable = ElementSize.isScalable(); // If this is a scalar constant or a splat vector of constants, // handle it quickly. 
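      // Worked example (illustrative): for "getelementptr i32, ptr %p, i64 3"
      // the index is a plain ConstantInt and ElementMul is 4, so the fast path
      // below folds the step into a single ADD of the constant byte offset 12
      // rather than emitting a separate multiply.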
const auto *C = dyn_cast(Idx); if (C && isa(C->getType())) C = C->getSplatValue(); const auto *CI = dyn_cast_or_null(C); if (CI && CI->isZero()) continue; if (CI && !ElementScalable) { APInt Offs = ElementMul * CI->getValue().sextOrTrunc(IdxSize); LLVMContext &Context = *DAG.getContext(); SDValue OffsVal; if (IsVectorGEP) OffsVal = DAG.getConstant( Offs, dl, EVT::getVectorVT(Context, IdxTy, VectorElementCount)); else OffsVal = DAG.getConstant(Offs, dl, IdxTy); // In an inbounds GEP with an offset that is nonnegative even when // interpreted as signed, assume there is no unsigned overflow. SDNodeFlags Flags; if (Offs.isNonNegative() && cast(I).isInBounds()) Flags.setNoUnsignedWrap(true); OffsVal = DAG.getSExtOrTrunc(OffsVal, dl, N.getValueType()); N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N, OffsVal, Flags); continue; } // N = N + Idx * ElementMul; SDValue IdxN = getValue(Idx); if (!IdxN.getValueType().isVector() && IsVectorGEP) { EVT VT = EVT::getVectorVT(*Context, IdxN.getValueType(), VectorElementCount); IdxN = DAG.getSplat(VT, dl, IdxN); } // If the index is smaller or larger than intptr_t, truncate or extend // it. IdxN = DAG.getSExtOrTrunc(IdxN, dl, N.getValueType()); if (ElementScalable) { EVT VScaleTy = N.getValueType().getScalarType(); SDValue VScale = DAG.getNode( ISD::VSCALE, dl, VScaleTy, DAG.getConstant(ElementMul.getZExtValue(), dl, VScaleTy)); if (IsVectorGEP) VScale = DAG.getSplatVector(N.getValueType(), dl, VScale); IdxN = DAG.getNode(ISD::MUL, dl, N.getValueType(), IdxN, VScale); } else { // If this is a multiply by a power of two, turn it into a shl // immediately. This is a very common case. if (ElementMul != 1) { if (ElementMul.isPowerOf2()) { unsigned Amt = ElementMul.logBase2(); IdxN = DAG.getNode(ISD::SHL, dl, N.getValueType(), IdxN, DAG.getConstant(Amt, dl, IdxN.getValueType())); } else { SDValue Scale = DAG.getConstant(ElementMul.getZExtValue(), dl, IdxN.getValueType()); IdxN = DAG.getNode(ISD::MUL, dl, N.getValueType(), IdxN, Scale); } } } N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N, IdxN); } } MVT PtrTy = TLI.getPointerTy(DAG.getDataLayout(), AS); MVT PtrMemTy = TLI.getPointerMemTy(DAG.getDataLayout(), AS); if (IsVectorGEP) { PtrTy = MVT::getVectorVT(PtrTy, VectorElementCount); PtrMemTy = MVT::getVectorVT(PtrMemTy, VectorElementCount); } if (PtrMemTy != PtrTy && !cast(I).isInBounds()) N = DAG.getPtrExtendInReg(N, dl, PtrMemTy); setValue(&I, N); } void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) { // If this is a fixed sized alloca in the entry block of the function, // allocate it statically on the stack. if (FuncInfo.StaticAllocaMap.count(&I)) return; // getValue will auto-populate this. SDLoc dl = getCurSDLoc(); Type *Ty = I.getAllocatedType(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); auto &DL = DAG.getDataLayout(); TypeSize TySize = DL.getTypeAllocSize(Ty); MaybeAlign Alignment = std::max(DL.getPrefTypeAlign(Ty), I.getAlign()); SDValue AllocSize = getValue(I.getArraySize()); EVT IntPtr = TLI.getPointerTy(DL, I.getAddressSpace()); if (AllocSize.getValueType() != IntPtr) AllocSize = DAG.getZExtOrTrunc(AllocSize, dl, IntPtr); if (TySize.isScalable()) AllocSize = DAG.getNode(ISD::MUL, dl, IntPtr, AllocSize, DAG.getVScale(dl, IntPtr, APInt(IntPtr.getScalarSizeInBits(), TySize.getKnownMinValue()))); else { SDValue TySizeValue = DAG.getConstant(TySize.getFixedValue(), dl, MVT::getIntegerVT(64)); AllocSize = DAG.getNode(ISD::MUL, dl, IntPtr, AllocSize, DAG.getZExtOrTrunc(TySizeValue, dl, IntPtr)); } // Handle alignment. 
  // If the requested alignment is less than or equal to
  // the stack alignment, ignore it.  If the size is greater than or equal to
  // the stack alignment, we note this in the DYNAMIC_STACKALLOC node.
  Align StackAlign = DAG.getSubtarget().getFrameLowering()->getStackAlign();
  if (*Alignment <= StackAlign)
    Alignment = std::nullopt;

  const uint64_t StackAlignMask = StackAlign.value() - 1U;
  // Round the size of the allocation up to the stack alignment size
  // by add SA-1 to the size. This doesn't overflow because we're computing
  // an address inside an alloca.
  SDNodeFlags Flags;
  Flags.setNoUnsignedWrap(true);
  AllocSize = DAG.getNode(ISD::ADD, dl, AllocSize.getValueType(), AllocSize,
                          DAG.getConstant(StackAlignMask, dl, IntPtr), Flags);

  // Mask out the low bits for alignment purposes.
  AllocSize = DAG.getNode(ISD::AND, dl, AllocSize.getValueType(), AllocSize,
                          DAG.getConstant(~StackAlignMask, dl, IntPtr));

  SDValue Ops[] = {
      getRoot(), AllocSize,
      DAG.getConstant(Alignment ? Alignment->value() : 0, dl, IntPtr)};
  SDVTList VTs = DAG.getVTList(AllocSize.getValueType(), MVT::Other);
  SDValue DSA = DAG.getNode(ISD::DYNAMIC_STACKALLOC, dl, VTs, Ops);
  setValue(&I, DSA);
  DAG.setRoot(DSA.getValue(1));

  assert(FuncInfo.MF->getFrameInfo().hasVarSizedObjects());
}

static const MDNode *getRangeMetadata(const Instruction &I) {
  // If !noundef is not present, then !range violation results in a poison
  // value rather than immediate undefined behavior. In theory, transferring
  // these annotations to SDAG is fine, but in practice there are key SDAG
  // transforms that are known not to be poison-safe, such as folding logical
  // and/or to bitwise and/or. For now, only transfer !range if !noundef is
  // also present.
  if (!I.hasMetadata(LLVMContext::MD_noundef))
    return nullptr;
  return I.getMetadata(LLVMContext::MD_range);
}

static std::optional<ConstantRange> getRange(const Instruction &I) {
  if (const auto *CB = dyn_cast<CallBase>(&I)) {
    // see comment in getRangeMetadata about this check
    if (CB->hasRetAttr(Attribute::NoUndef))
      return CB->getRange();
  }
  if (const MDNode *Range = getRangeMetadata(I))
    return getConstantRangeFromMetadata(*Range);
  return std::nullopt;
}

void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
  if (I.isAtomic())
    return visitAtomicLoad(I);

  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  const Value *SV = I.getOperand(0);
  if (TLI.supportSwiftError()) {
    // Swifterror values can come from either a function parameter with
    // swifterror attribute or an alloca with swifterror attribute.
    if (const Argument *Arg = dyn_cast<Argument>(SV)) {
      if (Arg->hasSwiftErrorAttr())
        return visitLoadFromSwiftError(I);
    }

    if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) {
      if (Alloca->isSwiftError())
        return visitLoadFromSwiftError(I);
    }
  }

  SDValue Ptr = getValue(SV);
  Type *Ty = I.getType();
  SmallVector<EVT, 4> ValueVTs, MemVTs;
  SmallVector<TypeSize, 4> Offsets;
  ComputeValueVTs(TLI, DAG.getDataLayout(), Ty, ValueVTs, &MemVTs, &Offsets);
  unsigned NumValues = ValueVTs.size();
  if (NumValues == 0)
    return;

  Align Alignment = I.getAlign();
  AAMDNodes AAInfo = I.getAAMetadata();
  const MDNode *Ranges = getRangeMetadata(I);
  bool isVolatile = I.isVolatile();
  MachineMemOperand::Flags MMOFlags =
      TLI.getLoadMemOperandFlags(I, DAG.getDataLayout(), AC, LibInfo);

  SDValue Root;
  bool ConstantMemory = false;
  if (isVolatile)
    // Serialize volatile loads with other side effects.
Root = getRoot(); else if (NumValues > MaxParallelChains) Root = getMemoryRoot(); else if (AA && AA->pointsToConstantMemory(MemoryLocation( SV, LocationSize::precise(DAG.getDataLayout().getTypeStoreSize(Ty)), AAInfo))) { // Do not serialize (non-volatile) loads of constant memory with anything. Root = DAG.getEntryNode(); ConstantMemory = true; MMOFlags |= MachineMemOperand::MOInvariant; } else { // Do not serialize non-volatile loads against each other. Root = DAG.getRoot(); } SDLoc dl = getCurSDLoc(); if (isVolatile) Root = TLI.prepareVolatileOrAtomicLoad(Root, dl, DAG); SmallVector Values(NumValues); SmallVector Chains(std::min(MaxParallelChains, NumValues)); unsigned ChainI = 0; for (unsigned i = 0; i != NumValues; ++i, ++ChainI) { // Serializing loads here may result in excessive register pressure, and // TokenFactor places arbitrary choke points on the scheduler. SD scheduling // could recover a bit by hoisting nodes upward in the chain by recognizing // they are side-effect free or do not alias. The optimizer should really // avoid this case by converting large object/array copies to llvm.memcpy // (MaxParallelChains should always remain as failsafe). if (ChainI == MaxParallelChains) { assert(PendingLoads.empty() && "PendingLoads must be serialized first"); SDValue Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, ArrayRef(Chains.data(), ChainI)); Root = Chain; ChainI = 0; } // TODO: MachinePointerInfo only supports a fixed length offset. MachinePointerInfo PtrInfo = !Offsets[i].isScalable() || Offsets[i].isZero() ? MachinePointerInfo(SV, Offsets[i].getKnownMinValue()) : MachinePointerInfo(); SDValue A = DAG.getObjectPtrOffset(dl, Ptr, Offsets[i]); SDValue L = DAG.getLoad(MemVTs[i], dl, Root, A, PtrInfo, Alignment, MMOFlags, AAInfo, Ranges); Chains[ChainI] = L.getValue(1); if (MemVTs[i] != ValueVTs[i]) L = DAG.getPtrExtOrTrunc(L, dl, ValueVTs[i]); Values[i] = L; } if (!ConstantMemory) { SDValue Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, ArrayRef(Chains.data(), ChainI)); if (isVolatile) DAG.setRoot(Chain); else PendingLoads.push_back(Chain); } setValue(&I, DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(ValueVTs), Values)); } void SelectionDAGBuilder::visitStoreToSwiftError(const StoreInst &I) { assert(DAG.getTargetLoweringInfo().supportSwiftError() && "call visitStoreToSwiftError when backend supports swifterror"); SmallVector ValueVTs; SmallVector Offsets; const Value *SrcV = I.getOperand(0); ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), SrcV->getType(), ValueVTs, &Offsets, 0); assert(ValueVTs.size() == 1 && Offsets[0] == 0 && "expect a single EVT for swifterror"); SDValue Src = getValue(SrcV); // Create a virtual register, then update the virtual register. Register VReg = SwiftError.getOrCreateVRegDefAt(&I, FuncInfo.MBB, I.getPointerOperand()); // Chain, DL, Reg, N or Chain, DL, Reg, N, Glue // Chain can be getRoot or getControlRoot. 
SDValue CopyNode = DAG.getCopyToReg(getRoot(), getCurSDLoc(), VReg, SDValue(Src.getNode(), Src.getResNo())); DAG.setRoot(CopyNode); } void SelectionDAGBuilder::visitLoadFromSwiftError(const LoadInst &I) { assert(DAG.getTargetLoweringInfo().supportSwiftError() && "call visitLoadFromSwiftError when backend supports swifterror"); assert(!I.isVolatile() && !I.hasMetadata(LLVMContext::MD_nontemporal) && !I.hasMetadata(LLVMContext::MD_invariant_load) && "Support volatile, non temporal, invariant for load_from_swift_error"); const Value *SV = I.getOperand(0); Type *Ty = I.getType(); assert( (!AA || !AA->pointsToConstantMemory(MemoryLocation( SV, LocationSize::precise(DAG.getDataLayout().getTypeStoreSize(Ty)), I.getAAMetadata()))) && "load_from_swift_error should not be constant memory"); SmallVector ValueVTs; SmallVector Offsets; ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), Ty, ValueVTs, &Offsets, 0); assert(ValueVTs.size() == 1 && Offsets[0] == 0 && "expect a single EVT for swifterror"); // Chain, DL, Reg, VT, Glue or Chain, DL, Reg, VT SDValue L = DAG.getCopyFromReg( getRoot(), getCurSDLoc(), SwiftError.getOrCreateVRegUseAt(&I, FuncInfo.MBB, SV), ValueVTs[0]); setValue(&I, L); } void SelectionDAGBuilder::visitStore(const StoreInst &I) { if (I.isAtomic()) return visitAtomicStore(I); const Value *SrcV = I.getOperand(0); const Value *PtrV = I.getOperand(1); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (TLI.supportSwiftError()) { // Swifterror values can come from either a function parameter with // swifterror attribute or an alloca with swifterror attribute. if (const Argument *Arg = dyn_cast(PtrV)) { if (Arg->hasSwiftErrorAttr()) return visitStoreToSwiftError(I); } if (const AllocaInst *Alloca = dyn_cast(PtrV)) { if (Alloca->isSwiftError()) return visitStoreToSwiftError(I); } } SmallVector ValueVTs, MemVTs; SmallVector Offsets; ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), SrcV->getType(), ValueVTs, &MemVTs, &Offsets); unsigned NumValues = ValueVTs.size(); if (NumValues == 0) return; // Get the lowered operands. Note that we do this after // checking if NumResults is zero, because with zero results // the operands won't have values in the map. SDValue Src = getValue(SrcV); SDValue Ptr = getValue(PtrV); SDValue Root = I.isVolatile() ? getRoot() : getMemoryRoot(); SmallVector Chains(std::min(MaxParallelChains, NumValues)); SDLoc dl = getCurSDLoc(); Align Alignment = I.getAlign(); AAMDNodes AAInfo = I.getAAMetadata(); auto MMOFlags = TLI.getStoreMemOperandFlags(I, DAG.getDataLayout()); unsigned ChainI = 0; for (unsigned i = 0; i != NumValues; ++i, ++ChainI) { // See visitLoad comments. if (ChainI == MaxParallelChains) { SDValue Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, ArrayRef(Chains.data(), ChainI)); Root = Chain; ChainI = 0; } // TODO: MachinePointerInfo only supports a fixed length offset. MachinePointerInfo PtrInfo = !Offsets[i].isScalable() || Offsets[i].isZero() ? 
MachinePointerInfo(PtrV, Offsets[i].getKnownMinValue()) : MachinePointerInfo(); SDValue Add = DAG.getObjectPtrOffset(dl, Ptr, Offsets[i]); SDValue Val = SDValue(Src.getNode(), Src.getResNo() + i); if (MemVTs[i] != ValueVTs[i]) Val = DAG.getPtrExtOrTrunc(Val, dl, MemVTs[i]); SDValue St = DAG.getStore(Root, dl, Val, Add, PtrInfo, Alignment, MMOFlags, AAInfo); Chains[ChainI] = St; } SDValue StoreNode = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, ArrayRef(Chains.data(), ChainI)); setValue(&I, StoreNode); DAG.setRoot(StoreNode); } void SelectionDAGBuilder::visitMaskedStore(const CallInst &I, bool IsCompressing) { SDLoc sdl = getCurSDLoc(); auto getMaskedStoreOps = [&](Value *&Ptr, Value *&Mask, Value *&Src0, Align &Alignment) { // llvm.masked.store.*(Src0, Ptr, alignment, Mask) Src0 = I.getArgOperand(0); Ptr = I.getArgOperand(1); Alignment = cast(I.getArgOperand(2))->getAlignValue(); Mask = I.getArgOperand(3); }; auto getCompressingStoreOps = [&](Value *&Ptr, Value *&Mask, Value *&Src0, Align &Alignment) { // llvm.masked.compressstore.*(Src0, Ptr, Mask) Src0 = I.getArgOperand(0); Ptr = I.getArgOperand(1); Mask = I.getArgOperand(2); Alignment = I.getParamAlign(1).valueOrOne(); }; Value *PtrOperand, *MaskOperand, *Src0Operand; Align Alignment; if (IsCompressing) getCompressingStoreOps(PtrOperand, MaskOperand, Src0Operand, Alignment); else getMaskedStoreOps(PtrOperand, MaskOperand, Src0Operand, Alignment); SDValue Ptr = getValue(PtrOperand); SDValue Src0 = getValue(Src0Operand); SDValue Mask = getValue(MaskOperand); SDValue Offset = DAG.getUNDEF(Ptr.getValueType()); EVT VT = Src0.getValueType(); auto MMOFlags = MachineMemOperand::MOStore; if (I.hasMetadata(LLVMContext::MD_nontemporal)) MMOFlags |= MachineMemOperand::MONonTemporal; MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( MachinePointerInfo(PtrOperand), MMOFlags, LocationSize::beforeOrAfterPointer(), Alignment, I.getAAMetadata()); const auto &TLI = DAG.getTargetLoweringInfo(); const auto &TTI = TLI.getTargetMachine().getTargetTransformInfo(*I.getFunction()); SDValue StoreNode = !IsCompressing && TTI.hasConditionalLoadStoreForType(I.getArgOperand(0)->getType()) ? TLI.visitMaskedStore(DAG, sdl, getMemoryRoot(), MMO, Ptr, Src0, Mask) : DAG.getMaskedStore(getMemoryRoot(), sdl, Src0, Ptr, Offset, Mask, VT, MMO, ISD::UNINDEXED, /*Truncating=*/false, IsCompressing); DAG.setRoot(StoreNode); setValue(&I, StoreNode); } // Get a uniform base for the Gather/Scatter intrinsic. // The first argument of the Gather/Scatter intrinsic is a vector of pointers. // We try to represent it as a base pointer + vector of indices. // Usually, the vector of pointers comes from a 'getelementptr' instruction. // The first operand of the GEP may be a single pointer or a vector of pointers // Example: // %gep.ptr = getelementptr i32, <8 x i32*> %vptr, <8 x i32> %ind // or // %gep.ptr = getelementptr i32, i32* %ptr, <8 x i32> %ind // %res = call <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*> %gep.ptr, .. // // When the first GEP operand is a single pointer - it is the uniform base we // are looking for. If first operand of the GEP is a splat vector - we // extract the splat value and use it as a uniform base. // In all other cases the function returns 'false'. 
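// Illustrative sketch (not part of the lowering itself): for the scalar-base
// form above, and assuming an i32 element type on a 64-bit target,
//   %gep.ptr = getelementptr i32, i32* %ptr, <8 x i32> %ind
// decomposes into Base = %ptr, Index = %ind, Scale = 4 (the element size)
// with IndexType = ISD::SIGNED_SCALED, so lane i addresses Base + Index[i]*4.
// For a splat-constant pointer vector, Base is the splat value and Index is
// an all-zero vector with Scale = 1.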
static bool getUniformBase(const Value *Ptr, SDValue &Base, SDValue &Index, ISD::MemIndexType &IndexType, SDValue &Scale, SelectionDAGBuilder *SDB, const BasicBlock *CurBB, uint64_t ElemSize) { SelectionDAG& DAG = SDB->DAG; const TargetLowering &TLI = DAG.getTargetLoweringInfo(); const DataLayout &DL = DAG.getDataLayout(); assert(Ptr->getType()->isVectorTy() && "Unexpected pointer type"); // Handle splat constant pointer. if (auto *C = dyn_cast(Ptr)) { C = C->getSplatValue(); if (!C) return false; Base = SDB->getValue(C); ElementCount NumElts = cast(Ptr->getType())->getElementCount(); EVT VT = EVT::getVectorVT(*DAG.getContext(), TLI.getPointerTy(DL), NumElts); Index = DAG.getConstant(0, SDB->getCurSDLoc(), VT); IndexType = ISD::SIGNED_SCALED; Scale = DAG.getTargetConstant(1, SDB->getCurSDLoc(), TLI.getPointerTy(DL)); return true; } const GetElementPtrInst *GEP = dyn_cast(Ptr); if (!GEP || GEP->getParent() != CurBB) return false; if (GEP->getNumOperands() != 2) return false; const Value *BasePtr = GEP->getPointerOperand(); const Value *IndexVal = GEP->getOperand(GEP->getNumOperands() - 1); // Make sure the base is scalar and the index is a vector. if (BasePtr->getType()->isVectorTy() || !IndexVal->getType()->isVectorTy()) return false; TypeSize ScaleVal = DL.getTypeAllocSize(GEP->getResultElementType()); if (ScaleVal.isScalable()) return false; // Target may not support the required addressing mode. if (ScaleVal != 1 && !TLI.isLegalScaleForGatherScatter(ScaleVal.getFixedValue(), ElemSize)) return false; Base = SDB->getValue(BasePtr); Index = SDB->getValue(IndexVal); IndexType = ISD::SIGNED_SCALED; Scale = DAG.getTargetConstant(ScaleVal, SDB->getCurSDLoc(), TLI.getPointerTy(DL)); return true; } void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) { SDLoc sdl = getCurSDLoc(); // llvm.masked.scatter.*(Src0, Ptrs, alignment, Mask) const Value *Ptr = I.getArgOperand(1); SDValue Src0 = getValue(I.getArgOperand(0)); SDValue Mask = getValue(I.getArgOperand(3)); EVT VT = Src0.getValueType(); Align Alignment = cast(I.getArgOperand(2)) ->getMaybeAlignValue() .value_or(DAG.getEVTAlign(VT.getScalarType())); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue Base; SDValue Index; ISD::MemIndexType IndexType; SDValue Scale; bool UniformBase = getUniformBase(Ptr, Base, Index, IndexType, Scale, this, I.getParent(), VT.getScalarStoreSize()); unsigned AS = Ptr->getType()->getScalarType()->getPointerAddressSpace(); MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( MachinePointerInfo(AS), MachineMemOperand::MOStore, LocationSize::beforeOrAfterPointer(), Alignment, I.getAAMetadata()); if (!UniformBase) { Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout())); Index = getValue(Ptr); IndexType = ISD::SIGNED_SCALED; Scale = DAG.getTargetConstant(1, sdl, TLI.getPointerTy(DAG.getDataLayout())); } EVT IdxVT = Index.getValueType(); EVT EltTy = IdxVT.getVectorElementType(); if (TLI.shouldExtendGSIndex(IdxVT, EltTy)) { EVT NewIdxVT = IdxVT.changeVectorElementType(EltTy); Index = DAG.getNode(ISD::SIGN_EXTEND, sdl, NewIdxVT, Index); } SDValue Ops[] = { getMemoryRoot(), Src0, Mask, Base, Index, Scale }; SDValue Scatter = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), VT, sdl, Ops, MMO, IndexType, false); DAG.setRoot(Scatter); setValue(&I, Scatter); } void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) { SDLoc sdl = getCurSDLoc(); auto getMaskedLoadOps = [&](Value *&Ptr, Value *&Mask, Value *&Src0, Align &Alignment) { // 
@llvm.masked.load.*(Ptr, alignment, Mask, Src0) Ptr = I.getArgOperand(0); Alignment = cast(I.getArgOperand(1))->getAlignValue(); Mask = I.getArgOperand(2); Src0 = I.getArgOperand(3); }; auto getExpandingLoadOps = [&](Value *&Ptr, Value *&Mask, Value *&Src0, Align &Alignment) { // @llvm.masked.expandload.*(Ptr, Mask, Src0) Ptr = I.getArgOperand(0); Alignment = I.getParamAlign(0).valueOrOne(); Mask = I.getArgOperand(1); Src0 = I.getArgOperand(2); }; Value *PtrOperand, *MaskOperand, *Src0Operand; Align Alignment; if (IsExpanding) getExpandingLoadOps(PtrOperand, MaskOperand, Src0Operand, Alignment); else getMaskedLoadOps(PtrOperand, MaskOperand, Src0Operand, Alignment); SDValue Ptr = getValue(PtrOperand); SDValue Src0 = getValue(Src0Operand); SDValue Mask = getValue(MaskOperand); SDValue Offset = DAG.getUNDEF(Ptr.getValueType()); EVT VT = Src0.getValueType(); AAMDNodes AAInfo = I.getAAMetadata(); const MDNode *Ranges = getRangeMetadata(I); // Do not serialize masked loads of constant memory with anything. MemoryLocation ML = MemoryLocation::getAfter(PtrOperand, AAInfo); bool AddToChain = !AA || !AA->pointsToConstantMemory(ML); SDValue InChain = AddToChain ? DAG.getRoot() : DAG.getEntryNode(); auto MMOFlags = MachineMemOperand::MOLoad; if (I.hasMetadata(LLVMContext::MD_nontemporal)) MMOFlags |= MachineMemOperand::MONonTemporal; MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( MachinePointerInfo(PtrOperand), MMOFlags, LocationSize::beforeOrAfterPointer(), Alignment, AAInfo, Ranges); const auto &TLI = DAG.getTargetLoweringInfo(); const auto &TTI = TLI.getTargetMachine().getTargetTransformInfo(*I.getFunction()); // The Load/Res may point to different values and both of them are output // variables. SDValue Load; SDValue Res; if (!IsExpanding && TTI.hasConditionalLoadStoreForType(Src0Operand->getType())) Res = TLI.visitMaskedLoad(DAG, sdl, InChain, MMO, Load, Ptr, Src0, Mask); else Res = Load = DAG.getMaskedLoad(VT, sdl, InChain, Ptr, Offset, Mask, Src0, VT, MMO, ISD::UNINDEXED, ISD::NON_EXTLOAD, IsExpanding); if (AddToChain) PendingLoads.push_back(Load.getValue(1)); setValue(&I, Res); } void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) { SDLoc sdl = getCurSDLoc(); // @llvm.masked.gather.*(Ptrs, alignment, Mask, Src0) const Value *Ptr = I.getArgOperand(0); SDValue Src0 = getValue(I.getArgOperand(3)); SDValue Mask = getValue(I.getArgOperand(2)); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); Align Alignment = cast(I.getArgOperand(1)) ->getMaybeAlignValue() .value_or(DAG.getEVTAlign(VT.getScalarType())); const MDNode *Ranges = getRangeMetadata(I); SDValue Root = DAG.getRoot(); SDValue Base; SDValue Index; ISD::MemIndexType IndexType; SDValue Scale; bool UniformBase = getUniformBase(Ptr, Base, Index, IndexType, Scale, this, I.getParent(), VT.getScalarStoreSize()); unsigned AS = Ptr->getType()->getScalarType()->getPointerAddressSpace(); MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( MachinePointerInfo(AS), MachineMemOperand::MOLoad, LocationSize::beforeOrAfterPointer(), Alignment, I.getAAMetadata(), Ranges); if (!UniformBase) { Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout())); Index = getValue(Ptr); IndexType = ISD::SIGNED_SCALED; Scale = DAG.getTargetConstant(1, sdl, TLI.getPointerTy(DAG.getDataLayout())); } EVT IdxVT = Index.getValueType(); EVT EltTy = IdxVT.getVectorElementType(); if (TLI.shouldExtendGSIndex(IdxVT, EltTy)) { EVT NewIdxVT = 
IdxVT.changeVectorElementType(EltTy); Index = DAG.getNode(ISD::SIGN_EXTEND, sdl, NewIdxVT, Index); } SDValue Ops[] = { Root, Src0, Mask, Base, Index, Scale }; SDValue Gather = DAG.getMaskedGather(DAG.getVTList(VT, MVT::Other), VT, sdl, Ops, MMO, IndexType, ISD::NON_EXTLOAD); PendingLoads.push_back(Gather.getValue(1)); setValue(&I, Gather); } void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) { SDLoc dl = getCurSDLoc(); AtomicOrdering SuccessOrdering = I.getSuccessOrdering(); AtomicOrdering FailureOrdering = I.getFailureOrdering(); SyncScope::ID SSID = I.getSyncScopeID(); SDValue InChain = getRoot(); MVT MemVT = getValue(I.getCompareOperand()).getSimpleValueType(); SDVTList VTs = DAG.getVTList(MemVT, MVT::i1, MVT::Other); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); auto Flags = TLI.getAtomicMemOperandFlags(I, DAG.getDataLayout()); MachineFunction &MF = DAG.getMachineFunction(); MachineMemOperand *MMO = MF.getMachineMemOperand( MachinePointerInfo(I.getPointerOperand()), Flags, LocationSize::precise(MemVT.getStoreSize()), DAG.getEVTAlign(MemVT), AAMDNodes(), nullptr, SSID, SuccessOrdering, FailureOrdering); SDValue L = DAG.getAtomicCmpSwap(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, dl, MemVT, VTs, InChain, getValue(I.getPointerOperand()), getValue(I.getCompareOperand()), getValue(I.getNewValOperand()), MMO); SDValue OutChain = L.getValue(2); setValue(&I, L); DAG.setRoot(OutChain); } void SelectionDAGBuilder::visitAtomicRMW(const AtomicRMWInst &I) { SDLoc dl = getCurSDLoc(); ISD::NodeType NT; switch (I.getOperation()) { default: llvm_unreachable("Unknown atomicrmw operation"); case AtomicRMWInst::Xchg: NT = ISD::ATOMIC_SWAP; break; case AtomicRMWInst::Add: NT = ISD::ATOMIC_LOAD_ADD; break; case AtomicRMWInst::Sub: NT = ISD::ATOMIC_LOAD_SUB; break; case AtomicRMWInst::And: NT = ISD::ATOMIC_LOAD_AND; break; case AtomicRMWInst::Nand: NT = ISD::ATOMIC_LOAD_NAND; break; case AtomicRMWInst::Or: NT = ISD::ATOMIC_LOAD_OR; break; case AtomicRMWInst::Xor: NT = ISD::ATOMIC_LOAD_XOR; break; case AtomicRMWInst::Max: NT = ISD::ATOMIC_LOAD_MAX; break; case AtomicRMWInst::Min: NT = ISD::ATOMIC_LOAD_MIN; break; case AtomicRMWInst::UMax: NT = ISD::ATOMIC_LOAD_UMAX; break; case AtomicRMWInst::UMin: NT = ISD::ATOMIC_LOAD_UMIN; break; case AtomicRMWInst::FAdd: NT = ISD::ATOMIC_LOAD_FADD; break; case AtomicRMWInst::FSub: NT = ISD::ATOMIC_LOAD_FSUB; break; case AtomicRMWInst::FMax: NT = ISD::ATOMIC_LOAD_FMAX; break; case AtomicRMWInst::FMin: NT = ISD::ATOMIC_LOAD_FMIN; break; case AtomicRMWInst::UIncWrap: NT = ISD::ATOMIC_LOAD_UINC_WRAP; break; case AtomicRMWInst::UDecWrap: NT = ISD::ATOMIC_LOAD_UDEC_WRAP; break; } AtomicOrdering Ordering = I.getOrdering(); SyncScope::ID SSID = I.getSyncScopeID(); SDValue InChain = getRoot(); auto MemVT = getValue(I.getValOperand()).getSimpleValueType(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); auto Flags = TLI.getAtomicMemOperandFlags(I, DAG.getDataLayout()); MachineFunction &MF = DAG.getMachineFunction(); MachineMemOperand *MMO = MF.getMachineMemOperand( MachinePointerInfo(I.getPointerOperand()), Flags, LocationSize::precise(MemVT.getStoreSize()), DAG.getEVTAlign(MemVT), AAMDNodes(), nullptr, SSID, Ordering); SDValue L = DAG.getAtomic(NT, dl, MemVT, InChain, getValue(I.getPointerOperand()), getValue(I.getValOperand()), MMO); SDValue OutChain = L.getValue(1); setValue(&I, L); DAG.setRoot(OutChain); } void SelectionDAGBuilder::visitFence(const FenceInst &I) { SDLoc dl = getCurSDLoc(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 
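// The ATOMIC_FENCE node built below takes three operands: the incoming
// chain, the ordering of the fence, and its synchronization scope ID, with
// the latter two encoded as target constants of the fence operand type.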
SDValue Ops[3]; Ops[0] = getRoot(); Ops[1] = DAG.getTargetConstant((unsigned)I.getOrdering(), dl, TLI.getFenceOperandTy(DAG.getDataLayout())); Ops[2] = DAG.getTargetConstant(I.getSyncScopeID(), dl, TLI.getFenceOperandTy(DAG.getDataLayout())); SDValue N = DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops); setValue(&I, N); DAG.setRoot(N); } void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) { SDLoc dl = getCurSDLoc(); AtomicOrdering Order = I.getOrdering(); SyncScope::ID SSID = I.getSyncScopeID(); SDValue InChain = getRoot(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); EVT MemVT = TLI.getMemValueType(DAG.getDataLayout(), I.getType()); if (!TLI.supportsUnalignedAtomics() && I.getAlign().value() < MemVT.getSizeInBits() / 8) report_fatal_error("Cannot generate unaligned atomic load"); auto Flags = TLI.getLoadMemOperandFlags(I, DAG.getDataLayout(), AC, LibInfo); MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( MachinePointerInfo(I.getPointerOperand()), Flags, LocationSize::precise(MemVT.getStoreSize()), I.getAlign(), AAMDNodes(), nullptr, SSID, Order); InChain = TLI.prepareVolatileOrAtomicLoad(InChain, dl, DAG); SDValue Ptr = getValue(I.getPointerOperand()); SDValue L = DAG.getAtomic(ISD::ATOMIC_LOAD, dl, MemVT, MemVT, InChain, Ptr, MMO); SDValue OutChain = L.getValue(1); if (MemVT != VT) L = DAG.getPtrExtOrTrunc(L, dl, VT); setValue(&I, L); DAG.setRoot(OutChain); } void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) { SDLoc dl = getCurSDLoc(); AtomicOrdering Ordering = I.getOrdering(); SyncScope::ID SSID = I.getSyncScopeID(); SDValue InChain = getRoot(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); EVT MemVT = TLI.getMemValueType(DAG.getDataLayout(), I.getValueOperand()->getType()); if (!TLI.supportsUnalignedAtomics() && I.getAlign().value() < MemVT.getSizeInBits() / 8) report_fatal_error("Cannot generate unaligned atomic store"); auto Flags = TLI.getStoreMemOperandFlags(I, DAG.getDataLayout()); MachineFunction &MF = DAG.getMachineFunction(); MachineMemOperand *MMO = MF.getMachineMemOperand( MachinePointerInfo(I.getPointerOperand()), Flags, LocationSize::precise(MemVT.getStoreSize()), I.getAlign(), AAMDNodes(), nullptr, SSID, Ordering); SDValue Val = getValue(I.getValueOperand()); if (Val.getValueType() != MemVT) Val = DAG.getPtrExtOrTrunc(Val, dl, MemVT); SDValue Ptr = getValue(I.getPointerOperand()); SDValue OutChain = DAG.getAtomic(ISD::ATOMIC_STORE, dl, MemVT, InChain, Val, Ptr, MMO); setValue(&I, OutChain); DAG.setRoot(OutChain); } /// visitTargetIntrinsic - Lower a call of a target intrinsic to an INTRINSIC /// node. void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, unsigned Intrinsic) { // Ignore the callsite's attributes. A specific call site may be marked with // readnone, but the lowering code will expect the chain based on the // definition. const Function *F = I.getCalledFunction(); bool HasChain = !F->doesNotAccessMemory(); bool OnlyLoad = HasChain && F->onlyReadsMemory(); // Build the operand list. SmallVector Ops; if (HasChain) { // If this intrinsic has side-effects, chainify it. if (OnlyLoad) { // We don't need to serialize loads against other loads. 
Ops.push_back(DAG.getRoot()); } else { Ops.push_back(getRoot()); } } // Info is set by getTgtMemIntrinsic TargetLowering::IntrinsicInfo Info; const TargetLowering &TLI = DAG.getTargetLoweringInfo(); bool IsTgtIntrinsic = TLI.getTgtMemIntrinsic(Info, I, DAG.getMachineFunction(), Intrinsic); // Add the intrinsic ID as an integer operand if it's not a target intrinsic. if (!IsTgtIntrinsic || Info.opc == ISD::INTRINSIC_VOID || Info.opc == ISD::INTRINSIC_W_CHAIN) Ops.push_back(DAG.getTargetConstant(Intrinsic, getCurSDLoc(), TLI.getPointerTy(DAG.getDataLayout()))); // Add all operands of the call to the operand list. for (unsigned i = 0, e = I.arg_size(); i != e; ++i) { const Value *Arg = I.getArgOperand(i); if (!I.paramHasAttr(i, Attribute::ImmArg)) { Ops.push_back(getValue(Arg)); continue; } // Use TargetConstant instead of a regular constant for immarg. EVT VT = TLI.getValueType(DAG.getDataLayout(), Arg->getType(), true); if (const ConstantInt *CI = dyn_cast(Arg)) { assert(CI->getBitWidth() <= 64 && "large intrinsic immediates not handled"); Ops.push_back(DAG.getTargetConstant(*CI, SDLoc(), VT)); } else { Ops.push_back( DAG.getTargetConstantFP(*cast(Arg), SDLoc(), VT)); } } SmallVector ValueVTs; ComputeValueVTs(TLI, DAG.getDataLayout(), I.getType(), ValueVTs); if (HasChain) ValueVTs.push_back(MVT::Other); SDVTList VTs = DAG.getVTList(ValueVTs); // Propagate fast-math-flags from IR to node(s). SDNodeFlags Flags; if (auto *FPMO = dyn_cast(&I)) Flags.copyFMF(*FPMO); SelectionDAG::FlagInserter FlagsInserter(DAG, Flags); // Create the node. SDValue Result; if (auto Bundle = I.getOperandBundle(LLVMContext::OB_convergencectrl)) { auto *Token = Bundle->Inputs[0].get(); SDValue ConvControlToken = getValue(Token); assert(Ops.back().getValueType() != MVT::Glue && "Did not expected another glue node here."); ConvControlToken = DAG.getNode(ISD::CONVERGENCECTRL_GLUE, {}, MVT::Glue, ConvControlToken); Ops.push_back(ConvControlToken); } // In some cases, custom collection of operands from CallInst I may be needed. TLI.CollectTargetIntrinsicOperands(I, Ops, DAG); if (IsTgtIntrinsic) { // This is target intrinsic that touches memory // // TODO: We currently just fallback to address space 0 if getTgtMemIntrinsic // didn't yield anything useful. MachinePointerInfo MPI; if (Info.ptrVal) MPI = MachinePointerInfo(Info.ptrVal, Info.offset); else if (Info.fallbackAddressSpace) MPI = MachinePointerInfo(*Info.fallbackAddressSpace); Result = DAG.getMemIntrinsicNode(Info.opc, getCurSDLoc(), VTs, Ops, Info.memVT, MPI, Info.align, Info.flags, Info.size, I.getAAMetadata()); } else if (!HasChain) { Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurSDLoc(), VTs, Ops); } else if (!I.getType()->isVoidTy()) { Result = DAG.getNode(ISD::INTRINSIC_W_CHAIN, getCurSDLoc(), VTs, Ops); } else { Result = DAG.getNode(ISD::INTRINSIC_VOID, getCurSDLoc(), VTs, Ops); } if (HasChain) { SDValue Chain = Result.getValue(Result.getNode()->getNumValues()-1); if (OnlyLoad) PendingLoads.push_back(Chain); else DAG.setRoot(Chain); } if (!I.getType()->isVoidTy()) { if (!isa(I.getType())) Result = lowerRangeToAssertZExt(DAG, I, Result); MaybeAlign Alignment = I.getRetAlign(); // Insert `assertalign` node if there's an alignment. 
if (InsertAssertAlign && Alignment) { Result = DAG.getAssertAlign(getCurSDLoc(), Result, Alignment.valueOrOne()); } } setValue(&I, Result); } /// GetSignificand - Get the significand and build it into a floating-point /// number with exponent of 1: /// /// Op = (Op & 0x007fffff) | 0x3f800000; /// /// where Op is the hexadecimal representation of floating point value. static SDValue GetSignificand(SelectionDAG &DAG, SDValue Op, const SDLoc &dl) { SDValue t1 = DAG.getNode(ISD::AND, dl, MVT::i32, Op, DAG.getConstant(0x007fffff, dl, MVT::i32)); SDValue t2 = DAG.getNode(ISD::OR, dl, MVT::i32, t1, DAG.getConstant(0x3f800000, dl, MVT::i32)); return DAG.getNode(ISD::BITCAST, dl, MVT::f32, t2); } /// GetExponent - Get the exponent: /// /// (float)(int)(((Op & 0x7f800000) >> 23) - 127); /// /// where Op is the hexadecimal representation of floating point value. static SDValue GetExponent(SelectionDAG &DAG, SDValue Op, const TargetLowering &TLI, const SDLoc &dl) { SDValue t0 = DAG.getNode(ISD::AND, dl, MVT::i32, Op, DAG.getConstant(0x7f800000, dl, MVT::i32)); SDValue t1 = DAG.getNode( ISD::SRL, dl, MVT::i32, t0, DAG.getConstant(23, dl, TLI.getShiftAmountTy(MVT::i32, DAG.getDataLayout()))); SDValue t2 = DAG.getNode(ISD::SUB, dl, MVT::i32, t1, DAG.getConstant(127, dl, MVT::i32)); return DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, t2); } /// getF32Constant - Get 32-bit floating point constant. static SDValue getF32Constant(SelectionDAG &DAG, unsigned Flt, const SDLoc &dl) { return DAG.getConstantFP(APFloat(APFloat::IEEEsingle(), APInt(32, Flt)), dl, MVT::f32); } static SDValue getLimitedPrecisionExp2(SDValue t0, const SDLoc &dl, SelectionDAG &DAG) { // TODO: What fast-math-flags should be set on the floating-point nodes? // IntegerPartOfX = ((int32_t)(t0); SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0); // FractionalPartOfX = t0 - (float)IntegerPartOfX; SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX); SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, t1); // IntegerPartOfX <<= 23; IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX, DAG.getConstant(23, dl, DAG.getTargetLoweringInfo().getShiftAmountTy( MVT::i32, DAG.getDataLayout()))); SDValue TwoToFractionalPartOfX; if (LimitFloatPrecision <= 6) { // For floating-point precision of 6: // // TwoToFractionalPartOfX = // 0.997535578f + // (0.735607626f + 0.252464424f * x) * x; // // error 0.0144103317, which is 6 bits SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, getF32Constant(DAG, 0x3e814304, dl)); SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, getF32Constant(DAG, 0x3f3c50c8, dl)); SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, getF32Constant(DAG, 0x3f7f5e7e, dl)); } else if (LimitFloatPrecision <= 12) { // For floating-point precision of 12: // // TwoToFractionalPartOfX = // 0.999892986f + // (0.696457318f + // (0.224338339f + 0.792043434e-1f * x) * x) * x; // // error 0.000107046256, which is 13 to 14 bits SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, getF32Constant(DAG, 0x3da235e3, dl)); SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, getF32Constant(DAG, 0x3e65b8f3, dl)); SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, getF32Constant(DAG, 0x3f324b07, dl)); SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, getF32Constant(DAG, 0x3f7ff8fd, dl)); } 
else { // LimitFloatPrecision <= 18 // For floating-point precision of 18: // // TwoToFractionalPartOfX = // 0.999999982f + // (0.693148872f + // (0.240227044f + // (0.554906021e-1f + // (0.961591928e-2f + // (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x; // error 2.47208000*10^(-7), which is better than 18 bits SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, getF32Constant(DAG, 0x3924b03e, dl)); SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, getF32Constant(DAG, 0x3ab24b87, dl)); SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, getF32Constant(DAG, 0x3c1d8c17, dl)); SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, getF32Constant(DAG, 0x3d634a1d, dl)); SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8, getF32Constant(DAG, 0x3e75fe14, dl)); SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X); SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10, getF32Constant(DAG, 0x3f317234, dl)); SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X); TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t12, getF32Constant(DAG, 0x3f800000, dl)); } // Add the exponent into the result in integer domain. SDValue t13 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, TwoToFractionalPartOfX); return DAG.getNode(ISD::BITCAST, dl, MVT::f32, DAG.getNode(ISD::ADD, dl, MVT::i32, t13, IntegerPartOfX)); } /// expandExp - Lower an exp intrinsic. Handles the special sequences for /// limited-precision mode. static SDValue expandExp(const SDLoc &dl, SDValue Op, SelectionDAG &DAG, const TargetLowering &TLI, SDNodeFlags Flags) { if (Op.getValueType() == MVT::f32 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { // Put the exponent in the right bit position for later addition to the // final result: // // t0 = Op * log2(e) // TODO: What fast-math-flags should be set here? SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, Op, DAG.getConstantFP(numbers::log2ef, dl, MVT::f32)); return getLimitedPrecisionExp2(t0, dl, DAG); } // No special expansion. return DAG.getNode(ISD::FEXP, dl, Op.getValueType(), Op, Flags); } /// expandLog - Lower a log intrinsic. Handles the special sequences for /// limited-precision mode. static SDValue expandLog(const SDLoc &dl, SDValue Op, SelectionDAG &DAG, const TargetLowering &TLI, SDNodeFlags Flags) { // TODO: What fast-math-flags should be set on the floating-point nodes? if (Op.getValueType() == MVT::f32 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op); // Scale the exponent by log(2). SDValue Exp = GetExponent(DAG, Op1, TLI, dl); SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp, DAG.getConstantFP(numbers::ln2f, dl, MVT::f32)); // Get the significand and build it into a floating-point number with // exponent of 1. 
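// Worked example (for illustration only): Op = 6.0f has bit pattern
// 0x40C00000, so GetSignificand returns 0x3FC00000 == 1.5f and GetExponent
// returns 2.0f; log(6.0f) is then approximated as 2*ln(2) plus the
// polynomial estimate of log(1.5) computed below.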
SDValue X = GetSignificand(DAG, Op1, dl); SDValue LogOfMantissa; if (LimitFloatPrecision <= 6) { // For floating-point precision of 6: // // LogofMantissa = // -1.1609546f + // (1.4034025f - 0.23903021f * x) * x; // // error 0.0034276066, which is better than 8 bits SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, getF32Constant(DAG, 0xbe74c456, dl)); SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, getF32Constant(DAG, 0x3fb3a2b1, dl)); SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, getF32Constant(DAG, 0x3f949a29, dl)); } else if (LimitFloatPrecision <= 12) { // For floating-point precision of 12: // // LogOfMantissa = // -1.7417939f + // (2.8212026f + // (-1.4699568f + // (0.44717955f - 0.56570851e-1f * x) * x) * x) * x; // // error 0.000061011436, which is 14 bits SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, getF32Constant(DAG, 0xbd67b6d6, dl)); SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, getF32Constant(DAG, 0x3ee4f4b8, dl)); SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, getF32Constant(DAG, 0x3fbc278b, dl)); SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, getF32Constant(DAG, 0x40348e95, dl)); SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6, getF32Constant(DAG, 0x3fdef31a, dl)); } else { // LimitFloatPrecision <= 18 // For floating-point precision of 18: // // LogOfMantissa = // -2.1072184f + // (4.2372794f + // (-3.7029485f + // (2.2781945f + // (-0.87823314f + // (0.19073739f - 0.17809712e-1f * x) * x) * x) * x) * x)*x; // // error 0.0000023660568, which is better than 18 bits SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, getF32Constant(DAG, 0xbc91e5ac, dl)); SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, getF32Constant(DAG, 0x3e4350aa, dl)); SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, getF32Constant(DAG, 0x3f60d3e3, dl)); SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, getF32Constant(DAG, 0x4011cdf0, dl)); SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6, getF32Constant(DAG, 0x406cfd1c, dl)); SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8, getF32Constant(DAG, 0x408797cb, dl)); SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X); LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10, getF32Constant(DAG, 0x4006dcab, dl)); } return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, LogOfMantissa); } // No special expansion. return DAG.getNode(ISD::FLOG, dl, Op.getValueType(), Op, Flags); } /// expandLog2 - Lower a log2 intrinsic. Handles the special sequences for /// limited-precision mode. static SDValue expandLog2(const SDLoc &dl, SDValue Op, SelectionDAG &DAG, const TargetLowering &TLI, SDNodeFlags Flags) { // TODO: What fast-math-flags should be set on the floating-point nodes? if (Op.getValueType() == MVT::f32 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op); // Get the exponent. SDValue LogOfExponent = GetExponent(DAG, Op1, TLI, dl); // Get the significand and build it into a floating-point number with // exponent of 1. 
SDValue X = GetSignificand(DAG, Op1, dl); // Different possible minimax approximations of significand in // floating-point for various degrees of accuracy over [1,2]. SDValue Log2ofMantissa; if (LimitFloatPrecision <= 6) { // For floating-point precision of 6: // // Log2ofMantissa = -1.6749035f + (2.0246817f - .34484768f * x) * x; // // error 0.0049451742, which is more than 7 bits SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, getF32Constant(DAG, 0xbeb08fe0, dl)); SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, getF32Constant(DAG, 0x40019463, dl)); SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, getF32Constant(DAG, 0x3fd6633d, dl)); } else if (LimitFloatPrecision <= 12) { // For floating-point precision of 12: // // Log2ofMantissa = // -2.51285454f + // (4.07009056f + // (-2.12067489f + // (.645142248f - 0.816157886e-1f * x) * x) * x) * x; // // error 0.0000876136000, which is better than 13 bits SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, getF32Constant(DAG, 0xbda7262e, dl)); SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, getF32Constant(DAG, 0x3f25280b, dl)); SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, getF32Constant(DAG, 0x4007b923, dl)); SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, getF32Constant(DAG, 0x40823e2f, dl)); SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6, getF32Constant(DAG, 0x4020d29c, dl)); } else { // LimitFloatPrecision <= 18 // For floating-point precision of 18: // // Log2ofMantissa = // -3.0400495f + // (6.1129976f + // (-5.3420409f + // (3.2865683f + // (-1.2669343f + // (0.27515199f - // 0.25691327e-1f * x) * x) * x) * x) * x) * x; // // error 0.0000018516, which is better than 18 bits SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, getF32Constant(DAG, 0xbcd2769e, dl)); SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, getF32Constant(DAG, 0x3e8ce0b9, dl)); SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, getF32Constant(DAG, 0x3fa22ae7, dl)); SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, getF32Constant(DAG, 0x40525723, dl)); SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6, getF32Constant(DAG, 0x40aaf200, dl)); SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8, getF32Constant(DAG, 0x40c39dad, dl)); SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X); Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10, getF32Constant(DAG, 0x4042902c, dl)); } return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, Log2ofMantissa); } // No special expansion. return DAG.getNode(ISD::FLOG2, dl, Op.getValueType(), Op, Flags); } /// expandLog10 - Lower a log10 intrinsic. Handles the special sequences for /// limited-precision mode. static SDValue expandLog10(const SDLoc &dl, SDValue Op, SelectionDAG &DAG, const TargetLowering &TLI, SDNodeFlags Flags) { // TODO: What fast-math-flags should be set on the floating-point nodes? if (Op.getValueType() == MVT::f32 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op); // Scale the exponent by log10(2) [0.30102999f]. 
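// (0x3e9a209a below is the IEEE-754 single-precision bit pattern of
// log10(2) ~= 0.30103f; the extracted exponent is multiplied by it.)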
SDValue Exp = GetExponent(DAG, Op1, TLI, dl); SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp, getF32Constant(DAG, 0x3e9a209a, dl)); // Get the significand and build it into a floating-point number with // exponent of 1. SDValue X = GetSignificand(DAG, Op1, dl); SDValue Log10ofMantissa; if (LimitFloatPrecision <= 6) { // For floating-point precision of 6: // // Log10ofMantissa = // -0.50419619f + // (0.60948995f - 0.10380950f * x) * x; // // error 0.0014886165, which is 6 bits SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, getF32Constant(DAG, 0xbdd49a13, dl)); SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, getF32Constant(DAG, 0x3f1c0789, dl)); SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, getF32Constant(DAG, 0x3f011300, dl)); } else if (LimitFloatPrecision <= 12) { // For floating-point precision of 12: // // Log10ofMantissa = // -0.64831180f + // (0.91751397f + // (-0.31664806f + 0.47637168e-1f * x) * x) * x; // // error 0.00019228036, which is better than 12 bits SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, getF32Constant(DAG, 0x3d431f31, dl)); SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, getF32Constant(DAG, 0x3ea21fb2, dl)); SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, getF32Constant(DAG, 0x3f6ae232, dl)); SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4, getF32Constant(DAG, 0x3f25f7c3, dl)); } else { // LimitFloatPrecision <= 18 // For floating-point precision of 18: // // Log10ofMantissa = // -0.84299375f + // (1.5327582f + // (-1.0688956f + // (0.49102474f + // (-0.12539807f + 0.13508273e-1f * x) * x) * x) * x) * x; // // error 0.0000037995730, which is better than 18 bits SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, getF32Constant(DAG, 0x3c5d51ce, dl)); SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, getF32Constant(DAG, 0x3e00685a, dl)); SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, getF32Constant(DAG, 0x3efb6798, dl)); SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); SDValue t5 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4, getF32Constant(DAG, 0x3f88d192, dl)); SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, getF32Constant(DAG, 0x3fc4316c, dl)); SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t8, getF32Constant(DAG, 0x3f57ce70, dl)); } return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, Log10ofMantissa); } // No special expansion. return DAG.getNode(ISD::FLOG10, dl, Op.getValueType(), Op, Flags); } /// expandExp2 - Lower an exp2 intrinsic. Handles the special sequences for /// limited-precision mode. static SDValue expandExp2(const SDLoc &dl, SDValue Op, SelectionDAG &DAG, const TargetLowering &TLI, SDNodeFlags Flags) { if (Op.getValueType() == MVT::f32 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) return getLimitedPrecisionExp2(Op, dl, DAG); // No special expansion. return DAG.getNode(ISD::FEXP2, dl, Op.getValueType(), Op, Flags); } /// visitPow - Lower a pow intrinsic. Handles the special sequences for /// limited-precision mode with x == 10.0f. 
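///
/// Sketch of that path: when the base is known to be exactly 10.0f,
/// pow(10, x) is rewritten as exp2(x * log2(10)), with log2(10) ~= 3.3219281f
/// materialized from the bit pattern 0x40549a78 and the exp2 then expanded by
/// getLimitedPrecisionExp2.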
static SDValue expandPow(const SDLoc &dl, SDValue LHS, SDValue RHS, SelectionDAG &DAG, const TargetLowering &TLI, SDNodeFlags Flags) { bool IsExp10 = false; if (LHS.getValueType() == MVT::f32 && RHS.getValueType() == MVT::f32 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { if (ConstantFPSDNode *LHSC = dyn_cast(LHS)) { APFloat Ten(10.0f); IsExp10 = LHSC->isExactlyValue(Ten); } } // TODO: What fast-math-flags should be set on the FMUL node? if (IsExp10) { // Put the exponent in the right bit position for later addition to the // final result: // // #define LOG2OF10 3.3219281f // t0 = Op * LOG2OF10; SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, RHS, getF32Constant(DAG, 0x40549a78, dl)); return getLimitedPrecisionExp2(t0, dl, DAG); } // No special expansion. return DAG.getNode(ISD::FPOW, dl, LHS.getValueType(), LHS, RHS, Flags); } /// ExpandPowI - Expand a llvm.powi intrinsic. static SDValue ExpandPowI(const SDLoc &DL, SDValue LHS, SDValue RHS, SelectionDAG &DAG) { // If RHS is a constant, we can expand this out to a multiplication tree if // it's beneficial on the target, otherwise we end up lowering to a call to // __powidf2 (for example). if (ConstantSDNode *RHSC = dyn_cast(RHS)) { unsigned Val = RHSC->getSExtValue(); // powi(x, 0) -> 1.0 if (Val == 0) return DAG.getConstantFP(1.0, DL, LHS.getValueType()); if (DAG.getTargetLoweringInfo().isBeneficialToExpandPowI( Val, DAG.shouldOptForSize())) { // Get the exponent as a positive value. if ((int)Val < 0) Val = -Val; // We use the simple binary decomposition method to generate the multiply // sequence. There are more optimal ways to do this (for example, // powi(x,15) generates one more multiply than it should), but this has // the benefit of being both really simple and much better than a libcall. SDValue Res; // Logically starts equal to 1.0 SDValue CurSquare = LHS; // TODO: Intrinsics should have fast-math-flags that propagate to these // nodes. while (Val) { if (Val & 1) { if (Res.getNode()) Res = DAG.getNode(ISD::FMUL, DL, Res.getValueType(), Res, CurSquare); else Res = CurSquare; // 1.0*CurSquare. } CurSquare = DAG.getNode(ISD::FMUL, DL, CurSquare.getValueType(), CurSquare, CurSquare); Val >>= 1; } // If the original was negative, invert the result, producing 1/(x*x*x). if (RHSC->getSExtValue() < 0) Res = DAG.getNode(ISD::FDIV, DL, LHS.getValueType(), DAG.getConstantFP(1.0, DL, LHS.getValueType()), Res); return Res; } } // Otherwise, expand to a libcall. return DAG.getNode(ISD::FPOWI, DL, LHS.getValueType(), LHS, RHS); } static SDValue expandDivFix(unsigned Opcode, const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue Scale, SelectionDAG &DAG, const TargetLowering &TLI) { EVT VT = LHS.getValueType(); bool Signed = Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT; bool Saturating = Opcode == ISD::SDIVFIXSAT || Opcode == ISD::UDIVFIXSAT; LLVMContext &Ctx = *DAG.getContext(); // If the type is legal but the operation isn't, this node might survive all // the way to operation legalization. If we end up there and we do not have // the ability to widen the type (if VT*2 is not legal), we cannot expand the // node. // Coax the legalizer into expanding the node during type legalization instead // by bumping the size by one bit. This will force it to Promote, enabling the // early expansion and avoiding the need to expand later. // We don't have to do this if Scale is 0; that can always be expanded, unless // it's a saturating signed operation. Those can experience true integer // division overflow, a case which we must avoid. 
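// For example (illustrative types only): a legal-typed i32 sdiv.fix.sat with
// a nonzero scale whose fixed-point operation action is neither Legal nor
// Custom is widened here to i33, an illegal type; type legalization then
// promotes it to a legal wider type, where the early expansion can proceed.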
// FIXME: We wouldn't have to do this (or any of the early // expansion/promotion) if it was possible to expand a libcall of an // illegal type during operation legalization. But it's not, so things // get a bit hacky. unsigned ScaleInt = Scale->getAsZExtVal(); if ((ScaleInt > 0 || (Saturating && Signed)) && (TLI.isTypeLegal(VT) || (VT.isVector() && TLI.isTypeLegal(VT.getVectorElementType())))) { TargetLowering::LegalizeAction Action = TLI.getFixedPointOperationAction( Opcode, VT, ScaleInt); if (Action != TargetLowering::Legal && Action != TargetLowering::Custom) { EVT PromVT; if (VT.isScalarInteger()) PromVT = EVT::getIntegerVT(Ctx, VT.getSizeInBits() + 1); else if (VT.isVector()) { PromVT = VT.getVectorElementType(); PromVT = EVT::getIntegerVT(Ctx, PromVT.getSizeInBits() + 1); PromVT = EVT::getVectorVT(Ctx, PromVT, VT.getVectorElementCount()); } else llvm_unreachable("Wrong VT for DIVFIX?"); LHS = DAG.getExtOrTrunc(Signed, LHS, DL, PromVT); RHS = DAG.getExtOrTrunc(Signed, RHS, DL, PromVT); EVT ShiftTy = TLI.getShiftAmountTy(PromVT, DAG.getDataLayout()); // For saturating operations, we need to shift up the LHS to get the // proper saturation width, and then shift down again afterwards. if (Saturating) LHS = DAG.getNode(ISD::SHL, DL, PromVT, LHS, DAG.getConstant(1, DL, ShiftTy)); SDValue Res = DAG.getNode(Opcode, DL, PromVT, LHS, RHS, Scale); if (Saturating) Res = DAG.getNode(Signed ? ISD::SRA : ISD::SRL, DL, PromVT, Res, DAG.getConstant(1, DL, ShiftTy)); return DAG.getZExtOrTrunc(Res, DL, VT); } } return DAG.getNode(Opcode, DL, VT, LHS, RHS, Scale); } // getUnderlyingArgRegs - Find underlying registers used for a truncated, // bitcasted, or split argument. Returns a list of static void getUnderlyingArgRegs(SmallVectorImpl> &Regs, const SDValue &N) { switch (N.getOpcode()) { case ISD::CopyFromReg: { SDValue Op = N.getOperand(1); Regs.emplace_back(cast(Op)->getReg(), Op.getValueType().getSizeInBits()); return; } case ISD::BITCAST: case ISD::AssertZext: case ISD::AssertSext: case ISD::TRUNCATE: getUnderlyingArgRegs(Regs, N.getOperand(0)); return; case ISD::BUILD_PAIR: case ISD::BUILD_VECTOR: case ISD::CONCAT_VECTORS: for (SDValue Op : N->op_values()) getUnderlyingArgRegs(Regs, Op); return; default: return; } } /// If the DbgValueInst is a dbg_value of a function argument, create the /// corresponding DBG_VALUE machine instruction for it now. At the end of /// instruction selection, they will be inserted to the entry BB. /// We don't currently support this for variadic dbg_values, as they shouldn't /// appear for function arguments or in the prologue. bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( const Value *V, DILocalVariable *Variable, DIExpression *Expr, DILocation *DL, FuncArgumentDbgValueKind Kind, const SDValue &N) { const Argument *Arg = dyn_cast(V); if (!Arg) return false; MachineFunction &MF = DAG.getMachineFunction(); const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo(); // Helper to create DBG_INSTR_REFs or DBG_VALUEs, depending on what kind // we've been asked to pursue. auto MakeVRegDbgValue = [&](Register Reg, DIExpression *FragExpr, bool Indirect) { if (Reg.isVirtual() && MF.useDebugInstrRef()) { // For VRegs, in instruction referencing mode, create a DBG_INSTR_REF // pointing at the VReg, which will be patched up later. 
auto &Inst = TII->get(TargetOpcode::DBG_INSTR_REF); SmallVector MOs({MachineOperand::CreateReg( /* Reg */ Reg, /* isDef */ false, /* isImp */ false, /* isKill */ false, /* isDead */ false, /* isUndef */ false, /* isEarlyClobber */ false, /* SubReg */ 0, /* isDebug */ true)}); auto *NewDIExpr = FragExpr; // We don't have an "Indirect" field in DBG_INSTR_REF, fold that into // the DIExpression. if (Indirect) NewDIExpr = DIExpression::prepend(FragExpr, DIExpression::DerefBefore); SmallVector Ops({dwarf::DW_OP_LLVM_arg, 0}); NewDIExpr = DIExpression::prependOpcodes(NewDIExpr, Ops); return BuildMI(MF, DL, Inst, false, MOs, Variable, NewDIExpr); } else { // Create a completely standard DBG_VALUE. auto &Inst = TII->get(TargetOpcode::DBG_VALUE); return BuildMI(MF, DL, Inst, Indirect, Reg, Variable, FragExpr); } }; if (Kind == FuncArgumentDbgValueKind::Value) { // ArgDbgValues are hoisted to the beginning of the entry block. So we // should only emit as ArgDbgValue if the dbg.value intrinsic is found in // the entry block. bool IsInEntryBlock = FuncInfo.MBB == &FuncInfo.MF->front(); if (!IsInEntryBlock) return false; // ArgDbgValues are hoisted to the beginning of the entry block. So we // should only emit as ArgDbgValue if the dbg.value intrinsic describes a // variable that also is a param. // // Although, if we are at the top of the entry block already, we can still // emit using ArgDbgValue. This might catch some situations when the // dbg.value refers to an argument that isn't used in the entry block, so // any CopyToReg node would be optimized out and the only way to express // this DBG_VALUE is by using the physical reg (or FI) as done in this // method. ArgDbgValues are hoisted to the beginning of the entry block. So // we should only emit as ArgDbgValue if the Variable is an argument to the // current function, and the dbg.value intrinsic is found in the entry // block. bool VariableIsFunctionInputArg = Variable->isParameter() && !DL->getInlinedAt(); bool IsInPrologue = SDNodeOrder == LowestSDNodeOrder; if (!IsInPrologue && !VariableIsFunctionInputArg) return false; // Here we assume that a function argument on IR level only can be used to // describe one input parameter on source level. If we for example have // source code like this // // struct A { long x, y; }; // void foo(struct A a, long b) { // ... // b = a.x; // ... // } // // and IR like this // // define void @foo(i32 %a1, i32 %a2, i32 %b) { // entry: // call void @llvm.dbg.value(metadata i32 %a1, "a", DW_OP_LLVM_fragment // call void @llvm.dbg.value(metadata i32 %a2, "a", DW_OP_LLVM_fragment // call void @llvm.dbg.value(metadata i32 %b, "b", // ... // call void @llvm.dbg.value(metadata i32 %a1, "b" // ... // // then the last dbg.value is describing a parameter "b" using a value that // is an argument. But since we already has used %a1 to describe a parameter // we should not handle that last dbg.value here (that would result in an // incorrect hoisting of the DBG_VALUE to the function entry). // Notice that we allow one dbg.value per IR level argument, to accommodate // for the situation with fragments above. // If there is no node for the value being handled, we return true to skip // the normal generation of debug info, as it would kill existing debug // info for the parameter in case of duplicates. 
if (VariableIsFunctionInputArg) { unsigned ArgNo = Arg->getArgNo(); if (ArgNo >= FuncInfo.DescribedArgs.size()) FuncInfo.DescribedArgs.resize(ArgNo + 1, false); else if (!IsInPrologue && FuncInfo.DescribedArgs.test(ArgNo)) return !NodeMap[V].getNode(); FuncInfo.DescribedArgs.set(ArgNo); } } bool IsIndirect = false; std::optional Op; // Some arguments' frame index is recorded during argument lowering. int FI = FuncInfo.getArgumentFrameIndex(Arg); if (FI != std::numeric_limits::max()) Op = MachineOperand::CreateFI(FI); SmallVector, 8> ArgRegsAndSizes; if (!Op && N.getNode()) { getUnderlyingArgRegs(ArgRegsAndSizes, N); Register Reg; if (ArgRegsAndSizes.size() == 1) Reg = ArgRegsAndSizes.front().first; if (Reg && Reg.isVirtual()) { MachineRegisterInfo &RegInfo = MF.getRegInfo(); Register PR = RegInfo.getLiveInPhysReg(Reg); if (PR) Reg = PR; } if (Reg) { Op = MachineOperand::CreateReg(Reg, false); IsIndirect = Kind != FuncArgumentDbgValueKind::Value; } } if (!Op && N.getNode()) { // Check if frame index is available. SDValue LCandidate = peekThroughBitcasts(N); if (LoadSDNode *LNode = dyn_cast(LCandidate.getNode())) if (FrameIndexSDNode *FINode = dyn_cast(LNode->getBasePtr().getNode())) Op = MachineOperand::CreateFI(FINode->getIndex()); } if (!Op) { // Create a DBG_VALUE for each decomposed value in ArgRegs to cover Reg auto splitMultiRegDbgValue = [&](ArrayRef> SplitRegs) { unsigned Offset = 0; for (const auto &RegAndSize : SplitRegs) { // If the expression is already a fragment, the current register // offset+size might extend beyond the fragment. In this case, only // the register bits that are inside the fragment are relevant. int RegFragmentSizeInBits = RegAndSize.second; if (auto ExprFragmentInfo = Expr->getFragmentInfo()) { uint64_t ExprFragmentSizeInBits = ExprFragmentInfo->SizeInBits; // The register is entirely outside the expression fragment, // so is irrelevant for debug info. if (Offset >= ExprFragmentSizeInBits) break; // The register is partially outside the expression fragment, only // the low bits within the fragment are relevant for debug info. if (Offset + RegFragmentSizeInBits > ExprFragmentSizeInBits) { RegFragmentSizeInBits = ExprFragmentSizeInBits - Offset; } } auto FragmentExpr = DIExpression::createFragmentExpression( Expr, Offset, RegFragmentSizeInBits); Offset += RegAndSize.second; // If a valid fragment expression cannot be created, the variable's // correct value cannot be determined and so it is set as Undef. if (!FragmentExpr) { SDDbgValue *SDV = DAG.getConstantDbgValue( Variable, Expr, UndefValue::get(V->getType()), DL, SDNodeOrder); DAG.AddDbgValue(SDV, false); continue; } MachineInstr *NewMI = MakeVRegDbgValue(RegAndSize.first, *FragmentExpr, Kind != FuncArgumentDbgValueKind::Value); FuncInfo.ArgDbgValues.push_back(NewMI); } }; // Check if ValueMap has reg number. DenseMap::const_iterator VMI = FuncInfo.ValueMap.find(V); if (VMI != FuncInfo.ValueMap.end()) { const auto &TLI = DAG.getTargetLoweringInfo(); RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), VMI->second, V->getType(), std::nullopt); if (RFV.occupiesMultipleRegs()) { splitMultiRegDbgValue(RFV.getRegsAndSizes()); return true; } Op = MachineOperand::CreateReg(VMI->second, false); IsIndirect = Kind != FuncArgumentDbgValueKind::Value; } else if (ArgRegsAndSizes.size() > 1) { // This was split due to the calling convention, and no virtual register // mapping exists for the value. 
splitMultiRegDbgValue(ArgRegsAndSizes); return true; } } if (!Op) return false; assert(Variable->isValidLocationForIntrinsic(DL) && "Expected inlined-at fields to agree"); MachineInstr *NewMI = nullptr; if (Op->isReg()) NewMI = MakeVRegDbgValue(Op->getReg(), Expr, IsIndirect); else NewMI = BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), true, *Op, Variable, Expr); // Otherwise, use ArgDbgValues. FuncInfo.ArgDbgValues.push_back(NewMI); return true; } /// Return the appropriate SDDbgValue based on N. SDDbgValue *SelectionDAGBuilder::getDbgValue(SDValue N, DILocalVariable *Variable, DIExpression *Expr, const DebugLoc &dl, unsigned DbgSDNodeOrder) { if (auto *FISDN = dyn_cast(N.getNode())) { // Construct a FrameIndexDbgValue for FrameIndexSDNodes so we can describe // stack slot locations. // // Consider "int x = 0; int *px = &x;". There are two kinds of interesting // debug values here after optimization: // // dbg.value(i32* %px, !"int *px", !DIExpression()), and // dbg.value(i32* %px, !"int x", !DIExpression(DW_OP_deref)) // // Both describe the direct values of their associated variables. return DAG.getFrameIndexDbgValue(Variable, Expr, FISDN->getIndex(), /*IsIndirect*/ false, dl, DbgSDNodeOrder); } return DAG.getDbgValue(Variable, Expr, N.getNode(), N.getResNo(), /*IsIndirect*/ false, dl, DbgSDNodeOrder); } static unsigned FixedPointIntrinsicToOpcode(unsigned Intrinsic) { switch (Intrinsic) { case Intrinsic::smul_fix: return ISD::SMULFIX; case Intrinsic::umul_fix: return ISD::UMULFIX; case Intrinsic::smul_fix_sat: return ISD::SMULFIXSAT; case Intrinsic::umul_fix_sat: return ISD::UMULFIXSAT; case Intrinsic::sdiv_fix: return ISD::SDIVFIX; case Intrinsic::udiv_fix: return ISD::UDIVFIX; case Intrinsic::sdiv_fix_sat: return ISD::SDIVFIXSAT; case Intrinsic::udiv_fix_sat: return ISD::UDIVFIXSAT; default: llvm_unreachable("Unhandled fixed point intrinsic"); } } void SelectionDAGBuilder::lowerCallToExternalSymbol(const CallInst &I, const char *FunctionName) { assert(FunctionName && "FunctionName must not be nullptr"); SDValue Callee = DAG.getExternalSymbol( FunctionName, DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout())); LowerCallTo(I, Callee, I.isTailCall(), I.isMustTailCall()); } /// Given a @llvm.call.preallocated.setup, return the corresponding /// preallocated call. static const CallBase *FindPreallocatedCall(const Value *PreallocatedSetup) { assert(cast(PreallocatedSetup) ->getCalledFunction() ->getIntrinsicID() == Intrinsic::call_preallocated_setup && "expected call_preallocated_setup Value"); for (const auto *U : PreallocatedSetup->users()) { auto *UseCall = cast(U); const Function *Fn = UseCall->getCalledFunction(); if (!Fn || Fn->getIntrinsicID() != Intrinsic::call_preallocated_arg) { return UseCall; } } llvm_unreachable("expected corresponding call to preallocated setup/arg"); } /// If DI is a debug value with an EntryValue expression, lower it using the /// corresponding physical register of the associated Argument value /// (guaranteed to exist by the verifier). bool SelectionDAGBuilder::visitEntryValueDbgValue( ArrayRef Values, DILocalVariable *Variable, DIExpression *Expr, DebugLoc DbgLoc) { if (!Expr->isEntryValue() || !hasSingleElement(Values)) return false; // These properties are guaranteed by the verifier. 
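  // Purely for illustration (hypothetical names), an entry-value location
  // handled here typically looks like
  //
  //   dbg.value(ptr %async.ctx, !"ctx",
  //             !DIExpression(DW_OP_LLVM_entry_value, 1))
  //
  // where %async.ctx is a swiftasync Argument, so the debugger can recover
  // the variable from the value that argument's register had on entry.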
  const Argument *Arg = cast<Argument>(Values[0]);
  assert(Arg->hasAttribute(Attribute::AttrKind::SwiftAsync));

  auto ArgIt = FuncInfo.ValueMap.find(Arg);
  if (ArgIt == FuncInfo.ValueMap.end()) {
    LLVM_DEBUG(
        dbgs() << "Dropping dbg.value: expression is entry_value but "
                  "couldn't find an associated register for the Argument\n");
    return true;
  }
  Register ArgVReg = ArgIt->getSecond();

  for (auto [PhysReg, VirtReg] : FuncInfo.RegInfo->liveins())
    if (ArgVReg == VirtReg || ArgVReg == PhysReg) {
      SDDbgValue *SDV = DAG.getVRegDbgValue(
          Variable, Expr, PhysReg, false /*IsIndirect*/, DbgLoc, SDNodeOrder);
      DAG.AddDbgValue(SDV, false /*treat as dbg.declare byval parameter*/);
      return true;
    }
  LLVM_DEBUG(dbgs() << "Dropping dbg.value: expression is entry_value but "
                       "couldn't find a physical register\n");
  return true;
}

/// Lower the call to the specified intrinsic function.
void SelectionDAGBuilder::visitConvergenceControl(const CallInst &I,
                                                  unsigned Intrinsic) {
  SDLoc sdl = getCurSDLoc();
  switch (Intrinsic) {
  case Intrinsic::experimental_convergence_anchor:
    setValue(&I, DAG.getNode(ISD::CONVERGENCECTRL_ANCHOR, sdl, MVT::Untyped));
    break;
  case Intrinsic::experimental_convergence_entry:
    setValue(&I, DAG.getNode(ISD::CONVERGENCECTRL_ENTRY, sdl, MVT::Untyped));
    break;
  case Intrinsic::experimental_convergence_loop: {
    auto Bundle = I.getOperandBundle(LLVMContext::OB_convergencectrl);
    auto *Token = Bundle->Inputs[0].get();
    setValue(&I, DAG.getNode(ISD::CONVERGENCECTRL_LOOP, sdl, MVT::Untyped,
                             getValue(Token)));
    break;
  }
  }
}

void SelectionDAGBuilder::visitVectorHistogram(const CallInst &I,
                                               unsigned IntrinsicID) {
  // For now, we're only lowering an 'add' histogram.
  // We can add others later, e.g. saturating adds, min/max.
  assert(IntrinsicID == Intrinsic::experimental_vector_histogram_add &&
         "Tried to lower unsupported histogram type");
  SDLoc sdl = getCurSDLoc();
  Value *Ptr = I.getOperand(0);
  SDValue Inc = getValue(I.getOperand(1));
  SDValue Mask = getValue(I.getOperand(2));

  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  DataLayout TargetDL = DAG.getDataLayout();
  EVT VT = Inc.getValueType();
  Align Alignment = DAG.getEVTAlign(VT);

  const MDNode *Ranges = getRangeMetadata(I);

  SDValue Root = DAG.getRoot();
  SDValue Base;
  SDValue Index;
  ISD::MemIndexType IndexType;
  SDValue Scale;
  bool UniformBase = getUniformBase(Ptr, Base, Index, IndexType, Scale, this,
                                    I.getParent(), VT.getScalarStoreSize());

  unsigned AS = Ptr->getType()->getScalarType()->getPointerAddressSpace();

  MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
      MachinePointerInfo(AS),
      MachineMemOperand::MOLoad | MachineMemOperand::MOStore,
      MemoryLocation::UnknownSize, Alignment, I.getAAMetadata(), Ranges);

  if (!UniformBase) {
    Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout()));
    Index = getValue(Ptr);
    IndexType = ISD::SIGNED_SCALED;
    Scale =
        DAG.getTargetConstant(1, sdl, TLI.getPointerTy(DAG.getDataLayout()));
  }

  EVT IdxVT = Index.getValueType();
  EVT EltTy = IdxVT.getVectorElementType();
  if (TLI.shouldExtendGSIndex(IdxVT, EltTy)) {
    EVT NewIdxVT = IdxVT.changeVectorElementType(EltTy);
    Index = DAG.getNode(ISD::SIGN_EXTEND, sdl, NewIdxVT, Index);
  }

  SDValue ID = DAG.getTargetConstant(IntrinsicID, sdl, MVT::i32);
  SDValue Ops[] = {Root, Inc, Mask, Base, Index, Scale, ID};

  SDValue Histogram = DAG.getMaskedHistogram(DAG.getVTList(MVT::Other), VT,
                                             sdl, Ops, MMO, IndexType);

  setValue(&I, Histogram);
  DAG.setRoot(Histogram);
}

/// Lower the call to the specified intrinsic function.
void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDLoc sdl = getCurSDLoc(); DebugLoc dl = getCurDebugLoc(); SDValue Res; SDNodeFlags Flags; if (auto *FPOp = dyn_cast(&I)) Flags.copyFMF(*FPOp); switch (Intrinsic) { default: // By default, turn this into a target intrinsic node. visitTargetIntrinsic(I, Intrinsic); return; case Intrinsic::vscale: { EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); setValue(&I, DAG.getVScale(sdl, VT, APInt(VT.getSizeInBits(), 1))); return; } case Intrinsic::vastart: visitVAStart(I); return; case Intrinsic::vaend: visitVAEnd(I); return; case Intrinsic::vacopy: visitVACopy(I); return; case Intrinsic::returnaddress: setValue(&I, DAG.getNode(ISD::RETURNADDR, sdl, TLI.getValueType(DAG.getDataLayout(), I.getType()), getValue(I.getArgOperand(0)))); return; case Intrinsic::addressofreturnaddress: setValue(&I, DAG.getNode(ISD::ADDROFRETURNADDR, sdl, TLI.getValueType(DAG.getDataLayout(), I.getType()))); return; case Intrinsic::sponentry: setValue(&I, DAG.getNode(ISD::SPONENTRY, sdl, TLI.getValueType(DAG.getDataLayout(), I.getType()))); return; case Intrinsic::frameaddress: setValue(&I, DAG.getNode(ISD::FRAMEADDR, sdl, TLI.getFrameIndexTy(DAG.getDataLayout()), getValue(I.getArgOperand(0)))); return; case Intrinsic::read_volatile_register: case Intrinsic::read_register: { Value *Reg = I.getArgOperand(0); SDValue Chain = getRoot(); SDValue RegName = DAG.getMDNode(cast(cast(Reg)->getMetadata())); EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); Res = DAG.getNode(ISD::READ_REGISTER, sdl, DAG.getVTList(VT, MVT::Other), Chain, RegName); setValue(&I, Res); DAG.setRoot(Res.getValue(1)); return; } case Intrinsic::write_register: { Value *Reg = I.getArgOperand(0); Value *RegValue = I.getArgOperand(1); SDValue Chain = getRoot(); SDValue RegName = DAG.getMDNode(cast(cast(Reg)->getMetadata())); DAG.setRoot(DAG.getNode(ISD::WRITE_REGISTER, sdl, MVT::Other, Chain, RegName, getValue(RegValue))); return; } case Intrinsic::memcpy: { const auto &MCI = cast(I); SDValue Op1 = getValue(I.getArgOperand(0)); SDValue Op2 = getValue(I.getArgOperand(1)); SDValue Op3 = getValue(I.getArgOperand(2)); // @llvm.memcpy defines 0 and 1 to both mean no alignment. Align DstAlign = MCI.getDestAlign().valueOrOne(); Align SrcAlign = MCI.getSourceAlign().valueOrOne(); Align Alignment = std::min(DstAlign, SrcAlign); bool isVol = MCI.isVolatile(); // FIXME: Support passing different dest/src alignments to the memcpy DAG // node. SDValue Root = isVol ? getRoot() : getMemoryRoot(); SDValue MC = DAG.getMemcpy(Root, sdl, Op1, Op2, Op3, Alignment, isVol, /* AlwaysInline */ false, &I, std::nullopt, MachinePointerInfo(I.getArgOperand(0)), MachinePointerInfo(I.getArgOperand(1)), I.getAAMetadata(), AA); updateDAGForMaybeTailCall(MC); return; } case Intrinsic::memcpy_inline: { const auto &MCI = cast(I); SDValue Dst = getValue(I.getArgOperand(0)); SDValue Src = getValue(I.getArgOperand(1)); SDValue Size = getValue(I.getArgOperand(2)); assert(isa(Size) && "memcpy_inline needs constant size"); // @llvm.memcpy.inline defines 0 and 1 to both mean no alignment. Align DstAlign = MCI.getDestAlign().valueOrOne(); Align SrcAlign = MCI.getSourceAlign().valueOrOne(); Align Alignment = std::min(DstAlign, SrcAlign); bool isVol = MCI.isVolatile(); // FIXME: Support passing different dest/src alignments to the memcpy DAG // node. 
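    // As an illustrative example (hypothetical alignments): a call with a
    // 16-byte-aligned destination and a 4-byte-aligned source is currently
    // emitted with the conservative minimum, Align(4), for both operands,
    // until the FIXME above is addressed.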
SDValue MC = DAG.getMemcpy(getRoot(), sdl, Dst, Src, Size, Alignment, isVol, /* AlwaysInline */ true, &I, std::nullopt, MachinePointerInfo(I.getArgOperand(0)), MachinePointerInfo(I.getArgOperand(1)), I.getAAMetadata(), AA); updateDAGForMaybeTailCall(MC); return; } case Intrinsic::memset: { const auto &MSI = cast(I); SDValue Op1 = getValue(I.getArgOperand(0)); SDValue Op2 = getValue(I.getArgOperand(1)); SDValue Op3 = getValue(I.getArgOperand(2)); // @llvm.memset defines 0 and 1 to both mean no alignment. Align Alignment = MSI.getDestAlign().valueOrOne(); bool isVol = MSI.isVolatile(); SDValue Root = isVol ? getRoot() : getMemoryRoot(); SDValue MS = DAG.getMemset( Root, sdl, Op1, Op2, Op3, Alignment, isVol, /* AlwaysInline */ false, &I, MachinePointerInfo(I.getArgOperand(0)), I.getAAMetadata()); updateDAGForMaybeTailCall(MS); return; } case Intrinsic::memset_inline: { const auto &MSII = cast(I); SDValue Dst = getValue(I.getArgOperand(0)); SDValue Value = getValue(I.getArgOperand(1)); SDValue Size = getValue(I.getArgOperand(2)); assert(isa(Size) && "memset_inline needs constant size"); // @llvm.memset defines 0 and 1 to both mean no alignment. Align DstAlign = MSII.getDestAlign().valueOrOne(); bool isVol = MSII.isVolatile(); SDValue Root = isVol ? getRoot() : getMemoryRoot(); SDValue MC = DAG.getMemset(Root, sdl, Dst, Value, Size, DstAlign, isVol, /* AlwaysInline */ true, &I, MachinePointerInfo(I.getArgOperand(0)), I.getAAMetadata()); updateDAGForMaybeTailCall(MC); return; } case Intrinsic::memmove: { const auto &MMI = cast(I); SDValue Op1 = getValue(I.getArgOperand(0)); SDValue Op2 = getValue(I.getArgOperand(1)); SDValue Op3 = getValue(I.getArgOperand(2)); // @llvm.memmove defines 0 and 1 to both mean no alignment. Align DstAlign = MMI.getDestAlign().valueOrOne(); Align SrcAlign = MMI.getSourceAlign().valueOrOne(); Align Alignment = std::min(DstAlign, SrcAlign); bool isVol = MMI.isVolatile(); // FIXME: Support passing different dest/src alignments to the memmove DAG // node. SDValue Root = isVol ? 
getRoot() : getMemoryRoot(); SDValue MM = DAG.getMemmove(Root, sdl, Op1, Op2, Op3, Alignment, isVol, &I, /* OverrideTailCall */ std::nullopt, MachinePointerInfo(I.getArgOperand(0)), MachinePointerInfo(I.getArgOperand(1)), I.getAAMetadata(), AA); updateDAGForMaybeTailCall(MM); return; } case Intrinsic::memcpy_element_unordered_atomic: { const AtomicMemCpyInst &MI = cast(I); SDValue Dst = getValue(MI.getRawDest()); SDValue Src = getValue(MI.getRawSource()); SDValue Length = getValue(MI.getLength()); Type *LengthTy = MI.getLength()->getType(); unsigned ElemSz = MI.getElementSizeInBytes(); bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget()); SDValue MC = DAG.getAtomicMemcpy(getRoot(), sdl, Dst, Src, Length, LengthTy, ElemSz, isTC, MachinePointerInfo(MI.getRawDest()), MachinePointerInfo(MI.getRawSource())); updateDAGForMaybeTailCall(MC); return; } case Intrinsic::memmove_element_unordered_atomic: { auto &MI = cast(I); SDValue Dst = getValue(MI.getRawDest()); SDValue Src = getValue(MI.getRawSource()); SDValue Length = getValue(MI.getLength()); Type *LengthTy = MI.getLength()->getType(); unsigned ElemSz = MI.getElementSizeInBytes(); bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget()); SDValue MC = DAG.getAtomicMemmove(getRoot(), sdl, Dst, Src, Length, LengthTy, ElemSz, isTC, MachinePointerInfo(MI.getRawDest()), MachinePointerInfo(MI.getRawSource())); updateDAGForMaybeTailCall(MC); return; } case Intrinsic::memset_element_unordered_atomic: { auto &MI = cast(I); SDValue Dst = getValue(MI.getRawDest()); SDValue Val = getValue(MI.getValue()); SDValue Length = getValue(MI.getLength()); Type *LengthTy = MI.getLength()->getType(); unsigned ElemSz = MI.getElementSizeInBytes(); bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget()); SDValue MC = DAG.getAtomicMemset(getRoot(), sdl, Dst, Val, Length, LengthTy, ElemSz, isTC, MachinePointerInfo(MI.getRawDest())); updateDAGForMaybeTailCall(MC); return; } case Intrinsic::call_preallocated_setup: { const CallBase *PreallocatedCall = FindPreallocatedCall(&I); SDValue SrcValue = DAG.getSrcValue(PreallocatedCall); SDValue Res = DAG.getNode(ISD::PREALLOCATED_SETUP, sdl, MVT::Other, getRoot(), SrcValue); setValue(&I, Res); DAG.setRoot(Res); return; } case Intrinsic::call_preallocated_arg: { const CallBase *PreallocatedCall = FindPreallocatedCall(I.getOperand(0)); SDValue SrcValue = DAG.getSrcValue(PreallocatedCall); SDValue Ops[3]; Ops[0] = getRoot(); Ops[1] = SrcValue; Ops[2] = DAG.getTargetConstant(*cast(I.getArgOperand(1)), sdl, MVT::i32); // arg index SDValue Res = DAG.getNode( ISD::PREALLOCATED_ARG, sdl, DAG.getVTList(TLI.getPointerTy(DAG.getDataLayout()), MVT::Other), Ops); setValue(&I, Res); DAG.setRoot(Res.getValue(1)); return; } case Intrinsic::dbg_declare: { const auto &DI = cast(I); // Debug intrinsics are handled separately in assignment tracking mode. // Some intrinsics are handled right after Argument lowering. if (AssignmentTrackingEnabled || FuncInfo.PreprocessedDbgDeclares.count(&DI)) return; LLVM_DEBUG(dbgs() << "SelectionDAG visiting dbg_declare: " << DI << "\n"); DILocalVariable *Variable = DI.getVariable(); DIExpression *Expression = DI.getExpression(); dropDanglingDebugInfo(Variable, Expression); // Assume dbg.declare can not currently use DIArgList, i.e. // it is non-variadic. 
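    // For illustration, the single-location form handled here is e.g.
    //
    //   dbg.declare(ptr %x.addr, !"x", !DIExpression())
    //
    // i.e. exactly one location operand and never a DIArgList.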
assert(!DI.hasArgList() && "Only dbg.value should currently use DIArgList"); handleDebugDeclare(DI.getVariableLocationOp(0), Variable, Expression, DI.getDebugLoc()); return; } case Intrinsic::dbg_label: { const DbgLabelInst &DI = cast(I); DILabel *Label = DI.getLabel(); assert(Label && "Missing label"); SDDbgLabel *SDV; SDV = DAG.getDbgLabel(Label, dl, SDNodeOrder); DAG.AddDbgLabel(SDV); return; } case Intrinsic::dbg_assign: { // Debug intrinsics are handled separately in assignment tracking mode. if (AssignmentTrackingEnabled) return; // If assignment tracking hasn't been enabled then fall through and treat // the dbg.assign as a dbg.value. [[fallthrough]]; } case Intrinsic::dbg_value: { // Debug intrinsics are handled separately in assignment tracking mode. if (AssignmentTrackingEnabled) return; const DbgValueInst &DI = cast(I); assert(DI.getVariable() && "Missing variable"); DILocalVariable *Variable = DI.getVariable(); DIExpression *Expression = DI.getExpression(); dropDanglingDebugInfo(Variable, Expression); if (DI.isKillLocation()) { handleKillDebugValue(Variable, Expression, DI.getDebugLoc(), SDNodeOrder); return; } SmallVector Values(DI.getValues()); if (Values.empty()) return; bool IsVariadic = DI.hasArgList(); if (!handleDebugValue(Values, Variable, Expression, DI.getDebugLoc(), SDNodeOrder, IsVariadic)) addDanglingDebugInfo(Values, Variable, Expression, IsVariadic, DI.getDebugLoc(), SDNodeOrder); return; } case Intrinsic::eh_typeid_for: { // Find the type id for the given typeinfo. GlobalValue *GV = ExtractTypeInfo(I.getArgOperand(0)); unsigned TypeID = DAG.getMachineFunction().getTypeIDFor(GV); Res = DAG.getConstant(TypeID, sdl, MVT::i32); setValue(&I, Res); return; } case Intrinsic::eh_return_i32: case Intrinsic::eh_return_i64: DAG.getMachineFunction().setCallsEHReturn(true); DAG.setRoot(DAG.getNode(ISD::EH_RETURN, sdl, MVT::Other, getControlRoot(), getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)))); return; case Intrinsic::eh_unwind_init: DAG.getMachineFunction().setCallsUnwindInit(true); return; case Intrinsic::eh_dwarf_cfa: setValue(&I, DAG.getNode(ISD::EH_DWARF_CFA, sdl, TLI.getPointerTy(DAG.getDataLayout()), getValue(I.getArgOperand(0)))); return; case Intrinsic::eh_sjlj_callsite: { MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); ConstantInt *CI = cast(I.getArgOperand(0)); assert(MMI.getCurrentCallSite() == 0 && "Overlapping call sites!"); MMI.setCurrentCallSite(CI->getZExtValue()); return; } case Intrinsic::eh_sjlj_functioncontext: { // Get and store the index of the function context. 
MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); AllocaInst *FnCtx = cast(I.getArgOperand(0)->stripPointerCasts()); int FI = FuncInfo.StaticAllocaMap[FnCtx]; MFI.setFunctionContextIndex(FI); return; } case Intrinsic::eh_sjlj_setjmp: { SDValue Ops[2]; Ops[0] = getRoot(); Ops[1] = getValue(I.getArgOperand(0)); SDValue Op = DAG.getNode(ISD::EH_SJLJ_SETJMP, sdl, DAG.getVTList(MVT::i32, MVT::Other), Ops); setValue(&I, Op.getValue(0)); DAG.setRoot(Op.getValue(1)); return; } case Intrinsic::eh_sjlj_longjmp: DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_LONGJMP, sdl, MVT::Other, getRoot(), getValue(I.getArgOperand(0)))); return; case Intrinsic::eh_sjlj_setup_dispatch: DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_SETUP_DISPATCH, sdl, MVT::Other, getRoot())); return; case Intrinsic::masked_gather: visitMaskedGather(I); return; case Intrinsic::masked_load: visitMaskedLoad(I); return; case Intrinsic::masked_scatter: visitMaskedScatter(I); return; case Intrinsic::masked_store: visitMaskedStore(I); return; case Intrinsic::masked_expandload: visitMaskedLoad(I, true /* IsExpanding */); return; case Intrinsic::masked_compressstore: visitMaskedStore(I, true /* IsCompressing */); return; case Intrinsic::powi: setValue(&I, ExpandPowI(sdl, getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), DAG)); return; case Intrinsic::log: setValue(&I, expandLog(sdl, getValue(I.getArgOperand(0)), DAG, TLI, Flags)); return; case Intrinsic::log2: setValue(&I, expandLog2(sdl, getValue(I.getArgOperand(0)), DAG, TLI, Flags)); return; case Intrinsic::log10: setValue(&I, expandLog10(sdl, getValue(I.getArgOperand(0)), DAG, TLI, Flags)); return; case Intrinsic::exp: setValue(&I, expandExp(sdl, getValue(I.getArgOperand(0)), DAG, TLI, Flags)); return; case Intrinsic::exp2: setValue(&I, expandExp2(sdl, getValue(I.getArgOperand(0)), DAG, TLI, Flags)); return; case Intrinsic::pow: setValue(&I, expandPow(sdl, getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), DAG, TLI, Flags)); return; case Intrinsic::sqrt: case Intrinsic::fabs: case Intrinsic::sin: case Intrinsic::cos: case Intrinsic::tan: case Intrinsic::asin: case Intrinsic::acos: case Intrinsic::atan: case Intrinsic::sinh: case Intrinsic::cosh: case Intrinsic::tanh: case Intrinsic::exp10: case Intrinsic::floor: case Intrinsic::ceil: case Intrinsic::trunc: case Intrinsic::rint: case Intrinsic::nearbyint: case Intrinsic::round: case Intrinsic::roundeven: case Intrinsic::canonicalize: { unsigned Opcode; // clang-format off switch (Intrinsic) { default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. 
case Intrinsic::sqrt: Opcode = ISD::FSQRT; break; case Intrinsic::fabs: Opcode = ISD::FABS; break; case Intrinsic::sin: Opcode = ISD::FSIN; break; case Intrinsic::cos: Opcode = ISD::FCOS; break; case Intrinsic::tan: Opcode = ISD::FTAN; break; case Intrinsic::asin: Opcode = ISD::FASIN; break; case Intrinsic::acos: Opcode = ISD::FACOS; break; case Intrinsic::atan: Opcode = ISD::FATAN; break; case Intrinsic::sinh: Opcode = ISD::FSINH; break; case Intrinsic::cosh: Opcode = ISD::FCOSH; break; case Intrinsic::tanh: Opcode = ISD::FTANH; break; case Intrinsic::exp10: Opcode = ISD::FEXP10; break; case Intrinsic::floor: Opcode = ISD::FFLOOR; break; case Intrinsic::ceil: Opcode = ISD::FCEIL; break; case Intrinsic::trunc: Opcode = ISD::FTRUNC; break; case Intrinsic::rint: Opcode = ISD::FRINT; break; case Intrinsic::nearbyint: Opcode = ISD::FNEARBYINT; break; case Intrinsic::round: Opcode = ISD::FROUND; break; case Intrinsic::roundeven: Opcode = ISD::FROUNDEVEN; break; case Intrinsic::canonicalize: Opcode = ISD::FCANONICALIZE; break; } // clang-format on setValue(&I, DAG.getNode(Opcode, sdl, getValue(I.getArgOperand(0)).getValueType(), getValue(I.getArgOperand(0)), Flags)); return; } case Intrinsic::lround: case Intrinsic::llround: case Intrinsic::lrint: case Intrinsic::llrint: { unsigned Opcode; // clang-format off switch (Intrinsic) { default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. case Intrinsic::lround: Opcode = ISD::LROUND; break; case Intrinsic::llround: Opcode = ISD::LLROUND; break; case Intrinsic::lrint: Opcode = ISD::LRINT; break; case Intrinsic::llrint: Opcode = ISD::LLRINT; break; } // clang-format on EVT RetVT = TLI.getValueType(DAG.getDataLayout(), I.getType()); setValue(&I, DAG.getNode(Opcode, sdl, RetVT, getValue(I.getArgOperand(0)))); return; } case Intrinsic::minnum: setValue(&I, DAG.getNode(ISD::FMINNUM, sdl, getValue(I.getArgOperand(0)).getValueType(), getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), Flags)); return; case Intrinsic::maxnum: setValue(&I, DAG.getNode(ISD::FMAXNUM, sdl, getValue(I.getArgOperand(0)).getValueType(), getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), Flags)); return; case Intrinsic::minimum: setValue(&I, DAG.getNode(ISD::FMINIMUM, sdl, getValue(I.getArgOperand(0)).getValueType(), getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), Flags)); return; case Intrinsic::maximum: setValue(&I, DAG.getNode(ISD::FMAXIMUM, sdl, getValue(I.getArgOperand(0)).getValueType(), getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), Flags)); return; case Intrinsic::copysign: setValue(&I, DAG.getNode(ISD::FCOPYSIGN, sdl, getValue(I.getArgOperand(0)).getValueType(), getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), Flags)); return; case Intrinsic::ldexp: setValue(&I, DAG.getNode(ISD::FLDEXP, sdl, getValue(I.getArgOperand(0)).getValueType(), getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), Flags)); return; case Intrinsic::frexp: { SmallVector ValueVTs; ComputeValueVTs(TLI, DAG.getDataLayout(), I.getType(), ValueVTs); SDVTList VTs = DAG.getVTList(ValueVTs); setValue(&I, DAG.getNode(ISD::FFREXP, sdl, VTs, getValue(I.getArgOperand(0)))); return; } case Intrinsic::arithmetic_fence: { setValue(&I, DAG.getNode(ISD::ARITH_FENCE, sdl, getValue(I.getArgOperand(0)).getValueType(), getValue(I.getArgOperand(0)), Flags)); return; } case Intrinsic::fma: setValue(&I, DAG.getNode( ISD::FMA, sdl, getValue(I.getArgOperand(0)).getValueType(), getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), 
getValue(I.getArgOperand(2)), Flags)); return; #define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC) \ case Intrinsic::INTRINSIC: #include "llvm/IR/ConstrainedOps.def" visitConstrainedFPIntrinsic(cast(I)); return; #define BEGIN_REGISTER_VP_INTRINSIC(VPID, ...) case Intrinsic::VPID: #include "llvm/IR/VPIntrinsics.def" visitVectorPredicationIntrinsic(cast(I)); return; case Intrinsic::fptrunc_round: { // Get the last argument, the metadata and convert it to an integer in the // call Metadata *MD = cast(I.getArgOperand(1))->getMetadata(); std::optional RoundMode = convertStrToRoundingMode(cast(MD)->getString()); EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); // Propagate fast-math-flags from IR to node(s). SDNodeFlags Flags; Flags.copyFMF(*cast(&I)); SelectionDAG::FlagInserter FlagsInserter(DAG, Flags); SDValue Result; Result = DAG.getNode( ISD::FPTRUNC_ROUND, sdl, VT, getValue(I.getArgOperand(0)), DAG.getTargetConstant((int)*RoundMode, sdl, TLI.getPointerTy(DAG.getDataLayout()))); setValue(&I, Result); return; } case Intrinsic::fmuladd: { EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict && TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT)) { setValue(&I, DAG.getNode(ISD::FMA, sdl, getValue(I.getArgOperand(0)).getValueType(), getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), getValue(I.getArgOperand(2)), Flags)); } else { // TODO: Intrinsic calls should have fast-math-flags. SDValue Mul = DAG.getNode( ISD::FMUL, sdl, getValue(I.getArgOperand(0)).getValueType(), getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), Flags); SDValue Add = DAG.getNode(ISD::FADD, sdl, getValue(I.getArgOperand(0)).getValueType(), Mul, getValue(I.getArgOperand(2)), Flags); setValue(&I, Add); } return; } case Intrinsic::convert_to_fp16: setValue(&I, DAG.getNode(ISD::BITCAST, sdl, MVT::i16, DAG.getNode(ISD::FP_ROUND, sdl, MVT::f16, getValue(I.getArgOperand(0)), DAG.getTargetConstant(0, sdl, MVT::i32)))); return; case Intrinsic::convert_from_fp16: setValue(&I, DAG.getNode(ISD::FP_EXTEND, sdl, TLI.getValueType(DAG.getDataLayout(), I.getType()), DAG.getNode(ISD::BITCAST, sdl, MVT::f16, getValue(I.getArgOperand(0))))); return; case Intrinsic::fptosi_sat: { EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); setValue(&I, DAG.getNode(ISD::FP_TO_SINT_SAT, sdl, VT, getValue(I.getArgOperand(0)), DAG.getValueType(VT.getScalarType()))); return; } case Intrinsic::fptoui_sat: { EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); setValue(&I, DAG.getNode(ISD::FP_TO_UINT_SAT, sdl, VT, getValue(I.getArgOperand(0)), DAG.getValueType(VT.getScalarType()))); return; } case Intrinsic::set_rounding: Res = DAG.getNode(ISD::SET_ROUNDING, sdl, MVT::Other, {getRoot(), getValue(I.getArgOperand(0))}); setValue(&I, Res); DAG.setRoot(Res.getValue(0)); return; case Intrinsic::is_fpclass: { const DataLayout DLayout = DAG.getDataLayout(); EVT DestVT = TLI.getValueType(DLayout, I.getType()); EVT ArgVT = TLI.getValueType(DLayout, I.getArgOperand(0)->getType()); FPClassTest Test = static_cast( cast(I.getArgOperand(1))->getZExtValue()); MachineFunction &MF = DAG.getMachineFunction(); const Function &F = MF.getFunction(); SDValue Op = getValue(I.getArgOperand(0)); SDNodeFlags Flags; Flags.setNoFPExcept( !F.getAttributes().hasFnAttr(llvm::Attribute::StrictFP)); // If ISD::IS_FPCLASS should be expanded, do it right now, because the // expansion can use illegal types. 
Making expansion early allows // legalizing these types prior to selection. if (!TLI.isOperationLegalOrCustom(ISD::IS_FPCLASS, ArgVT)) { SDValue Result = TLI.expandIS_FPCLASS(DestVT, Op, Test, Flags, sdl, DAG); setValue(&I, Result); return; } SDValue Check = DAG.getTargetConstant(Test, sdl, MVT::i32); SDValue V = DAG.getNode(ISD::IS_FPCLASS, sdl, DestVT, {Op, Check}, Flags); setValue(&I, V); return; } case Intrinsic::get_fpenv: { const DataLayout DLayout = DAG.getDataLayout(); EVT EnvVT = TLI.getValueType(DLayout, I.getType()); Align TempAlign = DAG.getEVTAlign(EnvVT); SDValue Chain = getRoot(); // Use GET_FPENV if it is legal or custom. Otherwise use memory-based node // and temporary storage in stack. if (TLI.isOperationLegalOrCustom(ISD::GET_FPENV, EnvVT)) { Res = DAG.getNode( ISD::GET_FPENV, sdl, DAG.getVTList(TLI.getValueType(DAG.getDataLayout(), I.getType()), MVT::Other), Chain); } else { SDValue Temp = DAG.CreateStackTemporary(EnvVT, TempAlign.value()); int SPFI = cast(Temp.getNode())->getIndex(); auto MPI = MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI); MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( MPI, MachineMemOperand::MOStore, LocationSize::beforeOrAfterPointer(), TempAlign); Chain = DAG.getGetFPEnv(Chain, sdl, Temp, EnvVT, MMO); Res = DAG.getLoad(EnvVT, sdl, Chain, Temp, MPI); } setValue(&I, Res); DAG.setRoot(Res.getValue(1)); return; } case Intrinsic::set_fpenv: { const DataLayout DLayout = DAG.getDataLayout(); SDValue Env = getValue(I.getArgOperand(0)); EVT EnvVT = Env.getValueType(); Align TempAlign = DAG.getEVTAlign(EnvVT); SDValue Chain = getRoot(); // If SET_FPENV is custom or legal, use it. Otherwise use loading // environment from memory. if (TLI.isOperationLegalOrCustom(ISD::SET_FPENV, EnvVT)) { Chain = DAG.getNode(ISD::SET_FPENV, sdl, MVT::Other, Chain, Env); } else { // Allocate space in stack, copy environment bits into it and use this // memory in SET_FPENV_MEM. 
SDValue Temp = DAG.CreateStackTemporary(EnvVT, TempAlign.value()); int SPFI = cast(Temp.getNode())->getIndex(); auto MPI = MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI); Chain = DAG.getStore(Chain, sdl, Env, Temp, MPI, TempAlign, MachineMemOperand::MOStore); MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( MPI, MachineMemOperand::MOLoad, LocationSize::beforeOrAfterPointer(), TempAlign); Chain = DAG.getSetFPEnv(Chain, sdl, Temp, EnvVT, MMO); } DAG.setRoot(Chain); return; } case Intrinsic::reset_fpenv: DAG.setRoot(DAG.getNode(ISD::RESET_FPENV, sdl, MVT::Other, getRoot())); return; case Intrinsic::get_fpmode: Res = DAG.getNode( ISD::GET_FPMODE, sdl, DAG.getVTList(TLI.getValueType(DAG.getDataLayout(), I.getType()), MVT::Other), DAG.getRoot()); setValue(&I, Res); DAG.setRoot(Res.getValue(1)); return; case Intrinsic::set_fpmode: Res = DAG.getNode(ISD::SET_FPMODE, sdl, MVT::Other, {DAG.getRoot()}, getValue(I.getArgOperand(0))); DAG.setRoot(Res); return; case Intrinsic::reset_fpmode: { Res = DAG.getNode(ISD::RESET_FPMODE, sdl, MVT::Other, getRoot()); DAG.setRoot(Res); return; } case Intrinsic::pcmarker: { SDValue Tmp = getValue(I.getArgOperand(0)); DAG.setRoot(DAG.getNode(ISD::PCMARKER, sdl, MVT::Other, getRoot(), Tmp)); return; } case Intrinsic::readcyclecounter: { SDValue Op = getRoot(); Res = DAG.getNode(ISD::READCYCLECOUNTER, sdl, DAG.getVTList(MVT::i64, MVT::Other), Op); setValue(&I, Res); DAG.setRoot(Res.getValue(1)); return; } case Intrinsic::readsteadycounter: { SDValue Op = getRoot(); Res = DAG.getNode(ISD::READSTEADYCOUNTER, sdl, DAG.getVTList(MVT::i64, MVT::Other), Op); setValue(&I, Res); DAG.setRoot(Res.getValue(1)); return; } case Intrinsic::bitreverse: setValue(&I, DAG.getNode(ISD::BITREVERSE, sdl, getValue(I.getArgOperand(0)).getValueType(), getValue(I.getArgOperand(0)))); return; case Intrinsic::bswap: setValue(&I, DAG.getNode(ISD::BSWAP, sdl, getValue(I.getArgOperand(0)).getValueType(), getValue(I.getArgOperand(0)))); return; case Intrinsic::cttz: { SDValue Arg = getValue(I.getArgOperand(0)); ConstantInt *CI = cast(I.getArgOperand(1)); EVT Ty = Arg.getValueType(); setValue(&I, DAG.getNode(CI->isZero() ? ISD::CTTZ : ISD::CTTZ_ZERO_UNDEF, sdl, Ty, Arg)); return; } case Intrinsic::ctlz: { SDValue Arg = getValue(I.getArgOperand(0)); ConstantInt *CI = cast(I.getArgOperand(1)); EVT Ty = Arg.getValueType(); setValue(&I, DAG.getNode(CI->isZero() ? ISD::CTLZ : ISD::CTLZ_ZERO_UNDEF, sdl, Ty, Arg)); return; } case Intrinsic::ctpop: { SDValue Arg = getValue(I.getArgOperand(0)); EVT Ty = Arg.getValueType(); setValue(&I, DAG.getNode(ISD::CTPOP, sdl, Ty, Arg)); return; } case Intrinsic::fshl: case Intrinsic::fshr: { bool IsFSHL = Intrinsic == Intrinsic::fshl; SDValue X = getValue(I.getArgOperand(0)); SDValue Y = getValue(I.getArgOperand(1)); SDValue Z = getValue(I.getArgOperand(2)); EVT VT = X.getValueType(); if (X == Y) { auto RotateOpcode = IsFSHL ? ISD::ROTL : ISD::ROTR; setValue(&I, DAG.getNode(RotateOpcode, sdl, VT, X, Z)); } else { auto FunnelOpcode = IsFSHL ? 
ISD::FSHL : ISD::FSHR; setValue(&I, DAG.getNode(FunnelOpcode, sdl, VT, X, Y, Z)); } return; } case Intrinsic::sadd_sat: { SDValue Op1 = getValue(I.getArgOperand(0)); SDValue Op2 = getValue(I.getArgOperand(1)); setValue(&I, DAG.getNode(ISD::SADDSAT, sdl, Op1.getValueType(), Op1, Op2)); return; } case Intrinsic::uadd_sat: { SDValue Op1 = getValue(I.getArgOperand(0)); SDValue Op2 = getValue(I.getArgOperand(1)); setValue(&I, DAG.getNode(ISD::UADDSAT, sdl, Op1.getValueType(), Op1, Op2)); return; } case Intrinsic::ssub_sat: { SDValue Op1 = getValue(I.getArgOperand(0)); SDValue Op2 = getValue(I.getArgOperand(1)); setValue(&I, DAG.getNode(ISD::SSUBSAT, sdl, Op1.getValueType(), Op1, Op2)); return; } case Intrinsic::usub_sat: { SDValue Op1 = getValue(I.getArgOperand(0)); SDValue Op2 = getValue(I.getArgOperand(1)); setValue(&I, DAG.getNode(ISD::USUBSAT, sdl, Op1.getValueType(), Op1, Op2)); return; } case Intrinsic::sshl_sat: { SDValue Op1 = getValue(I.getArgOperand(0)); SDValue Op2 = getValue(I.getArgOperand(1)); setValue(&I, DAG.getNode(ISD::SSHLSAT, sdl, Op1.getValueType(), Op1, Op2)); return; } case Intrinsic::ushl_sat: { SDValue Op1 = getValue(I.getArgOperand(0)); SDValue Op2 = getValue(I.getArgOperand(1)); setValue(&I, DAG.getNode(ISD::USHLSAT, sdl, Op1.getValueType(), Op1, Op2)); return; } case Intrinsic::smul_fix: case Intrinsic::umul_fix: case Intrinsic::smul_fix_sat: case Intrinsic::umul_fix_sat: { SDValue Op1 = getValue(I.getArgOperand(0)); SDValue Op2 = getValue(I.getArgOperand(1)); SDValue Op3 = getValue(I.getArgOperand(2)); setValue(&I, DAG.getNode(FixedPointIntrinsicToOpcode(Intrinsic), sdl, Op1.getValueType(), Op1, Op2, Op3)); return; } case Intrinsic::sdiv_fix: case Intrinsic::udiv_fix: case Intrinsic::sdiv_fix_sat: case Intrinsic::udiv_fix_sat: { SDValue Op1 = getValue(I.getArgOperand(0)); SDValue Op2 = getValue(I.getArgOperand(1)); SDValue Op3 = getValue(I.getArgOperand(2)); setValue(&I, expandDivFix(FixedPointIntrinsicToOpcode(Intrinsic), sdl, Op1, Op2, Op3, DAG, TLI)); return; } case Intrinsic::smax: { SDValue Op1 = getValue(I.getArgOperand(0)); SDValue Op2 = getValue(I.getArgOperand(1)); setValue(&I, DAG.getNode(ISD::SMAX, sdl, Op1.getValueType(), Op1, Op2)); return; } case Intrinsic::smin: { SDValue Op1 = getValue(I.getArgOperand(0)); SDValue Op2 = getValue(I.getArgOperand(1)); setValue(&I, DAG.getNode(ISD::SMIN, sdl, Op1.getValueType(), Op1, Op2)); return; } case Intrinsic::umax: { SDValue Op1 = getValue(I.getArgOperand(0)); SDValue Op2 = getValue(I.getArgOperand(1)); setValue(&I, DAG.getNode(ISD::UMAX, sdl, Op1.getValueType(), Op1, Op2)); return; } case Intrinsic::umin: { SDValue Op1 = getValue(I.getArgOperand(0)); SDValue Op2 = getValue(I.getArgOperand(1)); setValue(&I, DAG.getNode(ISD::UMIN, sdl, Op1.getValueType(), Op1, Op2)); return; } case Intrinsic::abs: { // TODO: Preserve "int min is poison" arg in SDAG? 
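    // For example, a call such as @llvm.abs.i32(i32 %x, i1 true) is lowered
    // to a plain ISD::ABS below; the "int min is poison" bit in the second
    // operand is currently dropped, as the TODO above notes.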
SDValue Op1 = getValue(I.getArgOperand(0)); setValue(&I, DAG.getNode(ISD::ABS, sdl, Op1.getValueType(), Op1)); return; } case Intrinsic::scmp: { SDValue Op1 = getValue(I.getArgOperand(0)); SDValue Op2 = getValue(I.getArgOperand(1)); EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType()); setValue(&I, DAG.getNode(ISD::SCMP, sdl, DestVT, Op1, Op2)); break; } case Intrinsic::ucmp: { SDValue Op1 = getValue(I.getArgOperand(0)); SDValue Op2 = getValue(I.getArgOperand(1)); EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType()); setValue(&I, DAG.getNode(ISD::UCMP, sdl, DestVT, Op1, Op2)); break; } case Intrinsic::stacksave: { SDValue Op = getRoot(); EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); Res = DAG.getNode(ISD::STACKSAVE, sdl, DAG.getVTList(VT, MVT::Other), Op); setValue(&I, Res); DAG.setRoot(Res.getValue(1)); return; } case Intrinsic::stackrestore: Res = getValue(I.getArgOperand(0)); DAG.setRoot(DAG.getNode(ISD::STACKRESTORE, sdl, MVT::Other, getRoot(), Res)); return; case Intrinsic::get_dynamic_area_offset: { SDValue Op = getRoot(); EVT PtrTy = TLI.getFrameIndexTy(DAG.getDataLayout()); EVT ResTy = TLI.getValueType(DAG.getDataLayout(), I.getType()); // Result type for @llvm.get.dynamic.area.offset should match PtrTy for // target. if (PtrTy.getFixedSizeInBits() < ResTy.getFixedSizeInBits()) report_fatal_error("Wrong result type for @llvm.get.dynamic.area.offset" " intrinsic!"); Res = DAG.getNode(ISD::GET_DYNAMIC_AREA_OFFSET, sdl, DAG.getVTList(ResTy), Op); DAG.setRoot(Op); setValue(&I, Res); return; } case Intrinsic::stackguard: { MachineFunction &MF = DAG.getMachineFunction(); const Module &M = *MF.getFunction().getParent(); EVT PtrTy = TLI.getValueType(DAG.getDataLayout(), I.getType()); SDValue Chain = getRoot(); if (TLI.useLoadStackGuardNode()) { Res = getLoadStackGuard(DAG, sdl, Chain); Res = DAG.getPtrExtOrTrunc(Res, sdl, PtrTy); } else { const Value *Global = TLI.getSDagStackGuard(M); Align Align = DAG.getDataLayout().getPrefTypeAlign(Global->getType()); Res = DAG.getLoad(PtrTy, sdl, Chain, getValue(Global), MachinePointerInfo(Global, 0), Align, MachineMemOperand::MOVolatile); } if (TLI.useStackGuardXorFP()) Res = TLI.emitStackGuardXorFP(DAG, Res, sdl); DAG.setRoot(Chain); setValue(&I, Res); return; } case Intrinsic::stackprotector: { // Emit code into the DAG to store the stack guard onto the stack. MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo &MFI = MF.getFrameInfo(); SDValue Src, Chain = getRoot(); if (TLI.useLoadStackGuardNode()) Src = getLoadStackGuard(DAG, sdl, Chain); else Src = getValue(I.getArgOperand(0)); // The guard's value. AllocaInst *Slot = cast(I.getArgOperand(1)); int FI = FuncInfo.StaticAllocaMap[Slot]; MFI.setStackProtectorIndex(FI); EVT PtrTy = TLI.getFrameIndexTy(DAG.getDataLayout()); SDValue FIN = DAG.getFrameIndex(FI, PtrTy); // Store the stack protector onto the stack. 
Res = DAG.getStore( Chain, sdl, Src, FIN, MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), MaybeAlign(), MachineMemOperand::MOVolatile); setValue(&I, Res); DAG.setRoot(Res); return; } case Intrinsic::objectsize: llvm_unreachable("llvm.objectsize.* should have been lowered already"); case Intrinsic::is_constant: llvm_unreachable("llvm.is.constant.* should have been lowered already"); case Intrinsic::annotation: case Intrinsic::ptr_annotation: case Intrinsic::launder_invariant_group: case Intrinsic::strip_invariant_group: // Drop the intrinsic, but forward the value setValue(&I, getValue(I.getOperand(0))); return; case Intrinsic::assume: case Intrinsic::experimental_noalias_scope_decl: case Intrinsic::var_annotation: case Intrinsic::sideeffect: // Discard annotate attributes, noalias scope declarations, assumptions, and // artificial side-effects. return; case Intrinsic::codeview_annotation: { // Emit a label associated with this metadata. MachineFunction &MF = DAG.getMachineFunction(); MCSymbol *Label = MF.getContext().createTempSymbol("annotation", true); Metadata *MD = cast(I.getArgOperand(0))->getMetadata(); MF.addCodeViewAnnotation(Label, cast(MD)); Res = DAG.getLabelNode(ISD::ANNOTATION_LABEL, sdl, getRoot(), Label); DAG.setRoot(Res); return; } case Intrinsic::init_trampoline: { const Function *F = cast(I.getArgOperand(1)->stripPointerCasts()); SDValue Ops[6]; Ops[0] = getRoot(); Ops[1] = getValue(I.getArgOperand(0)); Ops[2] = getValue(I.getArgOperand(1)); Ops[3] = getValue(I.getArgOperand(2)); Ops[4] = DAG.getSrcValue(I.getArgOperand(0)); Ops[5] = DAG.getSrcValue(F); Res = DAG.getNode(ISD::INIT_TRAMPOLINE, sdl, MVT::Other, Ops); DAG.setRoot(Res); return; } case Intrinsic::adjust_trampoline: setValue(&I, DAG.getNode(ISD::ADJUST_TRAMPOLINE, sdl, TLI.getPointerTy(DAG.getDataLayout()), getValue(I.getArgOperand(0)))); return; case Intrinsic::gcroot: { assert(DAG.getMachineFunction().getFunction().hasGC() && "only valid in functions with gc specified, enforced by Verifier"); assert(GFI && "implied by previous"); const Value *Alloca = I.getArgOperand(0)->stripPointerCasts(); const Constant *TypeMap = cast(I.getArgOperand(1)); FrameIndexSDNode *FI = cast(getValue(Alloca).getNode()); GFI->addStackRoot(FI->getIndex(), TypeMap); return; } case Intrinsic::gcread: case Intrinsic::gcwrite: llvm_unreachable("GC failed to lower gcread/gcwrite intrinsics!"); case Intrinsic::get_rounding: Res = DAG.getNode(ISD::GET_ROUNDING, sdl, {MVT::i32, MVT::Other}, getRoot()); setValue(&I, Res); DAG.setRoot(Res.getValue(1)); return; case Intrinsic::expect: // Just replace __builtin_expect(exp, c) with EXP. 
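    // For example, %v = call i64 @llvm.expect.i64(i64 %x, i64 1) simply
    // forwards %x here; if the hint was acted upon at all, that happened in
    // earlier passes (e.g. when branch weights were assigned), so nothing
    // useful remains to lower at this point.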
setValue(&I, getValue(I.getArgOperand(0))); return; case Intrinsic::ubsantrap: case Intrinsic::debugtrap: case Intrinsic::trap: { StringRef TrapFuncName = I.getAttributes().getFnAttr("trap-func-name").getValueAsString(); if (TrapFuncName.empty()) { switch (Intrinsic) { case Intrinsic::trap: DAG.setRoot(DAG.getNode(ISD::TRAP, sdl, MVT::Other, getRoot())); break; case Intrinsic::debugtrap: DAG.setRoot(DAG.getNode(ISD::DEBUGTRAP, sdl, MVT::Other, getRoot())); break; case Intrinsic::ubsantrap: DAG.setRoot(DAG.getNode( ISD::UBSANTRAP, sdl, MVT::Other, getRoot(), DAG.getTargetConstant( cast(I.getArgOperand(0))->getZExtValue(), sdl, MVT::i32))); break; default: llvm_unreachable("unknown trap intrinsic"); } return; } TargetLowering::ArgListTy Args; if (Intrinsic == Intrinsic::ubsantrap) { Args.push_back(TargetLoweringBase::ArgListEntry()); Args[0].Val = I.getArgOperand(0); Args[0].Node = getValue(Args[0].Val); Args[0].Ty = Args[0].Val->getType(); } TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(sdl).setChain(getRoot()).setLibCallee( CallingConv::C, I.getType(), DAG.getExternalSymbol(TrapFuncName.data(), TLI.getPointerTy(DAG.getDataLayout())), std::move(Args)); std::pair Result = TLI.LowerCallTo(CLI); DAG.setRoot(Result.second); return; } case Intrinsic::allow_runtime_check: case Intrinsic::allow_ubsan_check: setValue(&I, getValue(ConstantInt::getTrue(I.getType()))); return; case Intrinsic::uadd_with_overflow: case Intrinsic::sadd_with_overflow: case Intrinsic::usub_with_overflow: case Intrinsic::ssub_with_overflow: case Intrinsic::umul_with_overflow: case Intrinsic::smul_with_overflow: { ISD::NodeType Op; switch (Intrinsic) { default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. case Intrinsic::uadd_with_overflow: Op = ISD::UADDO; break; case Intrinsic::sadd_with_overflow: Op = ISD::SADDO; break; case Intrinsic::usub_with_overflow: Op = ISD::USUBO; break; case Intrinsic::ssub_with_overflow: Op = ISD::SSUBO; break; case Intrinsic::umul_with_overflow: Op = ISD::UMULO; break; case Intrinsic::smul_with_overflow: Op = ISD::SMULO; break; } SDValue Op1 = getValue(I.getArgOperand(0)); SDValue Op2 = getValue(I.getArgOperand(1)); EVT ResultVT = Op1.getValueType(); EVT OverflowVT = MVT::i1; if (ResultVT.isVector()) OverflowVT = EVT::getVectorVT( *Context, OverflowVT, ResultVT.getVectorElementCount()); SDVTList VTs = DAG.getVTList(ResultVT, OverflowVT); setValue(&I, DAG.getNode(Op, sdl, VTs, Op1, Op2)); return; } case Intrinsic::prefetch: { SDValue Ops[5]; unsigned rw = cast(I.getArgOperand(1))->getZExtValue(); auto Flags = rw == 0 ? MachineMemOperand::MOLoad :MachineMemOperand::MOStore; Ops[0] = DAG.getRoot(); Ops[1] = getValue(I.getArgOperand(0)); Ops[2] = DAG.getTargetConstant(*cast(I.getArgOperand(1)), sdl, MVT::i32); Ops[3] = DAG.getTargetConstant(*cast(I.getArgOperand(2)), sdl, MVT::i32); Ops[4] = DAG.getTargetConstant(*cast(I.getArgOperand(3)), sdl, MVT::i32); SDValue Result = DAG.getMemIntrinsicNode( ISD::PREFETCH, sdl, DAG.getVTList(MVT::Other), Ops, EVT::getIntegerVT(*Context, 8), MachinePointerInfo(I.getArgOperand(0)), /* align */ std::nullopt, Flags); // Chain the prefetch in parallel with any pending loads, to stay out of // the way of later optimizations. PendingLoads.push_back(Result); Result = getRoot(); DAG.setRoot(Result); return; } case Intrinsic::lifetime_start: case Intrinsic::lifetime_end: { bool IsStart = (Intrinsic == Intrinsic::lifetime_start); // Stack coloring is not enabled in O0, discard region information. 
if (TM.getOptLevel() == CodeGenOptLevel::None) return; const int64_t ObjectSize = cast(I.getArgOperand(0))->getSExtValue(); Value *const ObjectPtr = I.getArgOperand(1); SmallVector Allocas; getUnderlyingObjects(ObjectPtr, Allocas); for (const Value *Alloca : Allocas) { const AllocaInst *LifetimeObject = dyn_cast_or_null(Alloca); // Could not find an Alloca. if (!LifetimeObject) continue; // First check that the Alloca is static, otherwise it won't have a // valid frame index. auto SI = FuncInfo.StaticAllocaMap.find(LifetimeObject); if (SI == FuncInfo.StaticAllocaMap.end()) return; const int FrameIndex = SI->second; int64_t Offset; if (GetPointerBaseWithConstantOffset( ObjectPtr, Offset, DAG.getDataLayout()) != LifetimeObject) Offset = -1; // Cannot determine offset from alloca to lifetime object. Res = DAG.getLifetimeNode(IsStart, sdl, getRoot(), FrameIndex, ObjectSize, Offset); DAG.setRoot(Res); } return; } case Intrinsic::pseudoprobe: { auto Guid = cast(I.getArgOperand(0))->getZExtValue(); auto Index = cast(I.getArgOperand(1))->getZExtValue(); auto Attr = cast(I.getArgOperand(2))->getZExtValue(); Res = DAG.getPseudoProbeNode(sdl, getRoot(), Guid, Index, Attr); DAG.setRoot(Res); return; } case Intrinsic::invariant_start: // Discard region information. setValue(&I, DAG.getUNDEF(TLI.getValueType(DAG.getDataLayout(), I.getType()))); return; case Intrinsic::invariant_end: // Discard region information. return; case Intrinsic::clear_cache: { SDValue InputChain = DAG.getRoot(); SDValue StartVal = getValue(I.getArgOperand(0)); SDValue EndVal = getValue(I.getArgOperand(1)); Res = DAG.getNode(ISD::CLEAR_CACHE, sdl, DAG.getVTList(MVT::Other), {InputChain, StartVal, EndVal}); setValue(&I, Res); DAG.setRoot(Res); return; } case Intrinsic::donothing: case Intrinsic::seh_try_begin: case Intrinsic::seh_scope_begin: case Intrinsic::seh_try_end: case Intrinsic::seh_scope_end: // ignore return; case Intrinsic::experimental_stackmap: visitStackmap(I); return; case Intrinsic::experimental_patchpoint_void: case Intrinsic::experimental_patchpoint: visitPatchpoint(I); return; case Intrinsic::experimental_gc_statepoint: LowerStatepoint(cast(I)); return; case Intrinsic::experimental_gc_result: visitGCResult(cast(I)); return; case Intrinsic::experimental_gc_relocate: visitGCRelocate(cast(I)); return; case Intrinsic::instrprof_cover: llvm_unreachable("instrprof failed to lower a cover"); case Intrinsic::instrprof_increment: llvm_unreachable("instrprof failed to lower an increment"); case Intrinsic::instrprof_timestamp: llvm_unreachable("instrprof failed to lower a timestamp"); case Intrinsic::instrprof_value_profile: llvm_unreachable("instrprof failed to lower a value profiling call"); case Intrinsic::instrprof_mcdc_parameters: llvm_unreachable("instrprof failed to lower mcdc parameters"); case Intrinsic::instrprof_mcdc_tvbitmap_update: llvm_unreachable("instrprof failed to lower an mcdc tvbitmap update"); case Intrinsic::localescape: { MachineFunction &MF = DAG.getMachineFunction(); const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo(); // Directly emit some LOCAL_ESCAPE machine instrs. Label assignment emission // is the same on all targets. for (unsigned Idx = 0, E = I.arg_size(); Idx < E; ++Idx) { Value *Arg = I.getArgOperand(Idx)->stripPointerCasts(); if (isa(Arg)) continue; // Skip null pointers. They represent a hole in index space. 
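      // Hypothetical example of such a hole:
      //
      //   call void (...) @llvm.localescape(ptr %a, ptr null, ptr %b)
      //
      // escapes %a at index 0 and %b at index 2; index 1 is simply skipped
      // here and never gets a frame-allocation label.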
AllocaInst *Slot = cast(Arg); assert(FuncInfo.StaticAllocaMap.count(Slot) && "can only escape static allocas"); int FI = FuncInfo.StaticAllocaMap[Slot]; MCSymbol *FrameAllocSym = MF.getContext().getOrCreateFrameAllocSymbol( GlobalValue::dropLLVMManglingEscape(MF.getName()), Idx); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, dl, TII->get(TargetOpcode::LOCAL_ESCAPE)) .addSym(FrameAllocSym) .addFrameIndex(FI); } return; } case Intrinsic::localrecover: { // i8* @llvm.localrecover(i8* %fn, i8* %fp, i32 %idx) MachineFunction &MF = DAG.getMachineFunction(); // Get the symbol that defines the frame offset. auto *Fn = cast(I.getArgOperand(0)->stripPointerCasts()); auto *Idx = cast(I.getArgOperand(2)); unsigned IdxVal = unsigned(Idx->getLimitedValue(std::numeric_limits::max())); MCSymbol *FrameAllocSym = MF.getContext().getOrCreateFrameAllocSymbol( GlobalValue::dropLLVMManglingEscape(Fn->getName()), IdxVal); Value *FP = I.getArgOperand(1); SDValue FPVal = getValue(FP); EVT PtrVT = FPVal.getValueType(); // Create a MCSymbol for the label to avoid any target lowering // that would make this PC relative. SDValue OffsetSym = DAG.getMCSymbol(FrameAllocSym, PtrVT); SDValue OffsetVal = DAG.getNode(ISD::LOCAL_RECOVER, sdl, PtrVT, OffsetSym); // Add the offset to the FP. SDValue Add = DAG.getMemBasePlusOffset(FPVal, OffsetVal, sdl); setValue(&I, Add); return; } case Intrinsic::eh_exceptionpointer: case Intrinsic::eh_exceptioncode: { // Get the exception pointer vreg, copy from it, and resize it to fit. const auto *CPI = cast(I.getArgOperand(0)); MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout()); const TargetRegisterClass *PtrRC = TLI.getRegClassFor(PtrVT); unsigned VReg = FuncInfo.getCatchPadExceptionPointerVReg(CPI, PtrRC); SDValue N = DAG.getCopyFromReg(DAG.getEntryNode(), sdl, VReg, PtrVT); if (Intrinsic == Intrinsic::eh_exceptioncode) N = DAG.getZExtOrTrunc(N, sdl, MVT::i32); setValue(&I, N); return; } case Intrinsic::xray_customevent: { // Here we want to make sure that the intrinsic behaves as if it has a // specific calling convention. const auto &Triple = DAG.getTarget().getTargetTriple(); if (!Triple.isAArch64(64) && Triple.getArch() != Triple::x86_64) return; SmallVector Ops; // We want to say that we always want the arguments in registers. SDValue LogEntryVal = getValue(I.getArgOperand(0)); SDValue StrSizeVal = getValue(I.getArgOperand(1)); SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); SDValue Chain = getRoot(); Ops.push_back(LogEntryVal); Ops.push_back(StrSizeVal); Ops.push_back(Chain); // We need to enforce the calling convention for the callsite, so that // argument ordering is enforced correctly, and that register allocation can // see that some registers may be assumed clobbered and have to preserve // them across calls to the intrinsic. MachineSDNode *MN = DAG.getMachineNode(TargetOpcode::PATCHABLE_EVENT_CALL, sdl, NodeTys, Ops); SDValue patchableNode = SDValue(MN, 0); DAG.setRoot(patchableNode); setValue(&I, patchableNode); return; } case Intrinsic::xray_typedevent: { // Here we want to make sure that the intrinsic behaves as if it has a // specific calling convention. const auto &Triple = DAG.getTarget().getTargetTriple(); if (!Triple.isAArch64(64) && Triple.getArch() != Triple::x86_64) return; SmallVector Ops; // We want to say that we always want the arguments in registers. // It's unclear to me how manipulating the selection DAG here forces callers // to provide arguments in registers instead of on the stack. 
SDValue LogTypeId = getValue(I.getArgOperand(0)); SDValue LogEntryVal = getValue(I.getArgOperand(1)); SDValue StrSizeVal = getValue(I.getArgOperand(2)); SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); SDValue Chain = getRoot(); Ops.push_back(LogTypeId); Ops.push_back(LogEntryVal); Ops.push_back(StrSizeVal); Ops.push_back(Chain); // We need to enforce the calling convention for the callsite, so that // argument ordering is enforced correctly, and that register allocation can // see that some registers may be assumed clobbered and have to preserve // them across calls to the intrinsic. MachineSDNode *MN = DAG.getMachineNode( TargetOpcode::PATCHABLE_TYPED_EVENT_CALL, sdl, NodeTys, Ops); SDValue patchableNode = SDValue(MN, 0); DAG.setRoot(patchableNode); setValue(&I, patchableNode); return; } case Intrinsic::experimental_deoptimize: LowerDeoptimizeCall(&I); return; case Intrinsic::experimental_stepvector: visitStepVector(I); return; case Intrinsic::vector_reduce_fadd: case Intrinsic::vector_reduce_fmul: case Intrinsic::vector_reduce_add: case Intrinsic::vector_reduce_mul: case Intrinsic::vector_reduce_and: case Intrinsic::vector_reduce_or: case Intrinsic::vector_reduce_xor: case Intrinsic::vector_reduce_smax: case Intrinsic::vector_reduce_smin: case Intrinsic::vector_reduce_umax: case Intrinsic::vector_reduce_umin: case Intrinsic::vector_reduce_fmax: case Intrinsic::vector_reduce_fmin: case Intrinsic::vector_reduce_fmaximum: case Intrinsic::vector_reduce_fminimum: visitVectorReduce(I, Intrinsic); return; case Intrinsic::icall_branch_funnel: { SmallVector Ops; Ops.push_back(getValue(I.getArgOperand(0))); int64_t Offset; auto *Base = dyn_cast(GetPointerBaseWithConstantOffset( I.getArgOperand(1), Offset, DAG.getDataLayout())); if (!Base) report_fatal_error( "llvm.icall.branch.funnel operand must be a GlobalValue"); Ops.push_back(DAG.getTargetGlobalAddress(Base, sdl, MVT::i64, 0)); struct BranchFunnelTarget { int64_t Offset; SDValue Target; }; SmallVector Targets; for (unsigned Op = 1, N = I.arg_size(); Op != N; Op += 2) { auto *ElemBase = dyn_cast(GetPointerBaseWithConstantOffset( I.getArgOperand(Op), Offset, DAG.getDataLayout())); if (ElemBase != Base) report_fatal_error("all llvm.icall.branch.funnel operands must refer " "to the same GlobalValue"); SDValue Val = getValue(I.getArgOperand(Op + 1)); auto *GA = dyn_cast(Val); if (!GA) report_fatal_error( "llvm.icall.branch.funnel operand must be a GlobalValue"); Targets.push_back({Offset, DAG.getTargetGlobalAddress( GA->getGlobal(), sdl, Val.getValueType(), GA->getOffset())}); } llvm::sort(Targets, [](const BranchFunnelTarget &T1, const BranchFunnelTarget &T2) { return T1.Offset < T2.Offset; }); for (auto &T : Targets) { Ops.push_back(DAG.getTargetConstant(T.Offset, sdl, MVT::i32)); Ops.push_back(T.Target); } Ops.push_back(DAG.getRoot()); // Chain SDValue N(DAG.getMachineNode(TargetOpcode::ICALL_BRANCH_FUNNEL, sdl, MVT::Other, Ops), 0); DAG.setRoot(N); setValue(&I, N); HasTailCall = true; return; } case Intrinsic::wasm_landingpad_index: // Information this intrinsic contained has been transferred to // MachineFunction in SelectionDAGISel::PrepareEHLandingPad. We can safely // delete it now. 
    return;
  case Intrinsic::aarch64_settag:
  case Intrinsic::aarch64_settag_zero: {
    const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
    bool ZeroMemory = Intrinsic == Intrinsic::aarch64_settag_zero;
    SDValue Val = TSI.EmitTargetCodeForSetTag(
        DAG, sdl, getRoot(), getValue(I.getArgOperand(0)),
        getValue(I.getArgOperand(1)), MachinePointerInfo(I.getArgOperand(0)),
        ZeroMemory);
    DAG.setRoot(Val);
    setValue(&I, Val);
    return;
  }
  case Intrinsic::amdgcn_cs_chain: {
    assert(I.arg_size() == 5 && "Additional args not supported yet");
    assert(cast<ConstantInt>(I.getOperand(4))->isZero() &&
           "Non-zero flags not supported yet");

    // At this point we don't care if it's amdgpu_cs_chain or
    // amdgpu_cs_chain_preserve.
    CallingConv::ID CC = CallingConv::AMDGPU_CS_Chain;

    Type *RetTy = I.getType();
    assert(RetTy->isVoidTy() && "Should not return");

    SDValue Callee = getValue(I.getOperand(0));

    // We only have 2 actual args: one for the SGPRs and one for the VGPRs.
    // We'll also tack the value of the EXEC mask at the end.
    TargetLowering::ArgListTy Args;
    Args.reserve(3);

    for (unsigned Idx : {2, 3, 1}) {
      TargetLowering::ArgListEntry Arg;
      Arg.Node = getValue(I.getOperand(Idx));
      Arg.Ty = I.getOperand(Idx)->getType();
      Arg.setAttributes(&I, Idx);
      Args.push_back(Arg);
    }

    assert(Args[0].IsInReg && "SGPR args should be marked inreg");
    assert(!Args[1].IsInReg && "VGPR args should not be marked inreg");
    Args[2].IsInReg = true; // EXEC should be inreg

    TargetLowering::CallLoweringInfo CLI(DAG);
    CLI.setDebugLoc(getCurSDLoc())
        .setChain(getRoot())
        .setCallee(CC, RetTy, Callee, std::move(Args))
        .setNoReturn(true)
        .setTailCall(true)
        .setConvergent(I.isConvergent());
    CLI.CB = &I;
    std::pair<SDValue, SDValue> Result =
        lowerInvokable(CLI, /*EHPadBB*/ nullptr);
    (void)Result;
    assert(!Result.first.getNode() && !Result.second.getNode() &&
           "Should've lowered as tail call");

    HasTailCall = true;
    return;
  }
  case Intrinsic::ptrmask: {
    SDValue Ptr = getValue(I.getOperand(0));
    SDValue Mask = getValue(I.getOperand(1));

    // On arm64_32, pointers are 32 bits when stored in memory, but
    // zero-extended to 64 bits when in registers. Thus the mask is 32 bits to
    // match the index type, but the pointer is 64 bits, so the mask must be
    // zero-extended up to 64 bits to match the pointer.
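    // Sketch of that situation (assuming an arm64_32-like target): MemVT is
    // i32 while PtrVT is i64, so a mask such as 0xFFFFFFF0 arrives as an i32
    // and is zero-extended to i64 below before the AND with the pointer.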
EVT PtrVT = TLI.getValueType(DAG.getDataLayout(), I.getOperand(0)->getType()); EVT MemVT = TLI.getMemValueType(DAG.getDataLayout(), I.getOperand(0)->getType()); assert(PtrVT == Ptr.getValueType()); assert(MemVT == Mask.getValueType()); if (MemVT != PtrVT) Mask = DAG.getPtrExtOrTrunc(Mask, sdl, PtrVT); setValue(&I, DAG.getNode(ISD::AND, sdl, PtrVT, Ptr, Mask)); return; } case Intrinsic::threadlocal_address: { setValue(&I, getValue(I.getOperand(0))); return; } case Intrinsic::get_active_lane_mask: { EVT CCVT = TLI.getValueType(DAG.getDataLayout(), I.getType()); SDValue Index = getValue(I.getOperand(0)); EVT ElementVT = Index.getValueType(); if (!TLI.shouldExpandGetActiveLaneMask(CCVT, ElementVT)) { visitTargetIntrinsic(I, Intrinsic); return; } SDValue TripCount = getValue(I.getOperand(1)); EVT VecTy = EVT::getVectorVT(*DAG.getContext(), ElementVT, CCVT.getVectorElementCount()); SDValue VectorIndex = DAG.getSplat(VecTy, sdl, Index); SDValue VectorTripCount = DAG.getSplat(VecTy, sdl, TripCount); SDValue VectorStep = DAG.getStepVector(sdl, VecTy); SDValue VectorInduction = DAG.getNode( ISD::UADDSAT, sdl, VecTy, VectorIndex, VectorStep); SDValue SetCC = DAG.getSetCC(sdl, CCVT, VectorInduction, VectorTripCount, ISD::CondCode::SETULT); setValue(&I, SetCC); return; } case Intrinsic::experimental_get_vector_length: { assert(cast(I.getOperand(1))->getSExtValue() > 0 && "Expected positive VF"); unsigned VF = cast(I.getOperand(1))->getZExtValue(); bool IsScalable = cast(I.getOperand(2))->isOne(); SDValue Count = getValue(I.getOperand(0)); EVT CountVT = Count.getValueType(); if (!TLI.shouldExpandGetVectorLength(CountVT, VF, IsScalable)) { visitTargetIntrinsic(I, Intrinsic); return; } // Expand to a umin between the trip count and the maximum elements the type // can hold. EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); // Extend the trip count to at least the result VT. if (CountVT.bitsLT(VT)) { Count = DAG.getNode(ISD::ZERO_EXTEND, sdl, VT, Count); CountVT = VT; } SDValue MaxEVL = DAG.getElementCount(sdl, CountVT, ElementCount::get(VF, IsScalable)); SDValue UMin = DAG.getNode(ISD::UMIN, sdl, CountVT, Count, MaxEVL); // Clip to the result type if needed. 
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, sdl, VT, UMin); setValue(&I, Trunc); return; } case Intrinsic::experimental_vector_partial_reduce_add: { SDValue OpNode = getValue(I.getOperand(1)); EVT ReducedTy = EVT::getEVT(I.getType()); EVT FullTy = OpNode.getValueType(); unsigned Stride = ReducedTy.getVectorMinNumElements(); unsigned ScaleFactor = FullTy.getVectorMinNumElements() / Stride; // Collect all of the subvectors std::deque Subvectors; Subvectors.push_back(getValue(I.getOperand(0))); for (unsigned i = 0; i < ScaleFactor; i++) { auto SourceIndex = DAG.getVectorIdxConstant(i * Stride, sdl); Subvectors.push_back(DAG.getNode(ISD::EXTRACT_SUBVECTOR, sdl, ReducedTy, {OpNode, SourceIndex})); } // Flatten the subvector tree while (Subvectors.size() > 1) { Subvectors.push_back(DAG.getNode(ISD::ADD, sdl, ReducedTy, {Subvectors[0], Subvectors[1]})); Subvectors.pop_front(); Subvectors.pop_front(); } assert(Subvectors.size() == 1 && "There should only be one subvector after tree flattening"); setValue(&I, Subvectors[0]); return; } case Intrinsic::experimental_cttz_elts: { auto DL = getCurSDLoc(); SDValue Op = getValue(I.getOperand(0)); EVT OpVT = Op.getValueType(); if (!TLI.shouldExpandCttzElements(OpVT)) { visitTargetIntrinsic(I, Intrinsic); return; } if (OpVT.getScalarType() != MVT::i1) { // Compare the input vector elements to zero & use to count trailing zeros SDValue AllZero = DAG.getConstant(0, DL, OpVT); OpVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, OpVT.getVectorElementCount()); Op = DAG.getSetCC(DL, OpVT, Op, AllZero, ISD::SETNE); } // If the zero-is-poison flag is set, we can assume the upper limit // of the result is VF-1. bool ZeroIsPoison = !cast(getValue(I.getOperand(1)))->isZero(); ConstantRange VScaleRange(1, true); // Dummy value. if (isa(I.getOperand(0)->getType())) VScaleRange = getVScaleRange(I.getCaller(), 64); unsigned EltWidth = TLI.getBitWidthForCttzElements( I.getType(), OpVT.getVectorElementCount(), ZeroIsPoison, &VScaleRange); MVT NewEltTy = MVT::getIntegerVT(EltWidth); // Create the new vector type & get the vector length EVT NewVT = EVT::getVectorVT(*DAG.getContext(), NewEltTy, OpVT.getVectorElementCount()); SDValue VL = DAG.getElementCount(DL, NewEltTy, OpVT.getVectorElementCount()); SDValue StepVec = DAG.getStepVector(DL, NewVT); SDValue SplatVL = DAG.getSplat(NewVT, DL, VL); SDValue StepVL = DAG.getNode(ISD::SUB, DL, NewVT, SplatVL, StepVec); SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, Op); SDValue And = DAG.getNode(ISD::AND, DL, NewVT, StepVL, Ext); SDValue Max = DAG.getNode(ISD::VECREDUCE_UMAX, DL, NewEltTy, And); SDValue Sub = DAG.getNode(ISD::SUB, DL, NewEltTy, VL, Max); EVT RetTy = TLI.getValueType(DAG.getDataLayout(), I.getType()); SDValue Ret = DAG.getZExtOrTrunc(Sub, DL, RetTy); setValue(&I, Ret); return; } case Intrinsic::vector_insert: { SDValue Vec = getValue(I.getOperand(0)); SDValue SubVec = getValue(I.getOperand(1)); SDValue Index = getValue(I.getOperand(2)); // The intrinsic's index type is i64, but the SDNode requires an index type // suitable for the target. Convert the index as required. 
MVT VectorIdxTy = TLI.getVectorIdxTy(DAG.getDataLayout()); if (Index.getValueType() != VectorIdxTy) Index = DAG.getVectorIdxConstant(Index->getAsZExtVal(), sdl); EVT ResultVT = TLI.getValueType(DAG.getDataLayout(), I.getType()); setValue(&I, DAG.getNode(ISD::INSERT_SUBVECTOR, sdl, ResultVT, Vec, SubVec, Index)); return; } case Intrinsic::vector_extract: { SDValue Vec = getValue(I.getOperand(0)); SDValue Index = getValue(I.getOperand(1)); EVT ResultVT = TLI.getValueType(DAG.getDataLayout(), I.getType()); // The intrinsic's index type is i64, but the SDNode requires an index type // suitable for the target. Convert the index as required. MVT VectorIdxTy = TLI.getVectorIdxTy(DAG.getDataLayout()); if (Index.getValueType() != VectorIdxTy) Index = DAG.getVectorIdxConstant(Index->getAsZExtVal(), sdl); setValue(&I, DAG.getNode(ISD::EXTRACT_SUBVECTOR, sdl, ResultVT, Vec, Index)); return; } case Intrinsic::vector_reverse: visitVectorReverse(I); return; case Intrinsic::vector_splice: visitVectorSplice(I); return; case Intrinsic::callbr_landingpad: visitCallBrLandingPad(I); return; case Intrinsic::vector_interleave2: visitVectorInterleave(I); return; case Intrinsic::vector_deinterleave2: visitVectorDeinterleave(I); return; case Intrinsic::experimental_vector_compress: setValue(&I, DAG.getNode(ISD::VECTOR_COMPRESS, sdl, getValue(I.getArgOperand(0)).getValueType(), getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), getValue(I.getArgOperand(2)), Flags)); return; case Intrinsic::experimental_convergence_anchor: case Intrinsic::experimental_convergence_entry: case Intrinsic::experimental_convergence_loop: visitConvergenceControl(I, Intrinsic); return; case Intrinsic::experimental_vector_histogram_add: { visitVectorHistogram(I, Intrinsic); return; } } } void SelectionDAGBuilder::visitConstrainedFPIntrinsic( const ConstrainedFPIntrinsic &FPI) { SDLoc sdl = getCurSDLoc(); // We do not need to serialize constrained FP intrinsics against // each other or against (nonvolatile) loads, so they can be // chained like loads. SDValue Chain = DAG.getRoot(); SmallVector Opers; Opers.push_back(Chain); for (unsigned I = 0, E = FPI.getNonMetadataArgCount(); I != E; ++I) Opers.push_back(getValue(FPI.getArgOperand(I))); auto pushOutChain = [this](SDValue Result, fp::ExceptionBehavior EB) { assert(Result.getNode()->getNumValues() == 2); // Push node to the appropriate list so that future instructions can be // chained up correctly. SDValue OutChain = Result.getValue(1); switch (EB) { case fp::ExceptionBehavior::ebIgnore: // The only reason why ebIgnore nodes still need to be chained is that // they might depend on the current rounding mode, and therefore must // not be moved across instruction that may change that mode. [[fallthrough]]; case fp::ExceptionBehavior::ebMayTrap: // These must not be moved across calls or instructions that may change // floating-point exception masks. PendingConstrainedFP.push_back(OutChain); break; case fp::ExceptionBehavior::ebStrict: // These must not be moved across calls or instructions that may change // floating-point exception masks or read floating-point exception flags. // In addition, they cannot be optimized out even if unused. 
PendingConstrainedFPStrict.push_back(OutChain); break; } }; const TargetLowering &TLI = DAG.getTargetLoweringInfo(); EVT VT = TLI.getValueType(DAG.getDataLayout(), FPI.getType()); SDVTList VTs = DAG.getVTList(VT, MVT::Other); fp::ExceptionBehavior EB = *FPI.getExceptionBehavior(); SDNodeFlags Flags; if (EB == fp::ExceptionBehavior::ebIgnore) Flags.setNoFPExcept(true); if (auto *FPOp = dyn_cast(&FPI)) Flags.copyFMF(*FPOp); unsigned Opcode; switch (FPI.getIntrinsicID()) { default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. #define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ case Intrinsic::INTRINSIC: \ Opcode = ISD::STRICT_##DAGN; \ break; #include "llvm/IR/ConstrainedOps.def" case Intrinsic::experimental_constrained_fmuladd: { Opcode = ISD::STRICT_FMA; // Break fmuladd into fmul and fadd. if (TM.Options.AllowFPOpFusion == FPOpFusion::Strict || !TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT)) { Opers.pop_back(); SDValue Mul = DAG.getNode(ISD::STRICT_FMUL, sdl, VTs, Opers, Flags); pushOutChain(Mul, EB); Opcode = ISD::STRICT_FADD; Opers.clear(); Opers.push_back(Mul.getValue(1)); Opers.push_back(Mul.getValue(0)); Opers.push_back(getValue(FPI.getArgOperand(2))); } break; } } // A few strict DAG nodes carry additional operands that are not // set up by the default code above. switch (Opcode) { default: break; case ISD::STRICT_FP_ROUND: Opers.push_back( DAG.getTargetConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout()))); break; case ISD::STRICT_FSETCC: case ISD::STRICT_FSETCCS: { auto *FPCmp = dyn_cast(&FPI); ISD::CondCode Condition = getFCmpCondCode(FPCmp->getPredicate()); if (TM.Options.NoNaNsFPMath) Condition = getFCmpCodeWithoutNaN(Condition); Opers.push_back(DAG.getCondCode(Condition)); break; } } SDValue Result = DAG.getNode(Opcode, sdl, VTs, Opers, Flags); pushOutChain(Result, EB); SDValue FPResult = Result.getValue(0); setValue(&FPI, FPResult); } static unsigned getISDForVPIntrinsic(const VPIntrinsic &VPIntrin) { std::optional ResOPC; switch (VPIntrin.getIntrinsicID()) { case Intrinsic::vp_ctlz: { bool IsZeroUndef = cast(VPIntrin.getArgOperand(1))->isOne(); ResOPC = IsZeroUndef ? ISD::VP_CTLZ_ZERO_UNDEF : ISD::VP_CTLZ; break; } case Intrinsic::vp_cttz: { bool IsZeroUndef = cast(VPIntrin.getArgOperand(1))->isOne(); ResOPC = IsZeroUndef ? ISD::VP_CTTZ_ZERO_UNDEF : ISD::VP_CTTZ; break; } case Intrinsic::vp_cttz_elts: { bool IsZeroPoison = cast(VPIntrin.getArgOperand(1))->isOne(); ResOPC = IsZeroPoison ? ISD::VP_CTTZ_ELTS_ZERO_UNDEF : ISD::VP_CTTZ_ELTS; break; } #define HELPER_MAP_VPID_TO_VPSD(VPID, VPSD) \ case Intrinsic::VPID: \ ResOPC = ISD::VPSD; \ break; #include "llvm/IR/VPIntrinsics.def" } if (!ResOPC) llvm_unreachable( "Inconsistency: no SDNode available for this VPIntrinsic!"); if (*ResOPC == ISD::VP_REDUCE_SEQ_FADD || *ResOPC == ISD::VP_REDUCE_SEQ_FMUL) { if (VPIntrin.getFastMathFlags().allowReassoc()) return *ResOPC == ISD::VP_REDUCE_SEQ_FADD ? ISD::VP_REDUCE_FADD : ISD::VP_REDUCE_FMUL; } return *ResOPC; } void SelectionDAGBuilder::visitVPLoad( const VPIntrinsic &VPIntrin, EVT VT, const SmallVectorImpl &OpValues) { SDLoc DL = getCurSDLoc(); Value *PtrOperand = VPIntrin.getArgOperand(0); MaybeAlign Alignment = VPIntrin.getPointerAlignment(); AAMDNodes AAInfo = VPIntrin.getAAMetadata(); const MDNode *Ranges = getRangeMetadata(VPIntrin); SDValue LD; // Do not serialize variable-length loads of constant memory with // anything. 
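  // A VP load from provably constant memory is chained to the entry node
  // rather than the current root, so it is not serialized against pending
  // stores; otherwise its output chain is added to PendingLoads below.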
if (!Alignment) Alignment = DAG.getEVTAlign(VT); MemoryLocation ML = MemoryLocation::getAfter(PtrOperand, AAInfo); bool AddToChain = !AA || !AA->pointsToConstantMemory(ML); SDValue InChain = AddToChain ? DAG.getRoot() : DAG.getEntryNode(); MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( MachinePointerInfo(PtrOperand), MachineMemOperand::MOLoad, LocationSize::beforeOrAfterPointer(), *Alignment, AAInfo, Ranges); LD = DAG.getLoadVP(VT, DL, InChain, OpValues[0], OpValues[1], OpValues[2], MMO, false /*IsExpanding */); if (AddToChain) PendingLoads.push_back(LD.getValue(1)); setValue(&VPIntrin, LD); } void SelectionDAGBuilder::visitVPGather( const VPIntrinsic &VPIntrin, EVT VT, const SmallVectorImpl &OpValues) { SDLoc DL = getCurSDLoc(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); Value *PtrOperand = VPIntrin.getArgOperand(0); MaybeAlign Alignment = VPIntrin.getPointerAlignment(); AAMDNodes AAInfo = VPIntrin.getAAMetadata(); const MDNode *Ranges = getRangeMetadata(VPIntrin); SDValue LD; if (!Alignment) Alignment = DAG.getEVTAlign(VT.getScalarType()); unsigned AS = PtrOperand->getType()->getScalarType()->getPointerAddressSpace(); MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( MachinePointerInfo(AS), MachineMemOperand::MOLoad, LocationSize::beforeOrAfterPointer(), *Alignment, AAInfo, Ranges); SDValue Base, Index, Scale; ISD::MemIndexType IndexType; bool UniformBase = getUniformBase(PtrOperand, Base, Index, IndexType, Scale, this, VPIntrin.getParent(), VT.getScalarStoreSize()); if (!UniformBase) { Base = DAG.getConstant(0, DL, TLI.getPointerTy(DAG.getDataLayout())); Index = getValue(PtrOperand); IndexType = ISD::SIGNED_SCALED; Scale = DAG.getTargetConstant(1, DL, TLI.getPointerTy(DAG.getDataLayout())); } EVT IdxVT = Index.getValueType(); EVT EltTy = IdxVT.getVectorElementType(); if (TLI.shouldExtendGSIndex(IdxVT, EltTy)) { EVT NewIdxVT = IdxVT.changeVectorElementType(EltTy); Index = DAG.getNode(ISD::SIGN_EXTEND, DL, NewIdxVT, Index); } LD = DAG.getGatherVP( DAG.getVTList(VT, MVT::Other), VT, DL, {DAG.getRoot(), Base, Index, Scale, OpValues[1], OpValues[2]}, MMO, IndexType); PendingLoads.push_back(LD.getValue(1)); setValue(&VPIntrin, LD); } void SelectionDAGBuilder::visitVPStore( const VPIntrinsic &VPIntrin, const SmallVectorImpl &OpValues) { SDLoc DL = getCurSDLoc(); Value *PtrOperand = VPIntrin.getArgOperand(1); EVT VT = OpValues[0].getValueType(); MaybeAlign Alignment = VPIntrin.getPointerAlignment(); AAMDNodes AAInfo = VPIntrin.getAAMetadata(); SDValue ST; if (!Alignment) Alignment = DAG.getEVTAlign(VT); SDValue Ptr = OpValues[1]; SDValue Offset = DAG.getUNDEF(Ptr.getValueType()); MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( MachinePointerInfo(PtrOperand), MachineMemOperand::MOStore, LocationSize::beforeOrAfterPointer(), *Alignment, AAInfo); ST = DAG.getStoreVP(getMemoryRoot(), DL, OpValues[0], Ptr, Offset, OpValues[2], OpValues[3], VT, MMO, ISD::UNINDEXED, /* IsTruncating */ false, /*IsCompressing*/ false); DAG.setRoot(ST); setValue(&VPIntrin, ST); } void SelectionDAGBuilder::visitVPScatter( const VPIntrinsic &VPIntrin, const SmallVectorImpl &OpValues) { SDLoc DL = getCurSDLoc(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); Value *PtrOperand = VPIntrin.getArgOperand(1); EVT VT = OpValues[0].getValueType(); MaybeAlign Alignment = VPIntrin.getPointerAlignment(); AAMDNodes AAInfo = VPIntrin.getAAMetadata(); SDValue ST; if (!Alignment) Alignment = DAG.getEVTAlign(VT.getScalarType()); unsigned AS = 
PtrOperand->getType()->getScalarType()->getPointerAddressSpace(); MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( MachinePointerInfo(AS), MachineMemOperand::MOStore, LocationSize::beforeOrAfterPointer(), *Alignment, AAInfo); SDValue Base, Index, Scale; ISD::MemIndexType IndexType; bool UniformBase = getUniformBase(PtrOperand, Base, Index, IndexType, Scale, this, VPIntrin.getParent(), VT.getScalarStoreSize()); if (!UniformBase) { Base = DAG.getConstant(0, DL, TLI.getPointerTy(DAG.getDataLayout())); Index = getValue(PtrOperand); IndexType = ISD::SIGNED_SCALED; Scale = DAG.getTargetConstant(1, DL, TLI.getPointerTy(DAG.getDataLayout())); } EVT IdxVT = Index.getValueType(); EVT EltTy = IdxVT.getVectorElementType(); if (TLI.shouldExtendGSIndex(IdxVT, EltTy)) { EVT NewIdxVT = IdxVT.changeVectorElementType(EltTy); Index = DAG.getNode(ISD::SIGN_EXTEND, DL, NewIdxVT, Index); } ST = DAG.getScatterVP(DAG.getVTList(MVT::Other), VT, DL, {getMemoryRoot(), OpValues[0], Base, Index, Scale, OpValues[2], OpValues[3]}, MMO, IndexType); DAG.setRoot(ST); setValue(&VPIntrin, ST); } void SelectionDAGBuilder::visitVPStridedLoad( const VPIntrinsic &VPIntrin, EVT VT, const SmallVectorImpl &OpValues) { SDLoc DL = getCurSDLoc(); Value *PtrOperand = VPIntrin.getArgOperand(0); MaybeAlign Alignment = VPIntrin.getPointerAlignment(); if (!Alignment) Alignment = DAG.getEVTAlign(VT.getScalarType()); AAMDNodes AAInfo = VPIntrin.getAAMetadata(); const MDNode *Ranges = getRangeMetadata(VPIntrin); MemoryLocation ML = MemoryLocation::getAfter(PtrOperand, AAInfo); bool AddToChain = !AA || !AA->pointsToConstantMemory(ML); SDValue InChain = AddToChain ? DAG.getRoot() : DAG.getEntryNode(); unsigned AS = PtrOperand->getType()->getPointerAddressSpace(); MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( MachinePointerInfo(AS), MachineMemOperand::MOLoad, LocationSize::beforeOrAfterPointer(), *Alignment, AAInfo, Ranges); SDValue LD = DAG.getStridedLoadVP(VT, DL, InChain, OpValues[0], OpValues[1], OpValues[2], OpValues[3], MMO, false /*IsExpanding*/); if (AddToChain) PendingLoads.push_back(LD.getValue(1)); setValue(&VPIntrin, LD); } void SelectionDAGBuilder::visitVPStridedStore( const VPIntrinsic &VPIntrin, const SmallVectorImpl &OpValues) { SDLoc DL = getCurSDLoc(); Value *PtrOperand = VPIntrin.getArgOperand(1); EVT VT = OpValues[0].getValueType(); MaybeAlign Alignment = VPIntrin.getPointerAlignment(); if (!Alignment) Alignment = DAG.getEVTAlign(VT.getScalarType()); AAMDNodes AAInfo = VPIntrin.getAAMetadata(); unsigned AS = PtrOperand->getType()->getPointerAddressSpace(); MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( MachinePointerInfo(AS), MachineMemOperand::MOStore, LocationSize::beforeOrAfterPointer(), *Alignment, AAInfo); SDValue ST = DAG.getStridedStoreVP( getMemoryRoot(), DL, OpValues[0], OpValues[1], DAG.getUNDEF(OpValues[1].getValueType()), OpValues[2], OpValues[3], OpValues[4], VT, MMO, ISD::UNINDEXED, /*IsTruncating*/ false, /*IsCompressing*/ false); DAG.setRoot(ST); setValue(&VPIntrin, ST); } void SelectionDAGBuilder::visitVPCmp(const VPCmpIntrinsic &VPIntrin) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDLoc DL = getCurSDLoc(); ISD::CondCode Condition; CmpInst::Predicate CondCode = VPIntrin.getPredicate(); bool IsFP = VPIntrin.getOperand(0)->getType()->isFPOrFPVectorTy(); if (IsFP) { // FIXME: Regular fcmps are FPMathOperators which may have fast-math (nnan) // flags, but calls that don't return floating-point types can't be // 
FPMathOperators, like vp.fcmp. This affects constrained fcmp too. Condition = getFCmpCondCode(CondCode); if (TM.Options.NoNaNsFPMath) Condition = getFCmpCodeWithoutNaN(Condition); } else { Condition = getICmpCondCode(CondCode); } SDValue Op1 = getValue(VPIntrin.getOperand(0)); SDValue Op2 = getValue(VPIntrin.getOperand(1)); // #2 is the condition code SDValue MaskOp = getValue(VPIntrin.getOperand(3)); SDValue EVL = getValue(VPIntrin.getOperand(4)); MVT EVLParamVT = TLI.getVPExplicitVectorLengthTy(); assert(EVLParamVT.isScalarInteger() && EVLParamVT.bitsGE(MVT::i32) && "Unexpected target EVL type"); EVL = DAG.getNode(ISD::ZERO_EXTEND, DL, EVLParamVT, EVL); EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), VPIntrin.getType()); setValue(&VPIntrin, DAG.getSetCCVP(DL, DestVT, Op1, Op2, Condition, MaskOp, EVL)); } void SelectionDAGBuilder::visitVectorPredicationIntrinsic( const VPIntrinsic &VPIntrin) { SDLoc DL = getCurSDLoc(); unsigned Opcode = getISDForVPIntrinsic(VPIntrin); auto IID = VPIntrin.getIntrinsicID(); if (const auto *CmpI = dyn_cast(&VPIntrin)) return visitVPCmp(*CmpI); SmallVector ValueVTs; const TargetLowering &TLI = DAG.getTargetLoweringInfo(); ComputeValueVTs(TLI, DAG.getDataLayout(), VPIntrin.getType(), ValueVTs); SDVTList VTs = DAG.getVTList(ValueVTs); auto EVLParamPos = VPIntrinsic::getVectorLengthParamPos(IID); MVT EVLParamVT = TLI.getVPExplicitVectorLengthTy(); assert(EVLParamVT.isScalarInteger() && EVLParamVT.bitsGE(MVT::i32) && "Unexpected target EVL type"); // Request operands. SmallVector OpValues; for (unsigned I = 0; I < VPIntrin.arg_size(); ++I) { auto Op = getValue(VPIntrin.getArgOperand(I)); if (I == EVLParamPos) Op = DAG.getNode(ISD::ZERO_EXTEND, DL, EVLParamVT, Op); OpValues.push_back(Op); } switch (Opcode) { default: { SDNodeFlags SDFlags; if (auto *FPMO = dyn_cast(&VPIntrin)) SDFlags.copyFMF(*FPMO); SDValue Result = DAG.getNode(Opcode, DL, VTs, OpValues, SDFlags); setValue(&VPIntrin, Result); break; } case ISD::VP_LOAD: visitVPLoad(VPIntrin, ValueVTs[0], OpValues); break; case ISD::VP_GATHER: visitVPGather(VPIntrin, ValueVTs[0], OpValues); break; case ISD::EXPERIMENTAL_VP_STRIDED_LOAD: visitVPStridedLoad(VPIntrin, ValueVTs[0], OpValues); break; case ISD::VP_STORE: visitVPStore(VPIntrin, OpValues); break; case ISD::VP_SCATTER: visitVPScatter(VPIntrin, OpValues); break; case ISD::EXPERIMENTAL_VP_STRIDED_STORE: visitVPStridedStore(VPIntrin, OpValues); break; case ISD::VP_FMULADD: { assert(OpValues.size() == 5 && "Unexpected number of operands"); SDNodeFlags SDFlags; if (auto *FPMO = dyn_cast(&VPIntrin)) SDFlags.copyFMF(*FPMO); if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict && TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), ValueVTs[0])) { setValue(&VPIntrin, DAG.getNode(ISD::VP_FMA, DL, VTs, OpValues, SDFlags)); } else { SDValue Mul = DAG.getNode( ISD::VP_FMUL, DL, VTs, {OpValues[0], OpValues[1], OpValues[3], OpValues[4]}, SDFlags); SDValue Add = DAG.getNode(ISD::VP_FADD, DL, VTs, {Mul, OpValues[2], OpValues[3], OpValues[4]}, SDFlags); setValue(&VPIntrin, Add); } break; } case ISD::VP_IS_FPCLASS: { const DataLayout DLayout = DAG.getDataLayout(); EVT DestVT = TLI.getValueType(DLayout, VPIntrin.getType()); auto Constant = OpValues[1]->getAsZExtVal(); SDValue Check = DAG.getTargetConstant(Constant, DL, MVT::i32); SDValue V = DAG.getNode(ISD::VP_IS_FPCLASS, DL, DestVT, {OpValues[0], Check, OpValues[2], OpValues[3]}); setValue(&VPIntrin, V); return; } case ISD::VP_INTTOPTR: { SDValue N = OpValues[0]; EVT DestVT = 
TLI.getValueType(DAG.getDataLayout(), VPIntrin.getType()); EVT PtrMemVT = TLI.getMemValueType(DAG.getDataLayout(), VPIntrin.getType()); N = DAG.getVPPtrExtOrTrunc(getCurSDLoc(), DestVT, N, OpValues[1], OpValues[2]); N = DAG.getVPZExtOrTrunc(getCurSDLoc(), PtrMemVT, N, OpValues[1], OpValues[2]); setValue(&VPIntrin, N); break; } case ISD::VP_PTRTOINT: { SDValue N = OpValues[0]; EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), VPIntrin.getType()); EVT PtrMemVT = TLI.getMemValueType(DAG.getDataLayout(), VPIntrin.getOperand(0)->getType()); N = DAG.getVPPtrExtOrTrunc(getCurSDLoc(), PtrMemVT, N, OpValues[1], OpValues[2]); N = DAG.getVPZExtOrTrunc(getCurSDLoc(), DestVT, N, OpValues[1], OpValues[2]); setValue(&VPIntrin, N); break; } case ISD::VP_ABS: case ISD::VP_CTLZ: case ISD::VP_CTLZ_ZERO_UNDEF: case ISD::VP_CTTZ: case ISD::VP_CTTZ_ZERO_UNDEF: case ISD::VP_CTTZ_ELTS_ZERO_UNDEF: case ISD::VP_CTTZ_ELTS: { SDValue Result = DAG.getNode(Opcode, DL, VTs, {OpValues[0], OpValues[2], OpValues[3]}); setValue(&VPIntrin, Result); break; } } } SDValue SelectionDAGBuilder::lowerStartEH(SDValue Chain, const BasicBlock *EHPadBB, MCSymbol *&BeginLabel) { MachineFunction &MF = DAG.getMachineFunction(); MachineModuleInfo &MMI = MF.getMMI(); // Insert a label before the invoke call to mark the try range. This can be // used to detect deletion of the invoke via the MachineModuleInfo. BeginLabel = MF.getContext().createTempSymbol(); // For SjLj, keep track of which landing pads go with which invokes // so as to maintain the ordering of pads in the LSDA. unsigned CallSiteIndex = MMI.getCurrentCallSite(); if (CallSiteIndex) { MF.setCallSiteBeginLabel(BeginLabel, CallSiteIndex); LPadToCallSiteMap[FuncInfo.MBBMap[EHPadBB]].push_back(CallSiteIndex); // Now that the call site is handled, stop tracking it. MMI.setCurrentCallSite(0); } return DAG.getEHLabel(getCurSDLoc(), Chain, BeginLabel); } SDValue SelectionDAGBuilder::lowerEndEH(SDValue Chain, const InvokeInst *II, const BasicBlock *EHPadBB, MCSymbol *BeginLabel) { assert(BeginLabel && "BeginLabel should've been set"); MachineFunction &MF = DAG.getMachineFunction(); // Insert a label at the end of the invoke call to mark the try range. This // can be used to detect deletion of the invoke via the MachineModuleInfo. MCSymbol *EndLabel = MF.getContext().createTempSymbol(); Chain = DAG.getEHLabel(getCurSDLoc(), Chain, EndLabel); // Inform MachineModuleInfo of range. auto Pers = classifyEHPersonality(FuncInfo.Fn->getPersonalityFn()); // There is a platform (e.g. wasm) that uses funclet style IR but does not // actually use outlined funclets and their LSDA info style. if (MF.hasEHFunclets() && isFuncletEHPersonality(Pers)) { assert(II && "II should've been set"); WinEHFuncInfo *EHInfo = MF.getWinEHFuncInfo(); EHInfo->addIPToStateRange(II, BeginLabel, EndLabel); } else if (!isScopedEHPersonality(Pers)) { assert(EHPadBB); MF.addInvoke(FuncInfo.MBBMap[EHPadBB], BeginLabel, EndLabel); } return Chain; } std::pair SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI, const BasicBlock *EHPadBB) { MCSymbol *BeginLabel = nullptr; if (EHPadBB) { // Both PendingLoads and PendingExports must be flushed here; // this call might not return. 
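    // getRoot() flushes the pending loads, and getControlRoot() below also
    // flushes the pending exports before the EH begin label is emitted.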
(void)getRoot(); DAG.setRoot(lowerStartEH(getControlRoot(), EHPadBB, BeginLabel)); CLI.setChain(getRoot()); } const TargetLowering &TLI = DAG.getTargetLoweringInfo(); std::pair Result = TLI.LowerCallTo(CLI); assert((CLI.IsTailCall || Result.second.getNode()) && "Non-null chain expected with non-tail call!"); assert((Result.second.getNode() || !Result.first.getNode()) && "Null value expected with tail call!"); if (!Result.second.getNode()) { // As a special case, a null chain means that a tail call has been emitted // and the DAG root is already updated. HasTailCall = true; // Since there's no actual continuation from this block, nothing can be // relying on us setting vregs for them. PendingExports.clear(); } else { DAG.setRoot(Result.second); } if (EHPadBB) { DAG.setRoot(lowerEndEH(getRoot(), cast_or_null(CLI.CB), EHPadBB, BeginLabel)); Result.second = getRoot(); } return Result; } void SelectionDAGBuilder::LowerCallTo(const CallBase &CB, SDValue Callee, bool isTailCall, bool isMustTailCall, const BasicBlock *EHPadBB, const TargetLowering::PtrAuthInfo *PAI) { auto &DL = DAG.getDataLayout(); FunctionType *FTy = CB.getFunctionType(); Type *RetTy = CB.getType(); TargetLowering::ArgListTy Args; Args.reserve(CB.arg_size()); const Value *SwiftErrorVal = nullptr; const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (isTailCall) { // Avoid emitting tail calls in functions with the disable-tail-calls // attribute. auto *Caller = CB.getParent()->getParent(); if (Caller->getFnAttribute("disable-tail-calls").getValueAsString() == "true" && !isMustTailCall) isTailCall = false; // We can't tail call inside a function with a swifterror argument. Lowering // does not support this yet. It would have to move into the swifterror // register before the call. if (TLI.supportSwiftError() && Caller->getAttributes().hasAttrSomewhere(Attribute::SwiftError)) isTailCall = false; } for (auto I = CB.arg_begin(), E = CB.arg_end(); I != E; ++I) { TargetLowering::ArgListEntry Entry; const Value *V = *I; // Skip empty types if (V->getType()->isEmptyTy()) continue; SDValue ArgNode = getValue(V); Entry.Node = ArgNode; Entry.Ty = V->getType(); Entry.setAttributes(&CB, I - CB.arg_begin()); // Use swifterror virtual register as input to the call. if (Entry.IsSwiftError && TLI.supportSwiftError()) { SwiftErrorVal = V; // We find the virtual register for the actual swifterror argument. // Instead of using the Value, we use the virtual register instead. Entry.Node = DAG.getRegister(SwiftError.getOrCreateVRegUseAt(&CB, FuncInfo.MBB, V), EVT(TLI.getPointerTy(DL))); } Args.push_back(Entry); // If we have an explicit sret argument that is an Instruction, (i.e., it // might point to function-local memory), we can't meaningfully tail-call. if (Entry.IsSRet && isa(V)) isTailCall = false; } // If call site has a cfguardtarget operand bundle, create and add an // additional ArgListEntry. if (auto Bundle = CB.getOperandBundle(LLVMContext::OB_cfguardtarget)) { TargetLowering::ArgListEntry Entry; Value *V = Bundle->Inputs[0]; SDValue ArgNode = getValue(V); Entry.Node = ArgNode; Entry.Ty = V->getType(); Entry.IsCFGuardTarget = true; Args.push_back(Entry); } // Check if target-independent constraints permit a tail call here. // Target-dependent constraints are checked within TLI->LowerCallTo. if (isTailCall && !isInTailCallPosition(CB, DAG.getTarget())) isTailCall = false; // Disable tail calls if there is an swifterror argument. Targets have not // been updated to support tail calls. 
if (TLI.supportSwiftError() && SwiftErrorVal) isTailCall = false; ConstantInt *CFIType = nullptr; if (CB.isIndirectCall()) { if (auto Bundle = CB.getOperandBundle(LLVMContext::OB_kcfi)) { if (!TLI.supportKCFIBundles()) report_fatal_error( "Target doesn't support calls with kcfi operand bundles."); CFIType = cast(Bundle->Inputs[0]); assert(CFIType->getType()->isIntegerTy(32) && "Invalid CFI type"); } } SDValue ConvControlToken; if (auto Bundle = CB.getOperandBundle(LLVMContext::OB_convergencectrl)) { auto *Token = Bundle->Inputs[0].get(); ConvControlToken = getValue(Token); } TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(getCurSDLoc()) .setChain(getRoot()) .setCallee(RetTy, FTy, Callee, std::move(Args), CB) .setTailCall(isTailCall) .setConvergent(CB.isConvergent()) .setIsPreallocated( CB.countOperandBundlesOfType(LLVMContext::OB_preallocated) != 0) .setCFIType(CFIType) .setConvergenceControlToken(ConvControlToken); // Set the pointer authentication info if we have it. if (PAI) { if (!TLI.supportPtrAuthBundles()) report_fatal_error( "This target doesn't support calls with ptrauth operand bundles."); CLI.setPtrAuth(*PAI); } std::pair Result = lowerInvokable(CLI, EHPadBB); if (Result.first.getNode()) { Result.first = lowerRangeToAssertZExt(DAG, CB, Result.first); setValue(&CB, Result.first); } // The last element of CLI.InVals has the SDValue for swifterror return. // Here we copy it to a virtual register and update SwiftErrorMap for // book-keeping. if (SwiftErrorVal && TLI.supportSwiftError()) { // Get the last element of InVals. SDValue Src = CLI.InVals.back(); Register VReg = SwiftError.getOrCreateVRegDefAt(&CB, FuncInfo.MBB, SwiftErrorVal); SDValue CopyNode = CLI.DAG.getCopyToReg(Result.second, CLI.DL, VReg, Src); DAG.setRoot(CopyNode); } } static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT, SelectionDAGBuilder &Builder) { // Check to see if this load can be trivially constant folded, e.g. if the // input is from a string literal. if (const Constant *LoadInput = dyn_cast(PtrVal)) { // Cast pointer to the type we really want to load. Type *LoadTy = Type::getIntNTy(PtrVal->getContext(), LoadVT.getScalarSizeInBits()); if (LoadVT.isVector()) LoadTy = FixedVectorType::get(LoadTy, LoadVT.getVectorNumElements()); LoadInput = ConstantExpr::getBitCast(const_cast(LoadInput), PointerType::getUnqual(LoadTy)); if (const Constant *LoadCst = ConstantFoldLoadFromConstPtr(const_cast(LoadInput), LoadTy, Builder.DAG.getDataLayout())) return Builder.getValue(LoadCst); } // Otherwise, we have to emit the load. If the pointer is to unfoldable but // still constant memory, the input chain can be the entry node. SDValue Root; bool ConstantMemory = false; // Do not serialize (non-volatile) loads of constant memory with anything. if (Builder.AA && Builder.AA->pointsToConstantMemory(PtrVal)) { Root = Builder.DAG.getEntryNode(); ConstantMemory = true; } else { // Do not serialize non-volatile loads against each other. Root = Builder.DAG.getRoot(); } SDValue Ptr = Builder.getValue(PtrVal); SDValue LoadVal = Builder.DAG.getLoad(LoadVT, Builder.getCurSDLoc(), Root, Ptr, MachinePointerInfo(PtrVal), Align(1)); if (!ConstantMemory) Builder.PendingLoads.push_back(LoadVal.getValue(1)); return LoadVal; } /// Record the value for an instruction that produces an integer result, /// converting the type where necessary. 
void SelectionDAGBuilder::processIntegerCallValue(const Instruction &I, SDValue Value, bool IsSigned) { EVT VT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), I.getType(), true); Value = DAG.getExtOrTrunc(IsSigned, Value, getCurSDLoc(), VT); setValue(&I, Value); } /// See if we can lower a memcmp/bcmp call into an optimized form. If so, return /// true and lower it. Otherwise return false, and it will be lowered like a /// normal call. /// The caller already checked that \p I calls the appropriate LibFunc with a /// correct prototype. bool SelectionDAGBuilder::visitMemCmpBCmpCall(const CallInst &I) { const Value *LHS = I.getArgOperand(0), *RHS = I.getArgOperand(1); const Value *Size = I.getArgOperand(2); const ConstantSDNode *CSize = dyn_cast(getValue(Size)); if (CSize && CSize->getZExtValue() == 0) { EVT CallVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), I.getType(), true); setValue(&I, DAG.getConstant(0, getCurSDLoc(), CallVT)); return true; } const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo(); std::pair Res = TSI.EmitTargetCodeForMemcmp( DAG, getCurSDLoc(), DAG.getRoot(), getValue(LHS), getValue(RHS), getValue(Size), MachinePointerInfo(LHS), MachinePointerInfo(RHS)); if (Res.first.getNode()) { processIntegerCallValue(I, Res.first, true); PendingLoads.push_back(Res.second); return true; } // memcmp(S1,S2,2) != 0 -> (*(short*)LHS != *(short*)RHS) != 0 // memcmp(S1,S2,4) != 0 -> (*(int*)LHS != *(int*)RHS) != 0 if (!CSize || !isOnlyUsedInZeroEqualityComparison(&I)) return false; // If the target has a fast compare for the given size, it will return a // preferred load type for that size. Require that the load VT is legal and // that the target supports unaligned loads of that type. Otherwise, return // INVALID. auto hasFastLoadsAndCompare = [&](unsigned NumBits) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); MVT LVT = TLI.hasFastEqualityCompare(NumBits); if (LVT != MVT::INVALID_SIMPLE_VALUE_TYPE) { // TODO: Handle 5 byte compare as 4-byte + 1 byte. // TODO: Handle 8 byte compare on x86-32 as two 32-bit loads. // TODO: Check alignment of src and dest ptrs. unsigned DstAS = LHS->getType()->getPointerAddressSpace(); unsigned SrcAS = RHS->getType()->getPointerAddressSpace(); if (!TLI.isTypeLegal(LVT) || !TLI.allowsMisalignedMemoryAccesses(LVT, SrcAS) || !TLI.allowsMisalignedMemoryAccesses(LVT, DstAS)) LVT = MVT::INVALID_SIMPLE_VALUE_TYPE; } return LVT; }; // This turns into unaligned loads. We only do this if the target natively // supports the MVT we'll be loading or if it is small enough (<= 4) that // we'll only produce a small number of byte loads. MVT LoadVT; unsigned NumBitsToCompare = CSize->getZExtValue() * 8; switch (NumBitsToCompare) { default: return false; case 16: LoadVT = MVT::i16; break; case 32: LoadVT = MVT::i32; break; case 64: case 128: case 256: LoadVT = hasFastLoadsAndCompare(NumBitsToCompare); break; } if (LoadVT == MVT::INVALID_SIMPLE_VALUE_TYPE) return false; SDValue LoadL = getMemCmpLoad(LHS, LoadVT, *this); SDValue LoadR = getMemCmpLoad(RHS, LoadVT, *this); // Bitcast to a wide integer type if the loads are vectors. if (LoadVT.isVector()) { EVT CmpVT = EVT::getIntegerVT(LHS->getContext(), LoadVT.getSizeInBits()); LoadL = DAG.getBitcast(CmpVT, LoadL); LoadR = DAG.getBitcast(CmpVT, LoadR); } SDValue Cmp = DAG.getSetCC(getCurSDLoc(), MVT::i1, LoadL, LoadR, ISD::SETNE); processIntegerCallValue(I, Cmp, false); return true; } /// See if we can lower a memchr call into an optimized form. 
If so, return /// true and lower it. Otherwise return false, and it will be lowered like a /// normal call. /// The caller already checked that \p I calls the appropriate LibFunc with a /// correct prototype. bool SelectionDAGBuilder::visitMemChrCall(const CallInst &I) { const Value *Src = I.getArgOperand(0); const Value *Char = I.getArgOperand(1); const Value *Length = I.getArgOperand(2); const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo(); std::pair Res = TSI.EmitTargetCodeForMemchr(DAG, getCurSDLoc(), DAG.getRoot(), getValue(Src), getValue(Char), getValue(Length), MachinePointerInfo(Src)); if (Res.first.getNode()) { setValue(&I, Res.first); PendingLoads.push_back(Res.second); return true; } return false; } /// See if we can lower a mempcpy call into an optimized form. If so, return /// true and lower it. Otherwise return false, and it will be lowered like a /// normal call. /// The caller already checked that \p I calls the appropriate LibFunc with a /// correct prototype. bool SelectionDAGBuilder::visitMemPCpyCall(const CallInst &I) { SDValue Dst = getValue(I.getArgOperand(0)); SDValue Src = getValue(I.getArgOperand(1)); SDValue Size = getValue(I.getArgOperand(2)); Align DstAlign = DAG.InferPtrAlign(Dst).valueOrOne(); Align SrcAlign = DAG.InferPtrAlign(Src).valueOrOne(); // DAG::getMemcpy needs Alignment to be defined. Align Alignment = std::min(DstAlign, SrcAlign); SDLoc sdl = getCurSDLoc(); // In the mempcpy context we need to pass in a false value for isTailCall // because the return pointer needs to be adjusted by the size of // the copied memory. SDValue Root = getMemoryRoot(); SDValue MC = DAG.getMemcpy( Root, sdl, Dst, Src, Size, Alignment, false, false, /*CI=*/nullptr, std::nullopt, MachinePointerInfo(I.getArgOperand(0)), MachinePointerInfo(I.getArgOperand(1)), I.getAAMetadata()); assert(MC.getNode() != nullptr && "** memcpy should not be lowered as TailCall in mempcpy context **"); DAG.setRoot(MC); // Check if Size needs to be truncated or extended. Size = DAG.getSExtOrTrunc(Size, sdl, Dst.getValueType()); // Adjust return pointer to point just past the last dst byte. SDValue DstPlusSize = DAG.getNode(ISD::ADD, sdl, Dst.getValueType(), Dst, Size); setValue(&I, DstPlusSize); return true; } /// See if we can lower a strcpy call into an optimized form. If so, return /// true and lower it, otherwise return false and it will be lowered like a /// normal call. /// The caller already checked that \p I calls the appropriate LibFunc with a /// correct prototype. bool SelectionDAGBuilder::visitStrCpyCall(const CallInst &I, bool isStpcpy) { const Value *Arg0 = I.getArgOperand(0), *Arg1 = I.getArgOperand(1); const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo(); std::pair Res = TSI.EmitTargetCodeForStrcpy(DAG, getCurSDLoc(), getRoot(), getValue(Arg0), getValue(Arg1), MachinePointerInfo(Arg0), MachinePointerInfo(Arg1), isStpcpy); if (Res.first.getNode()) { setValue(&I, Res.first); DAG.setRoot(Res.second); return true; } return false; } /// See if we can lower a strcmp call into an optimized form. If so, return /// true and lower it, otherwise return false and it will be lowered like a /// normal call. /// The caller already checked that \p I calls the appropriate LibFunc with a /// correct prototype. 
bool SelectionDAGBuilder::visitStrCmpCall(const CallInst &I) { const Value *Arg0 = I.getArgOperand(0), *Arg1 = I.getArgOperand(1); const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo(); std::pair Res = TSI.EmitTargetCodeForStrcmp(DAG, getCurSDLoc(), DAG.getRoot(), getValue(Arg0), getValue(Arg1), MachinePointerInfo(Arg0), MachinePointerInfo(Arg1)); if (Res.first.getNode()) { processIntegerCallValue(I, Res.first, true); PendingLoads.push_back(Res.second); return true; } return false; } /// See if we can lower a strlen call into an optimized form. If so, return /// true and lower it, otherwise return false and it will be lowered like a /// normal call. /// The caller already checked that \p I calls the appropriate LibFunc with a /// correct prototype. bool SelectionDAGBuilder::visitStrLenCall(const CallInst &I) { const Value *Arg0 = I.getArgOperand(0); const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo(); std::pair Res = TSI.EmitTargetCodeForStrlen(DAG, getCurSDLoc(), DAG.getRoot(), getValue(Arg0), MachinePointerInfo(Arg0)); if (Res.first.getNode()) { processIntegerCallValue(I, Res.first, false); PendingLoads.push_back(Res.second); return true; } return false; } /// See if we can lower a strnlen call into an optimized form. If so, return /// true and lower it, otherwise return false and it will be lowered like a /// normal call. /// The caller already checked that \p I calls the appropriate LibFunc with a /// correct prototype. bool SelectionDAGBuilder::visitStrNLenCall(const CallInst &I) { const Value *Arg0 = I.getArgOperand(0), *Arg1 = I.getArgOperand(1); const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo(); std::pair Res = TSI.EmitTargetCodeForStrnlen(DAG, getCurSDLoc(), DAG.getRoot(), getValue(Arg0), getValue(Arg1), MachinePointerInfo(Arg0)); if (Res.first.getNode()) { processIntegerCallValue(I, Res.first, false); PendingLoads.push_back(Res.second); return true; } return false; } /// See if we can lower a unary floating-point operation into an SDNode with /// the specified Opcode. If so, return true and lower it, otherwise return /// false and it will be lowered like a normal call. /// The caller already checked that \p I calls the appropriate LibFunc with a /// correct prototype. bool SelectionDAGBuilder::visitUnaryFloatCall(const CallInst &I, unsigned Opcode) { // We already checked this call's prototype; verify it doesn't modify errno. if (!I.onlyReadsMemory()) return false; SDNodeFlags Flags; Flags.copyFMF(cast(I)); SDValue Tmp = getValue(I.getArgOperand(0)); setValue(&I, DAG.getNode(Opcode, getCurSDLoc(), Tmp.getValueType(), Tmp, Flags)); return true; } /// See if we can lower a binary floating-point operation into an SDNode with /// the specified Opcode. If so, return true and lower it. Otherwise return /// false, and it will be lowered like a normal call. /// The caller already checked that \p I calls the appropriate LibFunc with a /// correct prototype. bool SelectionDAGBuilder::visitBinaryFloatCall(const CallInst &I, unsigned Opcode) { // We already checked this call's prototype; verify it doesn't modify errno. if (!I.onlyReadsMemory()) return false; SDNodeFlags Flags; Flags.copyFMF(cast(I)); SDValue Tmp0 = getValue(I.getArgOperand(0)); SDValue Tmp1 = getValue(I.getArgOperand(1)); EVT VT = Tmp0.getValueType(); setValue(&I, DAG.getNode(Opcode, getCurSDLoc(), VT, Tmp0, Tmp1, Flags)); return true; } void SelectionDAGBuilder::visitCall(const CallInst &I) { // Handle inline assembly differently. 
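  // Inline asm calls do not go through the intrinsic/libcall handling below;
  // they are lowered to an ISD::INLINEASM node by visitInlineAsm.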
if (I.isInlineAsm()) { visitInlineAsm(I); return; } diagnoseDontCall(I); if (Function *F = I.getCalledFunction()) { if (F->isDeclaration()) { // Is this an LLVM intrinsic or a target-specific intrinsic? unsigned IID = F->getIntrinsicID(); if (!IID) if (const TargetIntrinsicInfo *II = TM.getIntrinsicInfo()) IID = II->getIntrinsicID(F); if (IID) { visitIntrinsicCall(I, IID); return; } } // Check for well-known libc/libm calls. If the function is internal, it // can't be a library call. Don't do the check if marked as nobuiltin for // some reason or the call site requires strict floating point semantics. LibFunc Func; if (!I.isNoBuiltin() && !I.isStrictFP() && !F->hasLocalLinkage() && F->hasName() && LibInfo->getLibFunc(*F, Func) && LibInfo->hasOptimizedCodeGen(Func)) { switch (Func) { default: break; case LibFunc_bcmp: if (visitMemCmpBCmpCall(I)) return; break; case LibFunc_copysign: case LibFunc_copysignf: case LibFunc_copysignl: // We already checked this call's prototype; verify it doesn't modify // errno. if (I.onlyReadsMemory()) { SDValue LHS = getValue(I.getArgOperand(0)); SDValue RHS = getValue(I.getArgOperand(1)); setValue(&I, DAG.getNode(ISD::FCOPYSIGN, getCurSDLoc(), LHS.getValueType(), LHS, RHS)); return; } break; case LibFunc_fabs: case LibFunc_fabsf: case LibFunc_fabsl: if (visitUnaryFloatCall(I, ISD::FABS)) return; break; case LibFunc_fmin: case LibFunc_fminf: case LibFunc_fminl: if (visitBinaryFloatCall(I, ISD::FMINNUM)) return; break; case LibFunc_fmax: case LibFunc_fmaxf: case LibFunc_fmaxl: if (visitBinaryFloatCall(I, ISD::FMAXNUM)) return; break; case LibFunc_sin: case LibFunc_sinf: case LibFunc_sinl: if (visitUnaryFloatCall(I, ISD::FSIN)) return; break; case LibFunc_cos: case LibFunc_cosf: case LibFunc_cosl: if (visitUnaryFloatCall(I, ISD::FCOS)) return; break; case LibFunc_tan: case LibFunc_tanf: case LibFunc_tanl: if (visitUnaryFloatCall(I, ISD::FTAN)) return; break; case LibFunc_asin: case LibFunc_asinf: case LibFunc_asinl: if (visitUnaryFloatCall(I, ISD::FASIN)) return; break; case LibFunc_acos: case LibFunc_acosf: case LibFunc_acosl: if (visitUnaryFloatCall(I, ISD::FACOS)) return; break; case LibFunc_atan: case LibFunc_atanf: case LibFunc_atanl: if (visitUnaryFloatCall(I, ISD::FATAN)) return; break; case LibFunc_sinh: case LibFunc_sinhf: case LibFunc_sinhl: if (visitUnaryFloatCall(I, ISD::FSINH)) return; break; case LibFunc_cosh: case LibFunc_coshf: case LibFunc_coshl: if (visitUnaryFloatCall(I, ISD::FCOSH)) return; break; case LibFunc_tanh: case LibFunc_tanhf: case LibFunc_tanhl: if (visitUnaryFloatCall(I, ISD::FTANH)) return; break; case LibFunc_sqrt: case LibFunc_sqrtf: case LibFunc_sqrtl: case LibFunc_sqrt_finite: case LibFunc_sqrtf_finite: case LibFunc_sqrtl_finite: if (visitUnaryFloatCall(I, ISD::FSQRT)) return; break; case LibFunc_floor: case LibFunc_floorf: case LibFunc_floorl: if (visitUnaryFloatCall(I, ISD::FFLOOR)) return; break; case LibFunc_nearbyint: case LibFunc_nearbyintf: case LibFunc_nearbyintl: if (visitUnaryFloatCall(I, ISD::FNEARBYINT)) return; break; case LibFunc_ceil: case LibFunc_ceilf: case LibFunc_ceill: if (visitUnaryFloatCall(I, ISD::FCEIL)) return; break; case LibFunc_rint: case LibFunc_rintf: case LibFunc_rintl: if (visitUnaryFloatCall(I, ISD::FRINT)) return; break; case LibFunc_round: case LibFunc_roundf: case LibFunc_roundl: if (visitUnaryFloatCall(I, ISD::FROUND)) return; break; case LibFunc_trunc: case LibFunc_truncf: case LibFunc_truncl: if (visitUnaryFloatCall(I, ISD::FTRUNC)) return; break; case LibFunc_log2: case LibFunc_log2f: 
case LibFunc_log2l: if (visitUnaryFloatCall(I, ISD::FLOG2)) return; break; case LibFunc_exp2: case LibFunc_exp2f: case LibFunc_exp2l: if (visitUnaryFloatCall(I, ISD::FEXP2)) return; break; case LibFunc_exp10: case LibFunc_exp10f: case LibFunc_exp10l: if (visitUnaryFloatCall(I, ISD::FEXP10)) return; break; case LibFunc_ldexp: case LibFunc_ldexpf: case LibFunc_ldexpl: if (visitBinaryFloatCall(I, ISD::FLDEXP)) return; break; case LibFunc_memcmp: if (visitMemCmpBCmpCall(I)) return; break; case LibFunc_mempcpy: if (visitMemPCpyCall(I)) return; break; case LibFunc_memchr: if (visitMemChrCall(I)) return; break; case LibFunc_strcpy: if (visitStrCpyCall(I, false)) return; break; case LibFunc_stpcpy: if (visitStrCpyCall(I, true)) return; break; case LibFunc_strcmp: if (visitStrCmpCall(I)) return; break; case LibFunc_strlen: if (visitStrLenCall(I)) return; break; case LibFunc_strnlen: if (visitStrNLenCall(I)) return; break; } } } if (I.countOperandBundlesOfType(LLVMContext::OB_ptrauth)) { LowerCallSiteWithPtrAuthBundle(cast(I), /*EHPadBB=*/nullptr); return; } // Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't // have to do anything here to lower funclet bundles. // CFGuardTarget bundles are lowered in LowerCallTo. assert(!I.hasOperandBundlesOtherThan( {LLVMContext::OB_deopt, LLVMContext::OB_funclet, LLVMContext::OB_cfguardtarget, LLVMContext::OB_preallocated, LLVMContext::OB_clang_arc_attachedcall, LLVMContext::OB_kcfi, LLVMContext::OB_convergencectrl}) && "Cannot lower calls with arbitrary operand bundles!"); SDValue Callee = getValue(I.getCalledOperand()); if (I.hasDeoptState()) LowerCallSiteWithDeoptBundle(&I, Callee, nullptr); else // Check if we can potentially perform a tail call. More detailed checking // is be done within LowerCallTo, after more information about the call is // known. LowerCallTo(I, Callee, I.isTailCall(), I.isMustTailCall()); } void SelectionDAGBuilder::LowerCallSiteWithPtrAuthBundle( const CallBase &CB, const BasicBlock *EHPadBB) { auto PAB = CB.getOperandBundle("ptrauth"); const Value *CalleeV = CB.getCalledOperand(); // Gather the call ptrauth data from the operand bundle: // [ i32 , i64 ] const auto *Key = cast(PAB->Inputs[0]); const Value *Discriminator = PAB->Inputs[1]; assert(Key->getType()->isIntegerTy(32) && "Invalid ptrauth key"); assert(Discriminator->getType()->isIntegerTy(64) && "Invalid ptrauth discriminator"); // Look through ptrauth constants to find the raw callee. // Do a direct unauthenticated call if we found it and everything matches. if (const auto *CalleeCPA = dyn_cast(CalleeV)) if (CalleeCPA->isKnownCompatibleWith(Key, Discriminator, DAG.getDataLayout())) return LowerCallTo(CB, getValue(CalleeCPA->getPointer()), CB.isTailCall(), CB.isMustTailCall(), EHPadBB); // Functions should never be ptrauth-called directly. assert(!isa(CalleeV) && "invalid direct ptrauth call"); // Otherwise, do an authenticated indirect call. TargetLowering::PtrAuthInfo PAI = {Key->getZExtValue(), getValue(Discriminator)}; LowerCallTo(CB, getValue(CalleeV), CB.isTailCall(), CB.isMustTailCall(), EHPadBB, &PAI); } namespace { /// AsmOperandInfo - This contains information for each constraint that we are /// lowering. class SDISelAsmOperandInfo : public TargetLowering::AsmOperandInfo { public: /// CallOperand - If this is the result output operand or a clobber /// this is null, otherwise it is the incoming operand to the CallInst. /// This gets modified as the asm is processed. 
SDValue CallOperand; /// AssignedRegs - If this is a register or register class operand, this /// contains the set of register corresponding to the operand. RegsForValue AssignedRegs; explicit SDISelAsmOperandInfo(const TargetLowering::AsmOperandInfo &info) : TargetLowering::AsmOperandInfo(info), CallOperand(nullptr, 0) { } /// Whether or not this operand accesses memory bool hasMemory(const TargetLowering &TLI) const { // Indirect operand accesses access memory. if (isIndirect) return true; for (const auto &Code : Codes) if (TLI.getConstraintType(Code) == TargetLowering::C_Memory) return true; return false; } }; } // end anonymous namespace /// Make sure that the output operand \p OpInfo and its corresponding input /// operand \p MatchingOpInfo have compatible constraint types (otherwise error /// out). static void patchMatchingInput(const SDISelAsmOperandInfo &OpInfo, SDISelAsmOperandInfo &MatchingOpInfo, SelectionDAG &DAG) { if (OpInfo.ConstraintVT == MatchingOpInfo.ConstraintVT) return; const TargetRegisterInfo *TRI = DAG.getSubtarget().getRegisterInfo(); const auto &TLI = DAG.getTargetLoweringInfo(); std::pair MatchRC = TLI.getRegForInlineAsmConstraint(TRI, OpInfo.ConstraintCode, OpInfo.ConstraintVT); std::pair InputRC = TLI.getRegForInlineAsmConstraint(TRI, MatchingOpInfo.ConstraintCode, MatchingOpInfo.ConstraintVT); if ((OpInfo.ConstraintVT.isInteger() != MatchingOpInfo.ConstraintVT.isInteger()) || (MatchRC.second != InputRC.second)) { // FIXME: error out in a more elegant fashion report_fatal_error("Unsupported asm: input constraint" " with a matching output constraint of" " incompatible type!"); } MatchingOpInfo.ConstraintVT = OpInfo.ConstraintVT; } /// Get a direct memory input to behave well as an indirect operand. /// This may introduce stores, hence the need for a \p Chain. /// \return The (possibly updated) chain. static SDValue getAddressForMemoryInput(SDValue Chain, const SDLoc &Location, SDISelAsmOperandInfo &OpInfo, SelectionDAG &DAG) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); // If we don't have an indirect input, put it in the constpool if we can, // otherwise spill it to a stack slot. // TODO: This isn't quite right. We need to handle these according to // the addressing mode that the constraint wants. Also, this may take // an additional register for the computation and we don't want that // either. // If the operand is a float, integer, or vector constant, spill to a // constant pool entry to get its address. const Value *OpVal = OpInfo.CallOperandVal; if (isa(OpVal) || isa(OpVal) || isa(OpVal) || isa(OpVal)) { OpInfo.CallOperand = DAG.getConstantPool( cast(OpVal), TLI.getPointerTy(DAG.getDataLayout())); return Chain; } // Otherwise, create a stack slot and emit a store to it before the asm. 
Type *Ty = OpVal->getType(); auto &DL = DAG.getDataLayout(); TypeSize TySize = DL.getTypeAllocSize(Ty); MachineFunction &MF = DAG.getMachineFunction(); const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); int StackID = 0; if (TySize.isScalable()) StackID = TFI->getStackIDForScalableVectors(); int SSFI = MF.getFrameInfo().CreateStackObject(TySize.getKnownMinValue(), DL.getPrefTypeAlign(Ty), false, nullptr, StackID); SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getFrameIndexTy(DL)); Chain = DAG.getTruncStore(Chain, Location, OpInfo.CallOperand, StackSlot, MachinePointerInfo::getFixedStack(MF, SSFI), TLI.getMemValueType(DL, Ty)); OpInfo.CallOperand = StackSlot; return Chain; } /// GetRegistersForValue - Assign registers (virtual or physical) for the /// specified operand. We prefer to assign virtual registers, to allow the /// register allocator to handle the assignment process. However, if the asm /// uses features that we can't model on machineinstrs, we have SDISel do the /// allocation. This produces generally horrible, but correct, code. /// /// OpInfo describes the operand /// RefOpInfo describes the matching operand if any, the operand otherwise static std::optional getRegistersForValue(SelectionDAG &DAG, const SDLoc &DL, SDISelAsmOperandInfo &OpInfo, SDISelAsmOperandInfo &RefOpInfo) { LLVMContext &Context = *DAG.getContext(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); MachineFunction &MF = DAG.getMachineFunction(); SmallVector Regs; const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); // No work to do for memory/address operands. if (OpInfo.ConstraintType == TargetLowering::C_Memory || OpInfo.ConstraintType == TargetLowering::C_Address) return std::nullopt; // If this is a constraint for a single physreg, or a constraint for a // register class, find it. unsigned AssignedReg; const TargetRegisterClass *RC; std::tie(AssignedReg, RC) = TLI.getRegForInlineAsmConstraint( &TRI, RefOpInfo.ConstraintCode, RefOpInfo.ConstraintVT); // RC is unset only on failure. Return immediately. if (!RC) return std::nullopt; // Get the actual register value type. This is important, because the user // may have asked for (e.g.) the AX register in i32 type. We need to // remember that AX is actually i16 to get the right extension. const MVT RegVT = *TRI.legalclasstypes_begin(*RC); if (OpInfo.ConstraintVT != MVT::Other && RegVT != MVT::Untyped) { // If this is an FP operand in an integer register (or visa versa), or more // generally if the operand value disagrees with the register class we plan // to stick it in, fix the operand type. // // If this is an input value, the bitcast to the new type is done now. // Bitcast for output value is done at the end of visitInlineAsm(). if ((OpInfo.Type == InlineAsm::isOutput || OpInfo.Type == InlineAsm::isInput) && !TRI.isTypeLegalForClass(*RC, OpInfo.ConstraintVT)) { // Try to convert to the first EVT that the reg class contains. If the // types are identical size, use a bitcast to convert (e.g. two differing // vector types). Note: output bitcast is done at the end of // visitInlineAsm(). if (RegVT.getSizeInBits() == OpInfo.ConstraintVT.getSizeInBits()) { // Exclude indirect inputs while they are unsupported because the code // to perform the load is missing and thus OpInfo.CallOperand still // refers to the input address rather than the pointed-to value. 
if (OpInfo.Type == InlineAsm::isInput && !OpInfo.isIndirect) OpInfo.CallOperand = DAG.getNode(ISD::BITCAST, DL, RegVT, OpInfo.CallOperand); OpInfo.ConstraintVT = RegVT; // If the operand is an FP value and we want it in integer registers, // use the corresponding integer type. This turns an f64 value into // i64, which can be passed with two i32 values on a 32-bit machine. } else if (RegVT.isInteger() && OpInfo.ConstraintVT.isFloatingPoint()) { MVT VT = MVT::getIntegerVT(OpInfo.ConstraintVT.getSizeInBits()); if (OpInfo.Type == InlineAsm::isInput) OpInfo.CallOperand = DAG.getNode(ISD::BITCAST, DL, VT, OpInfo.CallOperand); OpInfo.ConstraintVT = VT; } } } // No need to allocate a matching input constraint since the constraint it's // matching to has already been allocated. if (OpInfo.isMatchingInputConstraint()) return std::nullopt; EVT ValueVT = OpInfo.ConstraintVT; if (OpInfo.ConstraintVT == MVT::Other) ValueVT = RegVT; // Initialize NumRegs. unsigned NumRegs = 1; if (OpInfo.ConstraintVT != MVT::Other) NumRegs = TLI.getNumRegisters(Context, OpInfo.ConstraintVT, RegVT); // If this is a constraint for a specific physical register, like {r17}, // assign it now. // If this associated to a specific register, initialize iterator to correct // place. If virtual, make sure we have enough registers // Initialize iterator if necessary TargetRegisterClass::iterator I = RC->begin(); MachineRegisterInfo &RegInfo = MF.getRegInfo(); // Do not check for single registers. if (AssignedReg) { I = std::find(I, RC->end(), AssignedReg); if (I == RC->end()) { // RC does not contain the selected register, which indicates a // mismatch between the register and the required type/bitwidth. return {AssignedReg}; } } for (; NumRegs; --NumRegs, ++I) { assert(I != RC->end() && "Ran out of registers to allocate!"); Register R = AssignedReg ? Register(*I) : RegInfo.createVirtualRegister(RC); Regs.push_back(R); } OpInfo.AssignedRegs = RegsForValue(Regs, RegVT, ValueVT); return std::nullopt; } static unsigned findMatchingInlineAsmOperand(unsigned OperandNo, const std::vector &AsmNodeOperands) { // Scan until we find the definition we already emitted of this operand. unsigned CurOp = InlineAsm::Op_FirstOperand; for (; OperandNo; --OperandNo) { // Advance to the next operand. unsigned OpFlag = AsmNodeOperands[CurOp]->getAsZExtVal(); const InlineAsm::Flag F(OpFlag); assert( (F.isRegDefKind() || F.isRegDefEarlyClobberKind() || F.isMemKind()) && "Skipped past definitions?"); CurOp += F.getNumOperandRegisters() + 1; } return CurOp; } namespace { class ExtraFlags { unsigned Flags = 0; public: explicit ExtraFlags(const CallBase &Call) { const InlineAsm *IA = cast(Call.getCalledOperand()); if (IA->hasSideEffects()) Flags |= InlineAsm::Extra_HasSideEffects; if (IA->isAlignStack()) Flags |= InlineAsm::Extra_IsAlignStack; if (Call.isConvergent()) Flags |= InlineAsm::Extra_IsConvergent; Flags |= IA->getDialect() * InlineAsm::Extra_AsmDialect; } void update(const TargetLowering::AsmOperandInfo &OpInfo) { // Ideally, we would only check against memory constraints. However, the // meaning of an Other constraint can be target-specific and we can't easily // reason about it. Therefore, be conservative and set MayLoad/MayStore // for Other constraints as well. 
if (OpInfo.ConstraintType == TargetLowering::C_Memory || OpInfo.ConstraintType == TargetLowering::C_Other) { if (OpInfo.Type == InlineAsm::isInput) Flags |= InlineAsm::Extra_MayLoad; else if (OpInfo.Type == InlineAsm::isOutput) Flags |= InlineAsm::Extra_MayStore; else if (OpInfo.Type == InlineAsm::isClobber) Flags |= (InlineAsm::Extra_MayLoad | InlineAsm::Extra_MayStore); } } unsigned get() const { return Flags; } }; } // end anonymous namespace static bool isFunction(SDValue Op) { if (Op && Op.getOpcode() == ISD::GlobalAddress) { if (auto *GA = dyn_cast(Op)) { auto Fn = dyn_cast_or_null(GA->getGlobal()); // In normal "call dllimport func" instruction (non-inlineasm) it force // indirect access by specifing call opcode. And usually specially print // asm with indirect symbol (i.g: "*") according to opcode. Inline asm can // not do in this way now. (In fact, this is similar with "Data Access" // action). So here we ignore dllimport function. if (Fn && !Fn->hasDLLImportStorageClass()) return true; } } return false; } /// visitInlineAsm - Handle a call to an InlineAsm object. void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call, const BasicBlock *EHPadBB) { const InlineAsm *IA = cast(Call.getCalledOperand()); /// ConstraintOperands - Information about all of the constraints. SmallVector ConstraintOperands; const TargetLowering &TLI = DAG.getTargetLoweringInfo(); TargetLowering::AsmOperandInfoVector TargetConstraints = TLI.ParseConstraints( DAG.getDataLayout(), DAG.getSubtarget().getRegisterInfo(), Call); // First Pass: Calculate HasSideEffects and ExtraFlags (AlignStack, // AsmDialect, MayLoad, MayStore). bool HasSideEffect = IA->hasSideEffects(); ExtraFlags ExtraInfo(Call); for (auto &T : TargetConstraints) { ConstraintOperands.push_back(SDISelAsmOperandInfo(T)); SDISelAsmOperandInfo &OpInfo = ConstraintOperands.back(); if (OpInfo.CallOperandVal) OpInfo.CallOperand = getValue(OpInfo.CallOperandVal); if (!HasSideEffect) HasSideEffect = OpInfo.hasMemory(TLI); // Determine if this InlineAsm MayLoad or MayStore based on the constraints. // FIXME: Could we compute this on OpInfo rather than T? // Compute the constraint code and ConstraintType to use. TLI.ComputeConstraintToUse(T, SDValue()); if (T.ConstraintType == TargetLowering::C_Immediate && OpInfo.CallOperand && !isa(OpInfo.CallOperand)) // We've delayed emitting a diagnostic like the "n" constraint because // inlining could cause an integer showing up. return emitInlineAsmError(Call, "constraint '" + Twine(T.ConstraintCode) + "' expects an integer constant " "expression"); ExtraInfo.update(T); } // We won't need to flush pending loads if this asm doesn't touch // memory and is nonvolatile. SDValue Glue, Chain = (HasSideEffect) ? getRoot() : DAG.getRoot(); bool EmitEHLabels = isa(Call); if (EmitEHLabels) { assert(EHPadBB && "InvokeInst must have an EHPadBB"); } bool IsCallBr = isa(Call); if (IsCallBr || EmitEHLabels) { // If this is a callbr or invoke we need to flush pending exports since // inlineasm_br and invoke are terminators. // We need to do this before nodes are glued to the inlineasm_br node. Chain = getControlRoot(); } MCSymbol *BeginLabel = nullptr; if (EmitEHLabels) { Chain = lowerStartEH(Chain, EHPadBB, BeginLabel); } int OpNo = -1; SmallVector AsmStrs; IA->collectAsmStrs(AsmStrs); // Second pass over the constraints: compute which constraint option to use. 
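// Illustrative standalone snippet (not part of this file): the delayed
// C_Immediate diagnostic mentioned above. The first statement is fine because
// the operand is an integer constant expression; the second (kept commented
// out) would reach this code as a non-constant and be rejected with
// "constraint 'n' expects an integer constant expression". The diagnostic is
// delayed to this point because inlining may still turn the operand into a
// constant.
void ImmediateConstraint(int RuntimeValue) {
  asm volatile(".byte %c0" : : "n"(8)); // OK: folds to a constant
  // asm volatile(".byte %c0" : : "n"(RuntimeValue)); // would be rejected
  (void)RuntimeValue;
}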
  for (SDISelAsmOperandInfo &OpInfo : ConstraintOperands) {
    if (OpInfo.hasArg() || OpInfo.Type == InlineAsm::isOutput)
      OpNo++;

    // If this is an output operand with a matching input operand, look up the
    // matching input. If their types mismatch, e.g. one is an integer, the
    // other is floating point, or their sizes are different, flag it as an
    // error.
    if (OpInfo.hasMatchingInput()) {
      SDISelAsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
      patchMatchingInput(OpInfo, Input, DAG);
    }

    // Compute the constraint code and ConstraintType to use.
    TLI.ComputeConstraintToUse(OpInfo, OpInfo.CallOperand, &DAG);

    if ((OpInfo.ConstraintType == TargetLowering::C_Memory &&
         OpInfo.Type == InlineAsm::isClobber) ||
        OpInfo.ConstraintType == TargetLowering::C_Address)
      continue;

    // In the Linux PIC model, there are 4 cases of value/label addressing:
    //
    // 1: Function call or label jump inside the module.
    // 2: Data access (such as a global or static variable) inside the module.
    // 3: Function call or label jump outside the module.
    // 4: Data access (such as a global variable) outside the module.
    //
    // Because the current LLVM inline asm architecture is designed not to
    // "recognize" the asm code, it is difficult to treat memory addressing
    // differently for the same value/address used in different instructions.
    // For example, in the PIC model, a function call may go through the PLT
    // or be directly PC-relative, while a lea/mov of a function address may
    // go through the GOT.
    //
    // Here we try to "recognize" a function call for cases 1 and 3 in inline
    // asm, and adjust the constraint for them.
    //
    // TODO: Current inline asm does not encourage jumping to labels outside
    // the module, so we do not handle jumps to function labels yet; this
    // needs to be enhanced (especially in the PIC model) if meaningful
    // requirements arise.
    if (OpInfo.isIndirect && isFunction(OpInfo.CallOperand) &&
        TLI.isInlineAsmTargetBranch(AsmStrs, OpNo) &&
        TM.getCodeModel() != CodeModel::Large) {
      OpInfo.isIndirect = false;
      OpInfo.ConstraintType = TargetLowering::C_Address;
    }

    // If this is a memory input, and if the operand is not indirect, do what
    // we need to provide an address for the memory input.
    if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
        !OpInfo.isIndirect) {
      assert((OpInfo.isMultipleAlternative ||
              (OpInfo.Type == InlineAsm::isInput)) &&
             "Can only indirectify direct input operands!");

      // Memory operands really want the address of the value.
      Chain = getAddressForMemoryInput(Chain, getCurSDLoc(), OpInfo, DAG);

      // There is no longer a Value* corresponding to this operand.
      OpInfo.CallOperandVal = nullptr;

      // It is now an indirect operand.
      OpInfo.isIndirect = true;
    }
  }

  // AsmNodeOperands - The operands for the ISD::INLINEASM node.
  std::vector<SDValue> AsmNodeOperands;
  AsmNodeOperands.push_back(SDValue()); // reserve space for input chain
  AsmNodeOperands.push_back(DAG.getTargetExternalSymbol(
      IA->getAsmString().c_str(),
      TLI.getProgramPointerTy(DAG.getDataLayout())));

  // If we have a !srcloc metadata node associated with it, we want to attach
  // this to the ultimately generated inline asm machineinstr. To do this, we
  // pass in the third operand as this (potentially null) inline asm MDNode.
  const MDNode *SrcLoc = Call.getMetadata("srcloc");
  AsmNodeOperands.push_back(DAG.getMDNode(SrcLoc));

  // Remember the HasSideEffect, AlignStack, AsmDialect, MayLoad and MayStore
  // bits as operand 3.
AsmNodeOperands.push_back(DAG.getTargetConstant( ExtraInfo.get(), getCurSDLoc(), TLI.getPointerTy(DAG.getDataLayout()))); // Third pass: Loop over operands to prepare DAG-level operands.. As part of // this, assign virtual and physical registers for inputs and otput. for (SDISelAsmOperandInfo &OpInfo : ConstraintOperands) { // Assign Registers. SDISelAsmOperandInfo &RefOpInfo = OpInfo.isMatchingInputConstraint() ? ConstraintOperands[OpInfo.getMatchedOperand()] : OpInfo; const auto RegError = getRegistersForValue(DAG, getCurSDLoc(), OpInfo, RefOpInfo); if (RegError) { const MachineFunction &MF = DAG.getMachineFunction(); const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); const char *RegName = TRI.getName(*RegError); emitInlineAsmError(Call, "register '" + Twine(RegName) + "' allocated for constraint '" + Twine(OpInfo.ConstraintCode) + "' does not match required type"); return; } auto DetectWriteToReservedRegister = [&]() { const MachineFunction &MF = DAG.getMachineFunction(); const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); for (unsigned Reg : OpInfo.AssignedRegs.Regs) { if (Register::isPhysicalRegister(Reg) && TRI.isInlineAsmReadOnlyReg(MF, Reg)) { const char *RegName = TRI.getName(Reg); emitInlineAsmError(Call, "write to reserved register '" + Twine(RegName) + "'"); return true; } } return false; }; assert((OpInfo.ConstraintType != TargetLowering::C_Address || (OpInfo.Type == InlineAsm::isInput && !OpInfo.isMatchingInputConstraint())) && "Only address as input operand is allowed."); switch (OpInfo.Type) { case InlineAsm::isOutput: if (OpInfo.ConstraintType == TargetLowering::C_Memory) { const InlineAsm::ConstraintCode ConstraintID = TLI.getInlineAsmMemConstraint(OpInfo.ConstraintCode); assert(ConstraintID != InlineAsm::ConstraintCode::Unknown && "Failed to convert memory constraint code to constraint id."); // Add information to the INLINEASM node to know about this output. InlineAsm::Flag OpFlags(InlineAsm::Kind::Mem, 1); OpFlags.setMemConstraint(ConstraintID); AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlags, getCurSDLoc(), MVT::i32)); AsmNodeOperands.push_back(OpInfo.CallOperand); } else { // Otherwise, this outputs to a register (directly for C_Register / // C_RegisterClass, and a target-defined fashion for // C_Immediate/C_Other). Find a register that we can use. if (OpInfo.AssignedRegs.Regs.empty()) { emitInlineAsmError( Call, "couldn't allocate output register for constraint '" + Twine(OpInfo.ConstraintCode) + "'"); return; } if (DetectWriteToReservedRegister()) return; // Add information to the INLINEASM node to know that this register is // set. OpInfo.AssignedRegs.AddInlineAsmOperands( OpInfo.isEarlyClobber ? InlineAsm::Kind::RegDefEarlyClobber : InlineAsm::Kind::RegDef, false, 0, getCurSDLoc(), DAG, AsmNodeOperands); } break; case InlineAsm::isInput: case InlineAsm::isLabel: { SDValue InOperandVal = OpInfo.CallOperand; if (OpInfo.isMatchingInputConstraint()) { // If this is required to match an output register we have already set, // just use its register. 
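// Illustrative standalone snippet (not part of this file): the output
// flavours handled above. "=r" becomes a Kind::RegDef operand, "=&r" an
// early-clobber Kind::RegDefEarlyClobber (so it never shares a register with
// an input), and "=m" a Kind::Mem operand that carries the address of C
// rather than a register.
void OutputKinds(int In) {
  int A, B, C;
  asm("" : "=r"(A), "=&r"(B), "=m"(C) : "r"(In));
  (void)A; (void)B; (void)C;
}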
auto CurOp = findMatchingInlineAsmOperand(OpInfo.getMatchedOperand(), AsmNodeOperands); InlineAsm::Flag Flag(AsmNodeOperands[CurOp]->getAsZExtVal()); if (Flag.isRegDefKind() || Flag.isRegDefEarlyClobberKind()) { if (OpInfo.isIndirect) { // This happens on gcc/testsuite/gcc.dg/pr8788-1.c emitInlineAsmError(Call, "inline asm not supported yet: " "don't know how to handle tied " "indirect register inputs"); return; } SmallVector Regs; MachineFunction &MF = DAG.getMachineFunction(); MachineRegisterInfo &MRI = MF.getRegInfo(); const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); auto *R = cast(AsmNodeOperands[CurOp+1]); Register TiedReg = R->getReg(); MVT RegVT = R->getSimpleValueType(0); const TargetRegisterClass *RC = TiedReg.isVirtual() ? MRI.getRegClass(TiedReg) : RegVT != MVT::Untyped ? TLI.getRegClassFor(RegVT) : TRI.getMinimalPhysRegClass(TiedReg); for (unsigned i = 0, e = Flag.getNumOperandRegisters(); i != e; ++i) Regs.push_back(MRI.createVirtualRegister(RC)); RegsForValue MatchedRegs(Regs, RegVT, InOperandVal.getValueType()); SDLoc dl = getCurSDLoc(); // Use the produced MatchedRegs object to MatchedRegs.getCopyToRegs(InOperandVal, DAG, dl, Chain, &Glue, &Call); MatchedRegs.AddInlineAsmOperands(InlineAsm::Kind::RegUse, true, OpInfo.getMatchedOperand(), dl, DAG, AsmNodeOperands); break; } assert(Flag.isMemKind() && "Unknown matching constraint!"); assert(Flag.getNumOperandRegisters() == 1 && "Unexpected number of operands"); // Add information to the INLINEASM node to know about this input. // See InlineAsm.h isUseOperandTiedToDef. Flag.clearMemConstraint(); Flag.setMatchingOp(OpInfo.getMatchedOperand()); AsmNodeOperands.push_back(DAG.getTargetConstant( Flag, getCurSDLoc(), TLI.getPointerTy(DAG.getDataLayout()))); AsmNodeOperands.push_back(AsmNodeOperands[CurOp+1]); break; } // Treat indirect 'X' constraint as memory. if (OpInfo.ConstraintType == TargetLowering::C_Other && OpInfo.isIndirect) OpInfo.ConstraintType = TargetLowering::C_Memory; if (OpInfo.ConstraintType == TargetLowering::C_Immediate || OpInfo.ConstraintType == TargetLowering::C_Other) { std::vector Ops; TLI.LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode, Ops, DAG); if (Ops.empty()) { if (OpInfo.ConstraintType == TargetLowering::C_Immediate) if (isa(InOperandVal)) { emitInlineAsmError(Call, "value out of range for constraint '" + Twine(OpInfo.ConstraintCode) + "'"); return; } emitInlineAsmError(Call, "invalid operand for inline asm constraint '" + Twine(OpInfo.ConstraintCode) + "'"); return; } // Add information to the INLINEASM node to know about this input. InlineAsm::Flag ResOpType(InlineAsm::Kind::Imm, Ops.size()); AsmNodeOperands.push_back(DAG.getTargetConstant( ResOpType, getCurSDLoc(), TLI.getPointerTy(DAG.getDataLayout()))); llvm::append_range(AsmNodeOperands, Ops); break; } if (OpInfo.ConstraintType == TargetLowering::C_Memory) { assert((OpInfo.isIndirect || OpInfo.ConstraintType != TargetLowering::C_Memory) && "Operand must be indirect to be a mem!"); assert(InOperandVal.getValueType() == TLI.getPointerTy(DAG.getDataLayout()) && "Memory operands expect pointer values"); const InlineAsm::ConstraintCode ConstraintID = TLI.getInlineAsmMemConstraint(OpInfo.ConstraintCode); assert(ConstraintID != InlineAsm::ConstraintCode::Unknown && "Failed to convert memory constraint code to constraint id."); // Add information to the INLINEASM node to know about this input. 
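// Illustrative standalone snippet (not part of this file): a tied
// ("matching") constraint. The "0" input must live in the same register(s)
// as output operand 0, which is the RegUse-with-matching-operand path wired
// up above. x86-64 AT&T syntax is assumed.
unsigned AddFive(unsigned In) {
  unsigned Out;
  asm("addl $5, %0" : "=r"(Out) : "0"(In));
  return Out;
}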
InlineAsm::Flag ResOpType(InlineAsm::Kind::Mem, 1); ResOpType.setMemConstraint(ConstraintID); AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType, getCurSDLoc(), MVT::i32)); AsmNodeOperands.push_back(InOperandVal); break; } if (OpInfo.ConstraintType == TargetLowering::C_Address) { const InlineAsm::ConstraintCode ConstraintID = TLI.getInlineAsmMemConstraint(OpInfo.ConstraintCode); assert(ConstraintID != InlineAsm::ConstraintCode::Unknown && "Failed to convert memory constraint code to constraint id."); InlineAsm::Flag ResOpType(InlineAsm::Kind::Mem, 1); SDValue AsmOp = InOperandVal; if (isFunction(InOperandVal)) { auto *GA = cast(InOperandVal); ResOpType = InlineAsm::Flag(InlineAsm::Kind::Func, 1); AsmOp = DAG.getTargetGlobalAddress(GA->getGlobal(), getCurSDLoc(), InOperandVal.getValueType(), GA->getOffset()); } // Add information to the INLINEASM node to know about this input. ResOpType.setMemConstraint(ConstraintID); AsmNodeOperands.push_back( DAG.getTargetConstant(ResOpType, getCurSDLoc(), MVT::i32)); AsmNodeOperands.push_back(AsmOp); break; } if (OpInfo.ConstraintType != TargetLowering::C_RegisterClass && OpInfo.ConstraintType != TargetLowering::C_Register) { emitInlineAsmError(Call, "unknown asm constraint '" + Twine(OpInfo.ConstraintCode) + "'"); return; } // TODO: Support this. if (OpInfo.isIndirect) { emitInlineAsmError( Call, "Don't know how to handle indirect register inputs yet " "for constraint '" + Twine(OpInfo.ConstraintCode) + "'"); return; } // Copy the input into the appropriate registers. if (OpInfo.AssignedRegs.Regs.empty()) { emitInlineAsmError(Call, "couldn't allocate input reg for constraint '" + Twine(OpInfo.ConstraintCode) + "'"); return; } if (DetectWriteToReservedRegister()) return; SDLoc dl = getCurSDLoc(); OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, dl, Chain, &Glue, &Call); OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind::RegUse, false, 0, dl, DAG, AsmNodeOperands); break; } case InlineAsm::isClobber: // Add the clobbered value to the operand list, so that the register // allocator is aware that the physreg got clobbered. if (!OpInfo.AssignedRegs.Regs.empty()) OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind::Clobber, false, 0, getCurSDLoc(), DAG, AsmNodeOperands); break; } } // Finish up input operands. Set the input chain and add the flag last. AsmNodeOperands[InlineAsm::Op_InputChain] = Chain; if (Glue.getNode()) AsmNodeOperands.push_back(Glue); unsigned ISDOpc = IsCallBr ? ISD::INLINEASM_BR : ISD::INLINEASM; Chain = DAG.getNode(ISDOpc, getCurSDLoc(), DAG.getVTList(MVT::Other, MVT::Glue), AsmNodeOperands); Glue = Chain.getValue(1); // Do additional work to generate outputs. SmallVector ResultVTs; SmallVector ResultValues; SmallVector OutChains; llvm::Type *CallResultType = Call.getType(); ArrayRef ResultTypes; if (StructType *StructResult = dyn_cast(CallResultType)) ResultTypes = StructResult->elements(); else if (!CallResultType->isVoidTy()) ResultTypes = ArrayRef(CallResultType); auto CurResultType = ResultTypes.begin(); auto handleRegAssign = [&](SDValue V) { assert(CurResultType != ResultTypes.end() && "Unexpected value"); assert((*CurResultType)->isSized() && "Unexpected unsized type"); EVT ResultVT = TLI.getValueType(DAG.getDataLayout(), *CurResultType); ++CurResultType; // If the type of the inline asm call site return value is different but has // same size as the type of the asm output bitcast it. One example of this // is for vectors with different width / number of elements. 
This can // happen for register classes that can contain multiple different value // types. The preg or vreg allocated may not have the same VT as was // expected. // // This can also happen for a return value that disagrees with the register // class it is put in, eg. a double in a general-purpose register on a // 32-bit machine. if (ResultVT != V.getValueType() && ResultVT.getSizeInBits() == V.getValueSizeInBits()) V = DAG.getNode(ISD::BITCAST, getCurSDLoc(), ResultVT, V); else if (ResultVT != V.getValueType() && ResultVT.isInteger() && V.getValueType().isInteger()) { // If a result value was tied to an input value, the computed result // may have a wider width than the expected result. Extract the // relevant portion. V = DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), ResultVT, V); } assert(ResultVT == V.getValueType() && "Asm result value mismatch!"); ResultVTs.push_back(ResultVT); ResultValues.push_back(V); }; // Deal with output operands. for (SDISelAsmOperandInfo &OpInfo : ConstraintOperands) { if (OpInfo.Type == InlineAsm::isOutput) { SDValue Val; // Skip trivial output operands. if (OpInfo.AssignedRegs.Regs.empty()) continue; switch (OpInfo.ConstraintType) { case TargetLowering::C_Register: case TargetLowering::C_RegisterClass: Val = OpInfo.AssignedRegs.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, &Glue, &Call); break; case TargetLowering::C_Immediate: case TargetLowering::C_Other: Val = TLI.LowerAsmOutputForConstraint(Chain, Glue, getCurSDLoc(), OpInfo, DAG); break; case TargetLowering::C_Memory: break; // Already handled. case TargetLowering::C_Address: break; // Silence warning. case TargetLowering::C_Unknown: assert(false && "Unexpected unknown constraint"); } // Indirect output manifest as stores. Record output chains. if (OpInfo.isIndirect) { const Value *Ptr = OpInfo.CallOperandVal; assert(Ptr && "Expected value CallOperandVal for indirect asm operand"); SDValue Store = DAG.getStore(Chain, getCurSDLoc(), Val, getValue(Ptr), MachinePointerInfo(Ptr)); OutChains.push_back(Store); } else { // generate CopyFromRegs to associated registers. assert(!Call.getType()->isVoidTy() && "Bad inline asm!"); if (Val.getOpcode() == ISD::MERGE_VALUES) { for (const SDValue &V : Val->op_values()) handleRegAssign(V); } else handleRegAssign(Val); } } } // Set results. if (!ResultValues.empty()) { assert(CurResultType == ResultTypes.end() && "Mismatch in number of ResultTypes"); assert(ResultValues.size() == ResultTypes.size() && "Mismatch in number of output operands in asm result"); SDValue V = DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(), DAG.getVTList(ResultVTs), ResultValues); setValue(&Call, V); } // Collect store chains. if (!OutChains.empty()) Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, OutChains); if (EmitEHLabels) { Chain = lowerEndEH(Chain, cast(&Call), EHPadBB, BeginLabel); } // Only Update Root if inline assembly has a memory effect. 
if (ResultValues.empty() || HasSideEffect || !OutChains.empty() || IsCallBr || EmitEHLabels) DAG.setRoot(Chain); } void SelectionDAGBuilder::emitInlineAsmError(const CallBase &Call, const Twine &Message) { LLVMContext &Ctx = *DAG.getContext(); Ctx.emitError(&Call, Message); // Make sure we leave the DAG in a valid state const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SmallVector ValueVTs; ComputeValueVTs(TLI, DAG.getDataLayout(), Call.getType(), ValueVTs); if (ValueVTs.empty()) return; SmallVector Ops; for (const EVT &VT : ValueVTs) Ops.push_back(DAG.getUNDEF(VT)); setValue(&Call, DAG.getMergeValues(Ops, getCurSDLoc())); } void SelectionDAGBuilder::visitVAStart(const CallInst &I) { DAG.setRoot(DAG.getNode(ISD::VASTART, getCurSDLoc(), MVT::Other, getRoot(), getValue(I.getArgOperand(0)), DAG.getSrcValue(I.getArgOperand(0)))); } void SelectionDAGBuilder::visitVAArg(const VAArgInst &I) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); const DataLayout &DL = DAG.getDataLayout(); SDValue V = DAG.getVAArg( TLI.getMemValueType(DAG.getDataLayout(), I.getType()), getCurSDLoc(), getRoot(), getValue(I.getOperand(0)), DAG.getSrcValue(I.getOperand(0)), DL.getABITypeAlign(I.getType()).value()); DAG.setRoot(V.getValue(1)); if (I.getType()->isPointerTy()) V = DAG.getPtrExtOrTrunc( V, getCurSDLoc(), TLI.getValueType(DAG.getDataLayout(), I.getType())); setValue(&I, V); } void SelectionDAGBuilder::visitVAEnd(const CallInst &I) { DAG.setRoot(DAG.getNode(ISD::VAEND, getCurSDLoc(), MVT::Other, getRoot(), getValue(I.getArgOperand(0)), DAG.getSrcValue(I.getArgOperand(0)))); } void SelectionDAGBuilder::visitVACopy(const CallInst &I) { DAG.setRoot(DAG.getNode(ISD::VACOPY, getCurSDLoc(), MVT::Other, getRoot(), getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), DAG.getSrcValue(I.getArgOperand(0)), DAG.getSrcValue(I.getArgOperand(1)))); } SDValue SelectionDAGBuilder::lowerRangeToAssertZExt(SelectionDAG &DAG, const Instruction &I, SDValue Op) { std::optional CR = getRange(I); if (!CR || CR->isFullSet() || CR->isEmptySet() || CR->isUpperWrapped()) return Op; APInt Lo = CR->getUnsignedMin(); if (!Lo.isMinValue()) return Op; APInt Hi = CR->getUnsignedMax(); unsigned Bits = std::max(Hi.getActiveBits(), static_cast(IntegerType::MIN_INT_BITS)); EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), Bits); SDLoc SL = getCurSDLoc(); SDValue ZExt = DAG.getNode(ISD::AssertZext, SL, Op.getValueType(), Op, DAG.getValueType(SmallVT)); unsigned NumVals = Op.getNode()->getNumValues(); if (NumVals == 1) return ZExt; SmallVector Ops; Ops.push_back(ZExt); for (unsigned I = 1; I != NumVals; ++I) Ops.push_back(Op.getValue(I)); return DAG.getMergeValues(Ops, SL); } /// Populate a CallLowerinInfo (into \p CLI) based on the properties of /// the call being lowered. /// /// This is a helper for lowering intrinsics that follow a target calling /// convention or require stack pointer adjustment. Only a subset of the /// intrinsic's operands need to participate in the calling convention. void SelectionDAGBuilder::populateCallLoweringInfo( TargetLowering::CallLoweringInfo &CLI, const CallBase *Call, unsigned ArgIdx, unsigned NumArgs, SDValue Callee, Type *ReturnTy, AttributeSet RetAttrs, bool IsPatchPoint) { TargetLowering::ArgListTy Args; Args.reserve(NumArgs); // Populate the argument list. // Attributes for args start at offset 1, after the return attribute. 
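// Illustrative standalone snippet (not part of this file): the variadic
// argument visitors above in action. va_start, va_arg, va_end and va_copy in
// the source become ISD::VASTART, ISD::VAARG, ISD::VAEND and ISD::VACOPY
// nodes respectively.
#include <cstdarg>
int SumInts(int Count, ...) {
  va_list Args;
  va_start(Args, Count);
  int Sum = 0;
  for (int I = 0; I < Count; ++I)
    Sum += va_arg(Args, int); // one ISD::VAARG per use
  va_end(Args);
  return Sum;
}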
for (unsigned ArgI = ArgIdx, ArgE = ArgIdx + NumArgs; ArgI != ArgE; ++ArgI) { const Value *V = Call->getOperand(ArgI); assert(!V->getType()->isEmptyTy() && "Empty type passed to intrinsic."); TargetLowering::ArgListEntry Entry; Entry.Node = getValue(V); Entry.Ty = V->getType(); Entry.setAttributes(Call, ArgI); Args.push_back(Entry); } CLI.setDebugLoc(getCurSDLoc()) .setChain(getRoot()) .setCallee(Call->getCallingConv(), ReturnTy, Callee, std::move(Args), RetAttrs) .setDiscardResult(Call->use_empty()) .setIsPatchPoint(IsPatchPoint) .setIsPreallocated( Call->countOperandBundlesOfType(LLVMContext::OB_preallocated) != 0); } /// Add a stack map intrinsic call's live variable operands to a stackmap /// or patchpoint target node's operand list. /// /// Constants are converted to TargetConstants purely as an optimization to /// avoid constant materialization and register allocation. /// /// FrameIndex operands are converted to TargetFrameIndex so that ISEL does not /// generate addess computation nodes, and so FinalizeISel can convert the /// TargetFrameIndex into a DirectMemRefOp StackMap location. This avoids /// address materialization and register allocation, but may also be required /// for correctness. If a StackMap (or PatchPoint) intrinsic directly uses an /// alloca in the entry block, then the runtime may assume that the alloca's /// StackMap location can be read immediately after compilation and that the /// location is valid at any point during execution (this is similar to the /// assumption made by the llvm.gcroot intrinsic). If the alloca's location were /// only available in a register, then the runtime would need to trap when /// execution reaches the StackMap in order to read the alloca's location. static void addStackMapLiveVars(const CallBase &Call, unsigned StartIdx, const SDLoc &DL, SmallVectorImpl &Ops, SelectionDAGBuilder &Builder) { SelectionDAG &DAG = Builder.DAG; for (unsigned I = StartIdx; I < Call.arg_size(); I++) { SDValue Op = Builder.getValue(Call.getArgOperand(I)); // Things on the stack are pointer-typed, meaning that they are already // legal and can be emitted directly to target nodes. if (FrameIndexSDNode *FI = dyn_cast(Op)) { Ops.push_back(DAG.getTargetFrameIndex(FI->getIndex(), Op.getValueType())); } else { // Otherwise emit a target independent node to be legalised. Ops.push_back(Builder.getValue(Call.getArgOperand(I))); } } } /// Lower llvm.experimental.stackmap. void SelectionDAGBuilder::visitStackmap(const CallInst &CI) { // void @llvm.experimental.stackmap(i64 , i32 , // [live variables...]) assert(CI.getType()->isVoidTy() && "Stackmap cannot return a value."); SDValue Chain, InGlue, Callee; SmallVector Ops; SDLoc DL = getCurSDLoc(); Callee = getValue(CI.getCalledOperand()); // The stackmap intrinsic only records the live variables (the arguments // passed to it) and emits NOPS (if requested). Unlike the patchpoint // intrinsic, this won't be lowered to a function call. This means we don't // have to worry about calling conventions and target specific lowering code. // Instead we perform the call lowering right here. // // chain, flag = CALLSEQ_START(chain, 0, 0) // chain, flag = STACKMAP(id, nbytes, ..., chain, flag) // chain, flag = CALLSEQ_END(chain, 0, 0, flag) // Chain = DAG.getCALLSEQ_START(getRoot(), 0, 0, DL); InGlue = Chain.getValue(1); // Add the STACKMAP operands, starting with DAG house-keeping. Ops.push_back(Chain); Ops.push_back(InGlue); // Add the , operands. 
// // These do not require legalisation, and can be emitted directly to target // constant nodes. SDValue ID = getValue(CI.getArgOperand(0)); assert(ID.getValueType() == MVT::i64); SDValue IDConst = DAG.getTargetConstant(ID->getAsZExtVal(), DL, ID.getValueType()); Ops.push_back(IDConst); SDValue Shad = getValue(CI.getArgOperand(1)); assert(Shad.getValueType() == MVT::i32); SDValue ShadConst = DAG.getTargetConstant(Shad->getAsZExtVal(), DL, Shad.getValueType()); Ops.push_back(ShadConst); // Add the live variables. addStackMapLiveVars(CI, 2, DL, Ops, *this); // Create the STACKMAP node. SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); Chain = DAG.getNode(ISD::STACKMAP, DL, NodeTys, Ops); InGlue = Chain.getValue(1); Chain = DAG.getCALLSEQ_END(Chain, 0, 0, InGlue, DL); // Stackmaps don't generate values, so nothing goes into the NodeMap. // Set the root to the target-lowered call chain. DAG.setRoot(Chain); // Inform the Frame Information that we have a stackmap in this function. FuncInfo.MF->getFrameInfo().setHasStackMap(); } /// Lower llvm.experimental.patchpoint directly to its target opcode. void SelectionDAGBuilder::visitPatchpoint(const CallBase &CB, const BasicBlock *EHPadBB) { // @llvm.experimental.patchpoint.(i64 , // i32 , // i8* , // i32 , // [Args...], // [live variables...]) CallingConv::ID CC = CB.getCallingConv(); bool IsAnyRegCC = CC == CallingConv::AnyReg; bool HasDef = !CB.getType()->isVoidTy(); SDLoc dl = getCurSDLoc(); SDValue Callee = getValue(CB.getArgOperand(PatchPointOpers::TargetPos)); // Handle immediate and symbolic callees. if (auto* ConstCallee = dyn_cast(Callee)) Callee = DAG.getIntPtrConstant(ConstCallee->getZExtValue(), dl, /*isTarget=*/true); else if (auto* SymbolicCallee = dyn_cast(Callee)) Callee = DAG.getTargetGlobalAddress(SymbolicCallee->getGlobal(), SDLoc(SymbolicCallee), SymbolicCallee->getValueType(0)); // Get the real number of arguments participating in the call SDValue NArgVal = getValue(CB.getArgOperand(PatchPointOpers::NArgPos)); unsigned NumArgs = NArgVal->getAsZExtVal(); // Skip the four meta args: , , , // Intrinsics include all meta-operands up to but not including CC. unsigned NumMetaOpers = PatchPointOpers::CCPos; assert(CB.arg_size() >= NumMetaOpers + NumArgs && "Not enough arguments provided to the patchpoint intrinsic"); // For AnyRegCC the arguments are lowered later on manually. unsigned NumCallArgs = IsAnyRegCC ? 0 : NumArgs; Type *ReturnTy = IsAnyRegCC ? Type::getVoidTy(*DAG.getContext()) : CB.getType(); TargetLowering::CallLoweringInfo CLI(DAG); populateCallLoweringInfo(CLI, &CB, NumMetaOpers, NumCallArgs, Callee, ReturnTy, CB.getAttributes().getRetAttrs(), true); std::pair Result = lowerInvokable(CLI, EHPadBB); SDNode *CallEnd = Result.second.getNode(); if (CallEnd->getOpcode() == ISD::EH_LABEL) CallEnd = CallEnd->getOperand(0).getNode(); if (HasDef && (CallEnd->getOpcode() == ISD::CopyFromReg)) CallEnd = CallEnd->getOperand(0).getNode(); /// Get a call instruction from the call sequence chain. /// Tail calls are not allowed. assert(CallEnd->getOpcode() == ISD::CALLSEQ_END && "Expected a callseq node."); SDNode *Call = CallEnd->getOperand(0).getNode(); bool HasGlue = Call->getGluedNode(); // Replace the target specific call node with the patchable intrinsic. SmallVector Ops; // Push the chain. Ops.push_back(*(Call->op_begin())); // Optionally, push the glue (if any). if (HasGlue) Ops.push_back(*(Call->op_end() - 1)); // Push the register mask info. 
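// Illustrative standalone sketch (not part of this file; assumes the usual
// LLVM IRBuilder headers and API): emitting the stackmap intrinsic that
// visitStackmap above lowers to CALLSEQ_START / STACKMAP / CALLSEQ_END. The
// id (42) and shadow-byte count (8) become target constants; LiveVal is
// recorded as a live variable through addStackMapLiveVars.
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Intrinsics.h"
void EmitStackmap(llvm::IRBuilder<> &B, llvm::Value *LiveVal) {
  B.CreateIntrinsic(llvm::Intrinsic::experimental_stackmap, /*Types=*/{},
                    {B.getInt64(42), B.getInt32(8), LiveVal});
}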
if (HasGlue) Ops.push_back(*(Call->op_end() - 2)); else Ops.push_back(*(Call->op_end() - 1)); // Add the and constants. SDValue IDVal = getValue(CB.getArgOperand(PatchPointOpers::IDPos)); Ops.push_back(DAG.getTargetConstant(IDVal->getAsZExtVal(), dl, MVT::i64)); SDValue NBytesVal = getValue(CB.getArgOperand(PatchPointOpers::NBytesPos)); Ops.push_back(DAG.getTargetConstant(NBytesVal->getAsZExtVal(), dl, MVT::i32)); // Add the callee. Ops.push_back(Callee); // Adjust to account for any arguments that have been passed on the // stack instead. // Call Node: Chain, Target, {Args}, RegMask, [Glue] unsigned NumCallRegArgs = Call->getNumOperands() - (HasGlue ? 4 : 3); NumCallRegArgs = IsAnyRegCC ? NumArgs : NumCallRegArgs; Ops.push_back(DAG.getTargetConstant(NumCallRegArgs, dl, MVT::i32)); // Add the calling convention Ops.push_back(DAG.getTargetConstant((unsigned)CC, dl, MVT::i32)); // Add the arguments we omitted previously. The register allocator should // place these in any free register. if (IsAnyRegCC) for (unsigned i = NumMetaOpers, e = NumMetaOpers + NumArgs; i != e; ++i) Ops.push_back(getValue(CB.getArgOperand(i))); // Push the arguments from the call instruction. SDNode::op_iterator e = HasGlue ? Call->op_end()-2 : Call->op_end()-1; Ops.append(Call->op_begin() + 2, e); // Push live variables for the stack map. addStackMapLiveVars(CB, NumMetaOpers + NumArgs, dl, Ops, *this); SDVTList NodeTys; if (IsAnyRegCC && HasDef) { // Create the return types based on the intrinsic definition const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SmallVector ValueVTs; ComputeValueVTs(TLI, DAG.getDataLayout(), CB.getType(), ValueVTs); assert(ValueVTs.size() == 1 && "Expected only one return value type."); // There is always a chain and a glue type at the end ValueVTs.push_back(MVT::Other); ValueVTs.push_back(MVT::Glue); NodeTys = DAG.getVTList(ValueVTs); } else NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); // Replace the target specific call node with a PATCHPOINT node. SDValue PPV = DAG.getNode(ISD::PATCHPOINT, dl, NodeTys, Ops); // Update the NodeMap. if (HasDef) { if (IsAnyRegCC) setValue(&CB, SDValue(PPV.getNode(), 0)); else setValue(&CB, Result.first); } // Fixup the consumers of the intrinsic. The chain and glue may be used in the // call sequence. Furthermore the location of the chain and glue can change // when the AnyReg calling convention is used and the intrinsic returns a // value. if (IsAnyRegCC && HasDef) { SDValue From[] = {SDValue(Call, 0), SDValue(Call, 1)}; SDValue To[] = {PPV.getValue(1), PPV.getValue(2)}; DAG.ReplaceAllUsesOfValuesWith(From, To, 2); } else DAG.ReplaceAllUsesWith(Call, PPV.getNode()); DAG.DeleteNode(Call); // Inform the Frame Information that we have a patchpoint in this function. 
FuncInfo.MF->getFrameInfo().setHasPatchPoint(); } void SelectionDAGBuilder::visitVectorReduce(const CallInst &I, unsigned Intrinsic) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue Op1 = getValue(I.getArgOperand(0)); SDValue Op2; if (I.arg_size() > 1) Op2 = getValue(I.getArgOperand(1)); SDLoc dl = getCurSDLoc(); EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); SDValue Res; SDNodeFlags SDFlags; if (auto *FPMO = dyn_cast(&I)) SDFlags.copyFMF(*FPMO); switch (Intrinsic) { case Intrinsic::vector_reduce_fadd: if (SDFlags.hasAllowReassociation()) Res = DAG.getNode(ISD::FADD, dl, VT, Op1, DAG.getNode(ISD::VECREDUCE_FADD, dl, VT, Op2, SDFlags), SDFlags); else Res = DAG.getNode(ISD::VECREDUCE_SEQ_FADD, dl, VT, Op1, Op2, SDFlags); break; case Intrinsic::vector_reduce_fmul: if (SDFlags.hasAllowReassociation()) Res = DAG.getNode(ISD::FMUL, dl, VT, Op1, DAG.getNode(ISD::VECREDUCE_FMUL, dl, VT, Op2, SDFlags), SDFlags); else Res = DAG.getNode(ISD::VECREDUCE_SEQ_FMUL, dl, VT, Op1, Op2, SDFlags); break; case Intrinsic::vector_reduce_add: Res = DAG.getNode(ISD::VECREDUCE_ADD, dl, VT, Op1); break; case Intrinsic::vector_reduce_mul: Res = DAG.getNode(ISD::VECREDUCE_MUL, dl, VT, Op1); break; case Intrinsic::vector_reduce_and: Res = DAG.getNode(ISD::VECREDUCE_AND, dl, VT, Op1); break; case Intrinsic::vector_reduce_or: Res = DAG.getNode(ISD::VECREDUCE_OR, dl, VT, Op1); break; case Intrinsic::vector_reduce_xor: Res = DAG.getNode(ISD::VECREDUCE_XOR, dl, VT, Op1); break; case Intrinsic::vector_reduce_smax: Res = DAG.getNode(ISD::VECREDUCE_SMAX, dl, VT, Op1); break; case Intrinsic::vector_reduce_smin: Res = DAG.getNode(ISD::VECREDUCE_SMIN, dl, VT, Op1); break; case Intrinsic::vector_reduce_umax: Res = DAG.getNode(ISD::VECREDUCE_UMAX, dl, VT, Op1); break; case Intrinsic::vector_reduce_umin: Res = DAG.getNode(ISD::VECREDUCE_UMIN, dl, VT, Op1); break; case Intrinsic::vector_reduce_fmax: Res = DAG.getNode(ISD::VECREDUCE_FMAX, dl, VT, Op1, SDFlags); break; case Intrinsic::vector_reduce_fmin: Res = DAG.getNode(ISD::VECREDUCE_FMIN, dl, VT, Op1, SDFlags); break; case Intrinsic::vector_reduce_fmaximum: Res = DAG.getNode(ISD::VECREDUCE_FMAXIMUM, dl, VT, Op1, SDFlags); break; case Intrinsic::vector_reduce_fminimum: Res = DAG.getNode(ISD::VECREDUCE_FMINIMUM, dl, VT, Op1, SDFlags); break; default: llvm_unreachable("Unhandled vector reduce intrinsic"); } setValue(&I, Res); } /// Returns an AttributeList representing the attributes applied to the return /// value of the given call. static AttributeList getReturnAttrs(TargetLowering::CallLoweringInfo &CLI) { SmallVector Attrs; if (CLI.RetSExt) Attrs.push_back(Attribute::SExt); if (CLI.RetZExt) Attrs.push_back(Attribute::ZExt); if (CLI.IsInReg) Attrs.push_back(Attribute::InReg); return AttributeList::get(CLI.RetTy->getContext(), AttributeList::ReturnIndex, Attrs); } /// TargetLowering::LowerCallTo - This is the default LowerCallTo /// implementation, which just calls LowerCall. /// FIXME: When all targets are /// migrated to using LowerCall, this hook should be integrated into SDISel. std::pair TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { // Handle the incoming return values from the call. CLI.Ins.clear(); Type *OrigRetTy = CLI.RetTy; SmallVector RetTys; SmallVector Offsets; auto &DL = CLI.DAG.getDataLayout(); ComputeValueVTs(*this, DL, CLI.RetTy, RetTys, &Offsets); if (CLI.IsPostTypeLegalization) { // If we are lowering a libcall after legalization, split the return type. 
SmallVector OldRetTys; SmallVector OldOffsets; RetTys.swap(OldRetTys); Offsets.swap(OldOffsets); for (size_t i = 0, e = OldRetTys.size(); i != e; ++i) { EVT RetVT = OldRetTys[i]; uint64_t Offset = OldOffsets[i]; MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), RetVT); unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), RetVT); unsigned RegisterVTByteSZ = RegisterVT.getSizeInBits() / 8; RetTys.append(NumRegs, RegisterVT); for (unsigned j = 0; j != NumRegs; ++j) Offsets.push_back(TypeSize::getFixed(Offset + j * RegisterVTByteSZ)); } } SmallVector Outs; GetReturnInfo(CLI.CallConv, CLI.RetTy, getReturnAttrs(CLI), Outs, *this, DL); bool CanLowerReturn = this->CanLowerReturn(CLI.CallConv, CLI.DAG.getMachineFunction(), CLI.IsVarArg, Outs, CLI.RetTy->getContext()); SDValue DemoteStackSlot; int DemoteStackIdx = -100; if (!CanLowerReturn) { // FIXME: equivalent assert? // assert(!CS.hasInAllocaArgument() && // "sret demotion is incompatible with inalloca"); uint64_t TySize = DL.getTypeAllocSize(CLI.RetTy); Align Alignment = DL.getPrefTypeAlign(CLI.RetTy); MachineFunction &MF = CLI.DAG.getMachineFunction(); DemoteStackIdx = MF.getFrameInfo().CreateStackObject(TySize, Alignment, false); Type *StackSlotPtrType = PointerType::get(CLI.RetTy, DL.getAllocaAddrSpace()); DemoteStackSlot = CLI.DAG.getFrameIndex(DemoteStackIdx, getFrameIndexTy(DL)); ArgListEntry Entry; Entry.Node = DemoteStackSlot; Entry.Ty = StackSlotPtrType; Entry.IsSExt = false; Entry.IsZExt = false; Entry.IsInReg = false; Entry.IsSRet = true; Entry.IsNest = false; Entry.IsByVal = false; Entry.IsByRef = false; Entry.IsReturned = false; Entry.IsSwiftSelf = false; Entry.IsSwiftAsync = false; Entry.IsSwiftError = false; Entry.IsCFGuardTarget = false; Entry.Alignment = Alignment; CLI.getArgs().insert(CLI.getArgs().begin(), Entry); CLI.NumFixedArgs += 1; CLI.getArgs()[0].IndirectType = CLI.RetTy; CLI.RetTy = Type::getVoidTy(CLI.RetTy->getContext()); // sret demotion isn't compatible with tail-calls, since the sret argument // points into the callers stack frame. CLI.IsTailCall = false; } else { bool NeedsRegBlock = functionArgumentNeedsConsecutiveRegisters( CLI.RetTy, CLI.CallConv, CLI.IsVarArg, DL); for (unsigned I = 0, E = RetTys.size(); I != E; ++I) { ISD::ArgFlagsTy Flags; if (NeedsRegBlock) { Flags.setInConsecutiveRegs(); if (I == RetTys.size() - 1) Flags.setInConsecutiveRegsLast(); } EVT VT = RetTys[I]; MVT RegisterVT = getRegisterTypeForCallingConv(CLI.RetTy->getContext(), CLI.CallConv, VT); unsigned NumRegs = getNumRegistersForCallingConv(CLI.RetTy->getContext(), CLI.CallConv, VT); for (unsigned i = 0; i != NumRegs; ++i) { ISD::InputArg MyFlags; MyFlags.Flags = Flags; MyFlags.VT = RegisterVT; MyFlags.ArgVT = VT; MyFlags.Used = CLI.IsReturnValueUsed; if (CLI.RetTy->isPointerTy()) { MyFlags.Flags.setPointer(); MyFlags.Flags.setPointerAddrSpace( cast(CLI.RetTy)->getAddressSpace()); } if (CLI.RetSExt) MyFlags.Flags.setSExt(); if (CLI.RetZExt) MyFlags.Flags.setZExt(); if (CLI.IsInReg) MyFlags.Flags.setInReg(); CLI.Ins.push_back(MyFlags); } } } // We push in swifterror return as the last element of CLI.Ins. ArgListTy &Args = CLI.getArgs(); if (supportSwiftError()) { for (const ArgListEntry &Arg : Args) { if (Arg.IsSwiftError) { ISD::InputArg MyFlags; MyFlags.VT = getPointerTy(DL); MyFlags.ArgVT = EVT(getPointerTy(DL)); MyFlags.Flags.setSwiftError(); CLI.Ins.push_back(MyFlags); } } } // Handle all of the outgoing arguments. 
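// Illustrative standalone snippet (not part of this file): a return type that
// typically cannot be lowered in registers. When CanLowerReturn is false, the
// code above demotes the return value: it creates a stack temporary, prepends
// a hidden sret pointer argument, and reloads the result from that slot after
// the call (which also disables tail-call lowering).
struct Big { long Words[8]; };
Big ProduceBig(); // assumed external producer
long FirstWord() { return ProduceBig().Words[0]; }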
CLI.Outs.clear(); CLI.OutVals.clear(); for (unsigned i = 0, e = Args.size(); i != e; ++i) { SmallVector ValueVTs; ComputeValueVTs(*this, DL, Args[i].Ty, ValueVTs); // FIXME: Split arguments if CLI.IsPostTypeLegalization Type *FinalType = Args[i].Ty; if (Args[i].IsByVal) FinalType = Args[i].IndirectType; bool NeedsRegBlock = functionArgumentNeedsConsecutiveRegisters( FinalType, CLI.CallConv, CLI.IsVarArg, DL); for (unsigned Value = 0, NumValues = ValueVTs.size(); Value != NumValues; ++Value) { EVT VT = ValueVTs[Value]; Type *ArgTy = VT.getTypeForEVT(CLI.RetTy->getContext()); SDValue Op = SDValue(Args[i].Node.getNode(), Args[i].Node.getResNo() + Value); ISD::ArgFlagsTy Flags; // Certain targets (such as MIPS), may have a different ABI alignment // for a type depending on the context. Give the target a chance to // specify the alignment it wants. const Align OriginalAlignment(getABIAlignmentForCallingConv(ArgTy, DL)); Flags.setOrigAlign(OriginalAlignment); if (Args[i].Ty->isPointerTy()) { Flags.setPointer(); Flags.setPointerAddrSpace( cast(Args[i].Ty)->getAddressSpace()); } if (Args[i].IsZExt) Flags.setZExt(); if (Args[i].IsSExt) Flags.setSExt(); if (Args[i].IsInReg) { // If we are using vectorcall calling convention, a structure that is // passed InReg - is surely an HVA if (CLI.CallConv == CallingConv::X86_VectorCall && isa(FinalType)) { // The first value of a structure is marked if (0 == Value) Flags.setHvaStart(); Flags.setHva(); } // Set InReg Flag Flags.setInReg(); } if (Args[i].IsSRet) Flags.setSRet(); if (Args[i].IsSwiftSelf) Flags.setSwiftSelf(); if (Args[i].IsSwiftAsync) Flags.setSwiftAsync(); if (Args[i].IsSwiftError) Flags.setSwiftError(); if (Args[i].IsCFGuardTarget) Flags.setCFGuardTarget(); if (Args[i].IsByVal) Flags.setByVal(); if (Args[i].IsByRef) Flags.setByRef(); if (Args[i].IsPreallocated) { Flags.setPreallocated(); // Set the byval flag for CCAssignFn callbacks that don't know about // preallocated. This way we can know how many bytes we should've // allocated and how many bytes a callee cleanup function will pop. If // we port preallocated to more targets, we'll have to add custom // preallocated handling in the various CC lowering callbacks. Flags.setByVal(); } if (Args[i].IsInAlloca) { Flags.setInAlloca(); // Set the byval flag for CCAssignFn callbacks that don't know about // inalloca. This way we can know how many bytes we should've allocated // and how many bytes a callee cleanup function will pop. If we port // inalloca to more targets, we'll have to add custom inalloca handling // in the various CC lowering callbacks. Flags.setByVal(); } Align MemAlign; if (Args[i].IsByVal || Args[i].IsInAlloca || Args[i].IsPreallocated) { unsigned FrameSize = DL.getTypeAllocSize(Args[i].IndirectType); Flags.setByValSize(FrameSize); // info is not there but there are cases it cannot get right. 
if (auto MA = Args[i].Alignment) MemAlign = *MA; else MemAlign = Align(getByValTypeAlignment(Args[i].IndirectType, DL)); } else if (auto MA = Args[i].Alignment) { MemAlign = *MA; } else { MemAlign = OriginalAlignment; } Flags.setMemAlign(MemAlign); if (Args[i].IsNest) Flags.setNest(); if (NeedsRegBlock) Flags.setInConsecutiveRegs(); MVT PartVT = getRegisterTypeForCallingConv(CLI.RetTy->getContext(), CLI.CallConv, VT); unsigned NumParts = getNumRegistersForCallingConv(CLI.RetTy->getContext(), CLI.CallConv, VT); SmallVector Parts(NumParts); ISD::NodeType ExtendKind = ISD::ANY_EXTEND; if (Args[i].IsSExt) ExtendKind = ISD::SIGN_EXTEND; else if (Args[i].IsZExt) ExtendKind = ISD::ZERO_EXTEND; // Conservatively only handle 'returned' on non-vectors that can be lowered, // for now. if (Args[i].IsReturned && !Op.getValueType().isVector() && CanLowerReturn) { assert((CLI.RetTy == Args[i].Ty || (CLI.RetTy->isPointerTy() && Args[i].Ty->isPointerTy() && CLI.RetTy->getPointerAddressSpace() == Args[i].Ty->getPointerAddressSpace())) && RetTys.size() == NumValues && "unexpected use of 'returned'"); // Before passing 'returned' to the target lowering code, ensure that // either the register MVT and the actual EVT are the same size or that // the return value and argument are extended in the same way; in these // cases it's safe to pass the argument register value unchanged as the // return register value (although it's at the target's option whether // to do so) // TODO: allow code generation to take advantage of partially preserved // registers rather than clobbering the entire register when the // parameter extension method is not compatible with the return // extension method if ((NumParts * PartVT.getSizeInBits() == VT.getSizeInBits()) || (ExtendKind != ISD::ANY_EXTEND && CLI.RetSExt == Args[i].IsSExt && CLI.RetZExt == Args[i].IsZExt)) Flags.setReturned(); } getCopyToParts(CLI.DAG, CLI.DL, Op, &Parts[0], NumParts, PartVT, CLI.CB, CLI.CallConv, ExtendKind); for (unsigned j = 0; j != NumParts; ++j) { // if it isn't first piece, alignment must be 1 // For scalable vectors the scalable part is currently handled // by individual targets, so we just use the known minimum size here. ISD::OutputArg MyFlags( Flags, Parts[j].getValueType().getSimpleVT(), VT, i < CLI.NumFixedArgs, i, j * Parts[j].getValueType().getStoreSize().getKnownMinValue()); if (NumParts > 1 && j == 0) MyFlags.Flags.setSplit(); else if (j != 0) { MyFlags.Flags.setOrigAlign(Align(1)); if (j == NumParts - 1) MyFlags.Flags.setSplitEnd(); } CLI.Outs.push_back(MyFlags); CLI.OutVals.push_back(Parts[j]); } if (NeedsRegBlock && Value == NumValues - 1) CLI.Outs[CLI.Outs.size() - 1].Flags.setInConsecutiveRegsLast(); } } SmallVector InVals; CLI.Chain = LowerCall(CLI, InVals); // Update CLI.InVals to use outside of this function. CLI.InVals = InVals; // Verify that the target's LowerCall behaved as expected. assert(CLI.Chain.getNode() && CLI.Chain.getValueType() == MVT::Other && "LowerCall didn't return a valid chain!"); assert((!CLI.IsTailCall || InVals.empty()) && "LowerCall emitted a return value for a tail call!"); assert((CLI.IsTailCall || InVals.size() == CLI.Ins.size()) && "LowerCall didn't emit the correct number of values!"); // For a tail call, the return value is merely live-out and there aren't // any nodes in the DAG representing it. Return a special value to // indicate that a tail call has been emitted and no more Instructions // should be processed in the current block. 
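// Illustrative standalone snippet (not part of this file): an argument that
// is split into several register-sized parts. On a typical 64-bit target the
// __int128 below (a GCC/Clang extension) is split by getCopyToParts into two
// i64 parts; the first part gets setSplit() and the last gets setSplitEnd(),
// exactly as above.
__int128 PassThrough(__int128 X) { return X + 1; }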
if (CLI.IsTailCall) { CLI.DAG.setRoot(CLI.Chain); return std::make_pair(SDValue(), SDValue()); } #ifndef NDEBUG for (unsigned i = 0, e = CLI.Ins.size(); i != e; ++i) { assert(InVals[i].getNode() && "LowerCall emitted a null value!"); assert(EVT(CLI.Ins[i].VT) == InVals[i].getValueType() && "LowerCall emitted a value with the wrong type!"); } #endif SmallVector ReturnValues; if (!CanLowerReturn) { // The instruction result is the result of loading from the // hidden sret parameter. SmallVector PVTs; Type *PtrRetTy = PointerType::get(OrigRetTy->getContext(), DL.getAllocaAddrSpace()); ComputeValueVTs(*this, DL, PtrRetTy, PVTs); assert(PVTs.size() == 1 && "Pointers should fit in one register"); EVT PtrVT = PVTs[0]; unsigned NumValues = RetTys.size(); ReturnValues.resize(NumValues); SmallVector Chains(NumValues); // An aggregate return value cannot wrap around the address space, so // offsets to its parts don't wrap either. SDNodeFlags Flags; Flags.setNoUnsignedWrap(true); MachineFunction &MF = CLI.DAG.getMachineFunction(); Align HiddenSRetAlign = MF.getFrameInfo().getObjectAlign(DemoteStackIdx); for (unsigned i = 0; i < NumValues; ++i) { SDValue Add = CLI.DAG.getNode(ISD::ADD, CLI.DL, PtrVT, DemoteStackSlot, CLI.DAG.getConstant(Offsets[i], CLI.DL, PtrVT), Flags); SDValue L = CLI.DAG.getLoad( RetTys[i], CLI.DL, CLI.Chain, Add, MachinePointerInfo::getFixedStack(CLI.DAG.getMachineFunction(), DemoteStackIdx, Offsets[i]), HiddenSRetAlign); ReturnValues[i] = L; Chains[i] = L.getValue(1); } CLI.Chain = CLI.DAG.getNode(ISD::TokenFactor, CLI.DL, MVT::Other, Chains); } else { // Collect the legal value parts into potentially illegal values // that correspond to the original function's return values. std::optional AssertOp; if (CLI.RetSExt) AssertOp = ISD::AssertSext; else if (CLI.RetZExt) AssertOp = ISD::AssertZext; unsigned CurReg = 0; for (EVT VT : RetTys) { MVT RegisterVT = getRegisterTypeForCallingConv(CLI.RetTy->getContext(), CLI.CallConv, VT); unsigned NumRegs = getNumRegistersForCallingConv(CLI.RetTy->getContext(), CLI.CallConv, VT); ReturnValues.push_back(getCopyFromParts( CLI.DAG, CLI.DL, &InVals[CurReg], NumRegs, RegisterVT, VT, nullptr, CLI.Chain, CLI.CallConv, AssertOp)); CurReg += NumRegs; } // For a function returning void, there is no return value. We can't create // such a node, so we just return a null return value in that case. In // that case, nothing will actually look at the value. if (ReturnValues.empty()) return std::make_pair(SDValue(), CLI.Chain); } SDValue Res = CLI.DAG.getNode(ISD::MERGE_VALUES, CLI.DL, CLI.DAG.getVTList(RetTys), ReturnValues); return std::make_pair(Res, CLI.Chain); } /// Places new result values for the node in Results (their number /// and types must exactly match those of the original return values of /// the node), or leaves Results empty, which indicates that the node is not /// to be custom lowered after all. void TargetLowering::LowerOperationWrapper(SDNode *N, SmallVectorImpl &Results, SelectionDAG &DAG) const { SDValue Res = LowerOperation(SDValue(N, 0), DAG); if (!Res.getNode()) return; // If the original node has one result, take the return value from // LowerOperation as is. It might not be result number 0. if (N->getNumValues() == 1) { Results.push_back(Res); return; } // If the original node has multiple results, then the return node should // have the same number of results. assert((N->getNumValues() == Res->getNumValues()) && "Lowering returned the wrong number of results!"); // Places new result values base on N result number. 
for (unsigned I = 0, E = N->getNumValues(); I != E; ++I) Results.push_back(Res.getValue(I)); } SDValue TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { llvm_unreachable("LowerOperation not implemented for this target!"); } void SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg, ISD::NodeType ExtendType) { SDValue Op = getNonRegisterValue(V); assert((Op.getOpcode() != ISD::CopyFromReg || cast(Op.getOperand(1))->getReg() != Reg) && "Copy from a reg to the same reg!"); assert(!Register::isPhysicalRegister(Reg) && "Is a physreg"); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); // If this is an InlineAsm we have to match the registers required, not the // notional registers required by the type. RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), Reg, V->getType(), std::nullopt); // This is not an ABI copy. SDValue Chain = DAG.getEntryNode(); if (ExtendType == ISD::ANY_EXTEND) { auto PreferredExtendIt = FuncInfo.PreferredExtendType.find(V); if (PreferredExtendIt != FuncInfo.PreferredExtendType.end()) ExtendType = PreferredExtendIt->second; } RFV.getCopyToRegs(Op, DAG, getCurSDLoc(), Chain, nullptr, V, ExtendType); PendingExports.push_back(Chain); } #include "llvm/CodeGen/SelectionDAGISel.h" /// isOnlyUsedInEntryBlock - If the specified argument is only used in the /// entry block, return true. This includes arguments used by switches, since /// the switch may expand into multiple basic blocks. static bool isOnlyUsedInEntryBlock(const Argument *A, bool FastISel) { // With FastISel active, we may be splitting blocks, so force creation // of virtual registers for all non-dead arguments. if (FastISel) return A->use_empty(); const BasicBlock &Entry = A->getParent()->front(); for (const User *U : A->users()) if (cast(U)->getParent() != &Entry || isa(U)) return false; // Use not in entry block. return true; } using ArgCopyElisionMapTy = DenseMap>; /// Scan the entry block of the function in FuncInfo for arguments that look /// like copies into a local alloca. Record any copied arguments in /// ArgCopyElisionCandidates. static void findArgumentCopyElisionCandidates(const DataLayout &DL, FunctionLoweringInfo *FuncInfo, ArgCopyElisionMapTy &ArgCopyElisionCandidates) { // Record the state of every static alloca used in the entry block. Argument // allocas are all used in the entry block, so we need approximately as many // entries as we have arguments. enum StaticAllocaInfo { Unknown, Clobbered, Elidable }; SmallDenseMap StaticAllocas; unsigned NumArgs = FuncInfo->Fn->arg_size(); StaticAllocas.reserve(NumArgs * 2); auto GetInfoIfStaticAlloca = [&](const Value *V) -> StaticAllocaInfo * { if (!V) return nullptr; V = V->stripPointerCasts(); const auto *AI = dyn_cast(V); if (!AI || !AI->isStaticAlloca() || !FuncInfo->StaticAllocaMap.count(AI)) return nullptr; auto Iter = StaticAllocas.insert({AI, Unknown}); return &Iter.first->second; }; // Look for stores of arguments to static allocas. Look through bitcasts and // GEPs to handle type coercions, as long as the alloca is fully initialized // by the store. Any non-store use of an alloca escapes it and any subsequent // unanalyzed store might write it. // FIXME: Handle structs initialized with multiple stores. for (const Instruction &I : FuncInfo->Fn->getEntryBlock()) { // Look for stores, and handle non-store uses conservatively. const auto *SI = dyn_cast(&I); if (!SI) { // We will look through cast uses, so ignore them completely. 
if (I.isCast()) continue; // Ignore debug info and pseudo op intrinsics, they don't escape or store // to allocas. if (I.isDebugOrPseudoInst()) continue; // This is an unknown instruction. Assume it escapes or writes to all // static alloca operands. for (const Use &U : I.operands()) { if (StaticAllocaInfo *Info = GetInfoIfStaticAlloca(U)) *Info = StaticAllocaInfo::Clobbered; } continue; } // If the stored value is a static alloca, mark it as escaped. if (StaticAllocaInfo *Info = GetInfoIfStaticAlloca(SI->getValueOperand())) *Info = StaticAllocaInfo::Clobbered; // Check if the destination is a static alloca. const Value *Dst = SI->getPointerOperand()->stripPointerCasts(); StaticAllocaInfo *Info = GetInfoIfStaticAlloca(Dst); if (!Info) continue; const AllocaInst *AI = cast(Dst); // Skip allocas that have been initialized or clobbered. if (*Info != StaticAllocaInfo::Unknown) continue; // Check if the stored value is an argument, and that this store fully // initializes the alloca. // If the argument type has padding bits we can't directly forward a pointer // as the upper bits may contain garbage. // Don't elide copies from the same argument twice. const Value *Val = SI->getValueOperand()->stripPointerCasts(); const auto *Arg = dyn_cast(Val); if (!Arg || Arg->hasPassPointeeByValueCopyAttr() || Arg->getType()->isEmptyTy() || DL.getTypeStoreSize(Arg->getType()) != DL.getTypeAllocSize(AI->getAllocatedType()) || !DL.typeSizeEqualsStoreSize(Arg->getType()) || ArgCopyElisionCandidates.count(Arg)) { *Info = StaticAllocaInfo::Clobbered; continue; } LLVM_DEBUG(dbgs() << "Found argument copy elision candidate: " << *AI << '\n'); // Mark this alloca and store for argument copy elision. *Info = StaticAllocaInfo::Elidable; ArgCopyElisionCandidates.insert({Arg, {AI, SI}}); // Stop scanning if we've seen all arguments. This will happen early in -O0 // builds, which is useful, because -O0 builds have large entry blocks and // many allocas. if (ArgCopyElisionCandidates.size() == NumArgs) break; } } /// Try to elide argument copies from memory into a local alloca. Succeeds if /// ArgVal is a load from a suitable fixed stack object. static void tryToElideArgumentCopy( FunctionLoweringInfo &FuncInfo, SmallVectorImpl &Chains, DenseMap &ArgCopyElisionFrameIndexMap, SmallPtrSetImpl &ElidedArgCopyInstrs, ArgCopyElisionMapTy &ArgCopyElisionCandidates, const Argument &Arg, ArrayRef ArgVals, bool &ArgHasUses) { // Check if this is a load from a fixed stack object. auto *LNode = dyn_cast(ArgVals[0]); if (!LNode) return; auto *FINode = dyn_cast(LNode->getBasePtr().getNode()); if (!FINode) return; // Check that the fixed stack object is the right size and alignment. // Look at the alignment that the user wrote on the alloca instead of looking // at the stack object. 
auto ArgCopyIter = ArgCopyElisionCandidates.find(&Arg); assert(ArgCopyIter != ArgCopyElisionCandidates.end()); const AllocaInst *AI = ArgCopyIter->second.first; int FixedIndex = FINode->getIndex(); int &AllocaIndex = FuncInfo.StaticAllocaMap[AI]; int OldIndex = AllocaIndex; MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo(); if (MFI.getObjectSize(FixedIndex) != MFI.getObjectSize(OldIndex)) { LLVM_DEBUG( dbgs() << " argument copy elision failed due to bad fixed stack " "object size\n"); return; } Align RequiredAlignment = AI->getAlign(); if (MFI.getObjectAlign(FixedIndex) < RequiredAlignment) { LLVM_DEBUG(dbgs() << " argument copy elision failed: alignment of alloca " "greater than stack argument alignment (" << DebugStr(RequiredAlignment) << " vs " << DebugStr(MFI.getObjectAlign(FixedIndex)) << ")\n"); return; } // Perform the elision. Delete the old stack object and replace its only use // in the variable info map. Mark the stack object as mutable and aliased. LLVM_DEBUG({ dbgs() << "Eliding argument copy from " << Arg << " to " << *AI << '\n' << " Replacing frame index " << OldIndex << " with " << FixedIndex << '\n'; }); MFI.RemoveStackObject(OldIndex); MFI.setIsImmutableObjectIndex(FixedIndex, false); MFI.setIsAliasedObjectIndex(FixedIndex, true); AllocaIndex = FixedIndex; ArgCopyElisionFrameIndexMap.insert({OldIndex, FixedIndex}); for (SDValue ArgVal : ArgVals) Chains.push_back(ArgVal.getValue(1)); // Avoid emitting code for the store implementing the copy. const StoreInst *SI = ArgCopyIter->second.second; ElidedArgCopyInstrs.insert(SI); // Check for uses of the argument again so that we can avoid exporting ArgVal // if it is't used by anything other than the store. for (const Value *U : Arg.users()) { if (U != SI) { ArgHasUses = true; break; } } } void SelectionDAGISel::LowerArguments(const Function &F) { SelectionDAG &DAG = SDB->DAG; SDLoc dl = SDB->getCurSDLoc(); const DataLayout &DL = DAG.getDataLayout(); SmallVector Ins; // In Naked functions we aren't going to save any registers. if (F.hasFnAttribute(Attribute::Naked)) return; if (!FuncInfo->CanLowerReturn) { // Put in an sret pointer parameter before all the other parameters. SmallVector ValueVTs; ComputeValueVTs(*TLI, DAG.getDataLayout(), PointerType::get(F.getContext(), DAG.getDataLayout().getAllocaAddrSpace()), ValueVTs); // NOTE: Assuming that a pointer will never break down to more than one VT // or one register. ISD::ArgFlagsTy Flags; Flags.setSRet(); MVT RegisterVT = TLI->getRegisterType(*DAG.getContext(), ValueVTs[0]); ISD::InputArg RetArg(Flags, RegisterVT, ValueVTs[0], true, ISD::InputArg::NoArgIndex, 0); Ins.push_back(RetArg); } // Look for stores of arguments to static allocas. Mark such arguments with a // flag to ask the target to give us the memory location of that argument if // available. ArgCopyElisionMapTy ArgCopyElisionCandidates; findArgumentCopyElisionCandidates(DL, FuncInfo.get(), ArgCopyElisionCandidates); // Set up the incoming argument description vector. 
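// Illustrative standalone snippet (not part of this file): the entry-block
// pattern the argument-copy-elision code above looks for. At -O0 the argument
// is stored exactly once into a static alloca that is otherwise only read; if
// the argument is passed on the stack (e.g. the seventh integer argument in
// the x86-64 SysV ABI), the alloca can be replaced by the argument's fixed
// stack object and the copy elided.
long SeventhArg(long A0, long A1, long A2, long A3, long A4, long A5,
                long Stacked) {
  long Local = Stacked; // single fully-initializing store of an argument
  (void)A0; (void)A1; (void)A2; (void)A3; (void)A4; (void)A5;
  return Local;
}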
for (const Argument &Arg : F.args()) { unsigned ArgNo = Arg.getArgNo(); SmallVector ValueVTs; ComputeValueVTs(*TLI, DAG.getDataLayout(), Arg.getType(), ValueVTs); bool isArgValueUsed = !Arg.use_empty(); unsigned PartBase = 0; Type *FinalType = Arg.getType(); if (Arg.hasAttribute(Attribute::ByVal)) FinalType = Arg.getParamByValType(); bool NeedsRegBlock = TLI->functionArgumentNeedsConsecutiveRegisters( FinalType, F.getCallingConv(), F.isVarArg(), DL); for (unsigned Value = 0, NumValues = ValueVTs.size(); Value != NumValues; ++Value) { EVT VT = ValueVTs[Value]; Type *ArgTy = VT.getTypeForEVT(*DAG.getContext()); ISD::ArgFlagsTy Flags; if (Arg.getType()->isPointerTy()) { Flags.setPointer(); Flags.setPointerAddrSpace( cast(Arg.getType())->getAddressSpace()); } if (Arg.hasAttribute(Attribute::ZExt)) Flags.setZExt(); if (Arg.hasAttribute(Attribute::SExt)) Flags.setSExt(); if (Arg.hasAttribute(Attribute::InReg)) { // If we are using vectorcall calling convention, a structure that is // passed InReg - is surely an HVA if (F.getCallingConv() == CallingConv::X86_VectorCall && isa(Arg.getType())) { // The first value of a structure is marked if (0 == Value) Flags.setHvaStart(); Flags.setHva(); } // Set InReg Flag Flags.setInReg(); } if (Arg.hasAttribute(Attribute::StructRet)) Flags.setSRet(); if (Arg.hasAttribute(Attribute::SwiftSelf)) Flags.setSwiftSelf(); if (Arg.hasAttribute(Attribute::SwiftAsync)) Flags.setSwiftAsync(); if (Arg.hasAttribute(Attribute::SwiftError)) Flags.setSwiftError(); if (Arg.hasAttribute(Attribute::ByVal)) Flags.setByVal(); if (Arg.hasAttribute(Attribute::ByRef)) Flags.setByRef(); if (Arg.hasAttribute(Attribute::InAlloca)) { Flags.setInAlloca(); // Set the byval flag for CCAssignFn callbacks that don't know about // inalloca. This way we can know how many bytes we should've allocated // and how many bytes a callee cleanup function will pop. If we port // inalloca to more targets, we'll have to add custom inalloca handling // in the various CC lowering callbacks. Flags.setByVal(); } if (Arg.hasAttribute(Attribute::Preallocated)) { Flags.setPreallocated(); // Set the byval flag for CCAssignFn callbacks that don't know about // preallocated. This way we can know how many bytes we should've // allocated and how many bytes a callee cleanup function will pop. If // we port preallocated to more targets, we'll have to add custom // preallocated handling in the various CC lowering callbacks. Flags.setByVal(); } // Certain targets (such as MIPS), may have a different ABI alignment // for a type depending on the context. Give the target a chance to // specify the alignment it wants. const Align OriginalAlignment( TLI->getABIAlignmentForCallingConv(ArgTy, DL)); Flags.setOrigAlign(OriginalAlignment); Align MemAlign; Type *ArgMemTy = nullptr; if (Flags.isByVal() || Flags.isInAlloca() || Flags.isPreallocated() || Flags.isByRef()) { if (!ArgMemTy) ArgMemTy = Arg.getPointeeInMemoryValueType(); uint64_t MemSize = DL.getTypeAllocSize(ArgMemTy); // For in-memory arguments, size and alignment should be passed from FE. // BE will guess if this info is not there but there are cases it cannot // get right. 
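// For in-memory arguments the alignment chosen below follows a fixed fallback
// chain: an explicit stackalign attribute, then the parameter's align
// attribute, then the target's default for the type. A sketch of that
// preference order with std::optional standing in for the attribute queries
// (names are illustrative):
#include <cstdint>
#include <cstdio>
#include <optional>

static uint64_t chooseMemAlign(std::optional<uint64_t> ParamStackAlign,
                               std::optional<uint64_t> ParamAlign,
                               uint64_t TargetByValAlign) {
  if (ParamStackAlign)
    return *ParamStackAlign;   // explicit stack alignment wins
  if (ParamAlign)
    return *ParamAlign;        // then the parameter's own alignment attribute
  return TargetByValAlign;     // otherwise fall back to the target default
}

int main() {
  std::printf("%llu\n", (unsigned long long)chooseMemAlign({32}, {8}, 4)); // 32
  std::printf("%llu\n", (unsigned long long)chooseMemAlign({}, {8}, 4));   // 8
  std::printf("%llu\n", (unsigned long long)chooseMemAlign({}, {}, 4));    // 4
}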
if (auto ParamAlign = Arg.getParamStackAlign()) MemAlign = *ParamAlign; else if ((ParamAlign = Arg.getParamAlign())) MemAlign = *ParamAlign; else MemAlign = Align(TLI->getByValTypeAlignment(ArgMemTy, DL)); if (Flags.isByRef()) Flags.setByRefSize(MemSize); else Flags.setByValSize(MemSize); } else if (auto ParamAlign = Arg.getParamStackAlign()) { MemAlign = *ParamAlign; } else { MemAlign = OriginalAlignment; } Flags.setMemAlign(MemAlign); if (Arg.hasAttribute(Attribute::Nest)) Flags.setNest(); if (NeedsRegBlock) Flags.setInConsecutiveRegs(); if (ArgCopyElisionCandidates.count(&Arg)) Flags.setCopyElisionCandidate(); if (Arg.hasAttribute(Attribute::Returned)) Flags.setReturned(); MVT RegisterVT = TLI->getRegisterTypeForCallingConv( *CurDAG->getContext(), F.getCallingConv(), VT); unsigned NumRegs = TLI->getNumRegistersForCallingConv( *CurDAG->getContext(), F.getCallingConv(), VT); for (unsigned i = 0; i != NumRegs; ++i) { // For scalable vectors, use the minimum size; individual targets // are responsible for handling scalable vector arguments and // return values. ISD::InputArg MyFlags( Flags, RegisterVT, VT, isArgValueUsed, ArgNo, PartBase + i * RegisterVT.getStoreSize().getKnownMinValue()); if (NumRegs > 1 && i == 0) MyFlags.Flags.setSplit(); // if it isn't first piece, alignment must be 1 else if (i > 0) { MyFlags.Flags.setOrigAlign(Align(1)); if (i == NumRegs - 1) MyFlags.Flags.setSplitEnd(); } Ins.push_back(MyFlags); } if (NeedsRegBlock && Value == NumValues - 1) Ins[Ins.size() - 1].Flags.setInConsecutiveRegsLast(); PartBase += VT.getStoreSize().getKnownMinValue(); } } // Call the target to set up the argument values. SmallVector InVals; SDValue NewRoot = TLI->LowerFormalArguments( DAG.getRoot(), F.getCallingConv(), F.isVarArg(), Ins, dl, DAG, InVals); // Verify that the target's LowerFormalArguments behaved as expected. assert(NewRoot.getNode() && NewRoot.getValueType() == MVT::Other && "LowerFormalArguments didn't return a valid chain!"); assert(InVals.size() == Ins.size() && "LowerFormalArguments didn't emit the correct number of values!"); LLVM_DEBUG({ for (unsigned i = 0, e = Ins.size(); i != e; ++i) { assert(InVals[i].getNode() && "LowerFormalArguments emitted a null value!"); assert(EVT(Ins[i].VT) == InVals[i].getValueType() && "LowerFormalArguments emitted a value with the wrong type!"); } }); // Update the DAG with the new chain value resulting from argument lowering. DAG.setRoot(NewRoot); // Set up the argument values. unsigned i = 0; if (!FuncInfo->CanLowerReturn) { // Create a virtual register for the sret pointer, and put in a copy // from the sret argument into it. SmallVector ValueVTs; ComputeValueVTs(*TLI, DAG.getDataLayout(), PointerType::get(F.getContext(), DAG.getDataLayout().getAllocaAddrSpace()), ValueVTs); MVT VT = ValueVTs[0].getSimpleVT(); MVT RegVT = TLI->getRegisterType(*CurDAG->getContext(), VT); std::optional AssertOp; SDValue ArgValue = getCopyFromParts(DAG, dl, &InVals[0], 1, RegVT, VT, nullptr, NewRoot, F.getCallingConv(), AssertOp); MachineFunction& MF = SDB->DAG.getMachineFunction(); MachineRegisterInfo& RegInfo = MF.getRegInfo(); Register SRetReg = RegInfo.createVirtualRegister(TLI->getRegClassFor(RegVT)); FuncInfo->DemoteRegister = SRetReg; NewRoot = SDB->DAG.getCopyToReg(NewRoot, SDB->getCurSDLoc(), SRetReg, ArgValue); DAG.setRoot(NewRoot); // i indexes lowered arguments. Bump it past the hidden sret argument. 
++i; } SmallVector Chains; DenseMap ArgCopyElisionFrameIndexMap; for (const Argument &Arg : F.args()) { SmallVector ArgValues; SmallVector ValueVTs; ComputeValueVTs(*TLI, DAG.getDataLayout(), Arg.getType(), ValueVTs); unsigned NumValues = ValueVTs.size(); if (NumValues == 0) continue; bool ArgHasUses = !Arg.use_empty(); // Elide the copying store if the target loaded this argument from a // suitable fixed stack object. if (Ins[i].Flags.isCopyElisionCandidate()) { unsigned NumParts = 0; for (EVT VT : ValueVTs) NumParts += TLI->getNumRegistersForCallingConv(*CurDAG->getContext(), F.getCallingConv(), VT); tryToElideArgumentCopy(*FuncInfo, Chains, ArgCopyElisionFrameIndexMap, ElidedArgCopyInstrs, ArgCopyElisionCandidates, Arg, ArrayRef(&InVals[i], NumParts), ArgHasUses); } // If this argument is unused then remember its value. It is used to generate // debugging information. bool isSwiftErrorArg = TLI->supportSwiftError() && Arg.hasAttribute(Attribute::SwiftError); if (!ArgHasUses && !isSwiftErrorArg) { SDB->setUnusedArgValue(&Arg, InVals[i]); // Also remember any frame index for use in FastISel. if (FrameIndexSDNode *FI = dyn_cast(InVals[i].getNode())) FuncInfo->setArgumentFrameIndex(&Arg, FI->getIndex()); } for (unsigned Val = 0; Val != NumValues; ++Val) { EVT VT = ValueVTs[Val]; MVT PartVT = TLI->getRegisterTypeForCallingConv(*CurDAG->getContext(), F.getCallingConv(), VT); unsigned NumParts = TLI->getNumRegistersForCallingConv( *CurDAG->getContext(), F.getCallingConv(), VT); // Even an apparent 'unused' swifterror argument needs to be returned. So // we do generate a copy for it that can be used on return from the // function. if (ArgHasUses || isSwiftErrorArg) { std::optional AssertOp; if (Arg.hasAttribute(Attribute::SExt)) AssertOp = ISD::AssertSext; else if (Arg.hasAttribute(Attribute::ZExt)) AssertOp = ISD::AssertZext; ArgValues.push_back(getCopyFromParts(DAG, dl, &InVals[i], NumParts, PartVT, VT, nullptr, NewRoot, F.getCallingConv(), AssertOp)); } i += NumParts; } // We don't need to do anything else for unused arguments. if (ArgValues.empty()) continue; // Note down frame index. if (FrameIndexSDNode *FI = dyn_cast(ArgValues[0].getNode())) FuncInfo->setArgumentFrameIndex(&Arg, FI->getIndex()); SDValue Res = DAG.getMergeValues(ArrayRef(ArgValues.data(), NumValues), SDB->getCurSDLoc()); SDB->setValue(&Arg, Res); if (!TM.Options.EnableFastISel && Res.getOpcode() == ISD::BUILD_PAIR) { // We want to associate the argument with the frame index, among // involved operands, that correspond to the lowest address. The // getCopyFromParts function, called earlier, is swapping the order of // the operands to BUILD_PAIR depending on endianness. The result of // that swapping is that the least significant bits of the argument will // be in the first operand of the BUILD_PAIR node, and the most // significant bits will be in the second operand. unsigned LowAddressOp = DAG.getDataLayout().isBigEndian() ? 1 : 0; if (LoadSDNode *LNode = dyn_cast(Res.getOperand(LowAddressOp).getNode())) if (FrameIndexSDNode *FI = dyn_cast(LNode->getBasePtr().getNode())) FuncInfo->setArgumentFrameIndex(&Arg, FI->getIndex()); } // Analyses past this point are naive and don't expect an assertion. if (Res.getOpcode() == ISD::AssertZext) Res = Res.getOperand(0); // Update the SwiftErrorVRegDefMap. 
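// The BUILD_PAIR note above hinges on byte order: operand 0 always carries the
// least-significant part, so it sits at the lower address only on
// little-endian targets. A standalone sketch of the byte layouts for both
// orders (plain C++, no LLVM types):
#include <cstdint>
#include <cstdio>

int main() {
  const uint32_t Parts[2] = {0x44332211u /*operand 0, LSBs*/,
                             0x88776655u /*operand 1, MSBs*/};
  unsigned char LE[8], BE[8];
  for (int i = 0; i < 4; ++i) {
    LE[i]     = (Parts[0] >> (8 * i)) & 0xff;       // LSB part first in memory
    LE[4 + i] = (Parts[1] >> (8 * i)) & 0xff;
    BE[i]     = (Parts[1] >> (8 * (3 - i))) & 0xff; // MSB part first in memory
    BE[4 + i] = (Parts[0] >> (8 * (3 - i))) & 0xff;
  }
  std::printf("little-endian low-address byte: 0x%02x (from operand 0)\n",
              (unsigned)LE[0]);
  std::printf("big-endian    low-address byte: 0x%02x (from operand 1)\n",
              (unsigned)BE[0]);
}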
if (Res.getOpcode() == ISD::CopyFromReg && isSwiftErrorArg) { unsigned Reg = cast(Res.getOperand(1))->getReg(); if (Register::isVirtualRegister(Reg)) SwiftError->setCurrentVReg(FuncInfo->MBB, SwiftError->getFunctionArg(), Reg); } // If this argument is live outside of the entry block, insert a copy from // wherever we got it to the vreg that other BB's will reference it as. if (Res.getOpcode() == ISD::CopyFromReg) { // If we can, though, try to skip creating an unnecessary vreg. // FIXME: This isn't very clean... it would be nice to make this more // general. unsigned Reg = cast(Res.getOperand(1))->getReg(); if (Register::isVirtualRegister(Reg)) { FuncInfo->ValueMap[&Arg] = Reg; continue; } } if (!isOnlyUsedInEntryBlock(&Arg, TM.Options.EnableFastISel)) { FuncInfo->InitializeRegForValue(&Arg); SDB->CopyToExportRegsIfNeeded(&Arg); } } if (!Chains.empty()) { Chains.push_back(NewRoot); NewRoot = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains); } DAG.setRoot(NewRoot); assert(i == InVals.size() && "Argument register count mismatch!"); // If any argument copy elisions occurred and we have debug info, update the // stale frame indices used in the dbg.declare variable info table. if (!ArgCopyElisionFrameIndexMap.empty()) { for (MachineFunction::VariableDbgInfo &VI : MF->getInStackSlotVariableDbgInfo()) { auto I = ArgCopyElisionFrameIndexMap.find(VI.getStackSlot()); if (I != ArgCopyElisionFrameIndexMap.end()) VI.updateStackSlot(I->second); } } // Finally, if the target has anything special to do, allow it to do so. emitFunctionEntryCode(); } /// Handle PHI nodes in successor blocks. Emit code into the SelectionDAG to /// ensure constants are generated when needed. Remember the virtual registers /// that need to be added to the Machine PHI nodes as input. We cannot just /// directly add them, because expansion might result in multiple MBB's for one /// BB. As such, the start of the BB might correspond to a different MBB than /// the end. void SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SmallPtrSet SuccsHandled; // Check PHI nodes in successors that expect a value to be available from this // block. for (const BasicBlock *SuccBB : successors(LLVMBB->getTerminator())) { if (!isa(SuccBB->begin())) continue; MachineBasicBlock *SuccMBB = FuncInfo.MBBMap[SuccBB]; // If this terminator has multiple identical successors (common for // switches), only handle each succ once. if (!SuccsHandled.insert(SuccMBB).second) continue; MachineBasicBlock::iterator MBBI = SuccMBB->begin(); // At this point we know that there is a 1-1 correspondence between LLVM PHI // nodes and Machine PHI nodes, but the incoming operands have not been // emitted yet. for (const PHINode &PN : SuccBB->phis()) { // Ignore dead phi's. if (PN.use_empty()) continue; // Skip empty types if (PN.getType()->isEmptyTy()) continue; unsigned Reg; const Value *PHIOp = PN.getIncomingValueForBlock(LLVMBB); if (const auto *C = dyn_cast(PHIOp)) { unsigned &RegOut = ConstantsOut[C]; if (RegOut == 0) { RegOut = FuncInfo.CreateRegs(C); // We need to zero/sign extend ConstantInt phi operands to match // assumptions in FunctionLoweringInfo::ComputePHILiveOutRegInfo. ISD::NodeType ExtendType = ISD::ANY_EXTEND; if (auto *CI = dyn_cast(C)) ExtendType = TLI.signExtendConstant(CI) ? 
ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; CopyValueToVirtualRegister(C, RegOut, ExtendType); } Reg = RegOut; } else { DenseMap::iterator I = FuncInfo.ValueMap.find(PHIOp); if (I != FuncInfo.ValueMap.end()) Reg = I->second; else { assert(isa(PHIOp) && FuncInfo.StaticAllocaMap.count(cast(PHIOp)) && "Didn't codegen value into a register!??"); Reg = FuncInfo.CreateRegs(PHIOp); CopyValueToVirtualRegister(PHIOp, Reg); } } // Remember that this register needs to added to the machine PHI node as // the input for this MBB. SmallVector ValueVTs; ComputeValueVTs(TLI, DAG.getDataLayout(), PN.getType(), ValueVTs); for (EVT VT : ValueVTs) { const unsigned NumRegisters = TLI.getNumRegisters(*DAG.getContext(), VT); for (unsigned i = 0; i != NumRegisters; ++i) FuncInfo.PHINodesToUpdate.push_back( std::make_pair(&*MBBI++, Reg + i)); Reg += NumRegisters; } } } ConstantsOut.clear(); } MachineBasicBlock *SelectionDAGBuilder::NextBlock(MachineBasicBlock *MBB) { MachineFunction::iterator I(MBB); if (++I == FuncInfo.MF->end()) return nullptr; return &*I; } /// During lowering new call nodes can be created (such as memset, etc.). /// Those will become new roots of the current DAG, but complications arise /// when they are tail calls. In such cases, the call lowering will update /// the root, but the builder still needs to know that a tail call has been /// lowered in order to avoid generating an additional return. void SelectionDAGBuilder::updateDAGForMaybeTailCall(SDValue MaybeTC) { // If the node is null, we do have a tail call. if (MaybeTC.getNode() != nullptr) DAG.setRoot(MaybeTC); else HasTailCall = true; } void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond, MachineBasicBlock *SwitchMBB, MachineBasicBlock *DefaultMBB) { MachineFunction *CurMF = FuncInfo.MF; MachineBasicBlock *NextMBB = nullptr; MachineFunction::iterator BBI(W.MBB); if (++BBI != FuncInfo.MF->end()) NextMBB = &*BBI; unsigned Size = W.LastCluster - W.FirstCluster + 1; BranchProbabilityInfo *BPI = FuncInfo.BPI; if (Size == 2 && W.MBB == SwitchMBB) { // If any two of the cases has the same destination, and if one value // is the same as the other, but has one bit unset that the other has set, // use bit manipulation to do two compares at once. For example: // "if (X == 6 || X == 4)" -> "if ((X|2) == 6)" // TODO: This could be extended to merge any 2 cases in switches with 3 // cases. // TODO: Handle cases where W.CaseBB != SwitchBB. CaseCluster &Small = *W.FirstCluster; CaseCluster &Big = *W.LastCluster; if (Small.Low == Small.High && Big.Low == Big.High && Small.MBB == Big.MBB) { const APInt &SmallValue = Small.Low->getValue(); const APInt &BigValue = Big.Low->getValue(); // Check that there is only one bit different. APInt CommonBit = BigValue ^ SmallValue; if (CommonBit.isPowerOf2()) { SDValue CondLHS = getValue(Cond); EVT VT = CondLHS.getValueType(); SDLoc DL = getCurSDLoc(); SDValue Or = DAG.getNode(ISD::OR, DL, VT, CondLHS, DAG.getConstant(CommonBit, DL, VT)); SDValue Cond = DAG.getSetCC( DL, MVT::i1, Or, DAG.getConstant(BigValue | SmallValue, DL, VT), ISD::SETEQ); // Update successor info. // Both Small and Big will jump to Small.BB, so we sum up the // probabilities. addSuccessorWithProb(SwitchMBB, Small.MBB, Small.Prob + Big.Prob); if (BPI) addSuccessorWithProb( SwitchMBB, DefaultMBB, // The default destination is the first successor in IR. BPI->getEdgeProbability(SwitchMBB->getBasicBlock(), (unsigned)0)); else addSuccessorWithProb(SwitchMBB, DefaultMBB); // Insert the true branch. 
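// The two-cluster fast path above merges "X == A || X == B" into one compare
// whenever A and B differ in a single bit: OR that bit into X and compare
// against A | B. A self-contained sketch of the legality test and the folded
// compare (function names here are illustrative):
#include <cassert>
#include <cstdint>

// True when the two case values differ in exactly one bit, i.e. their XOR is a
// power of two.
static bool differsInOneBit(uint64_t A, uint64_t B) {
  uint64_t Diff = A ^ B;
  return Diff != 0 && (Diff & (Diff - 1)) == 0;
}

// The folded form: ((X | DiffBit) == (A | B)).
static bool mergedCompare(uint64_t X, uint64_t A, uint64_t B) {
  uint64_t DiffBit = A ^ B;
  return (X | DiffBit) == (A | B);
}

int main() {
  // The example from the comment: X == 6 || X == 4  ==>  (X | 2) == 6.
  assert(differsInOneBit(6, 4));
  for (uint64_t X = 0; X < 16; ++X)
    assert(mergedCompare(X, 6, 4) == (X == 6 || X == 4));
  assert(!differsInOneBit(6, 3)); // differ in two bits: not foldable this way
}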
SDValue BrCond = DAG.getNode(ISD::BRCOND, DL, MVT::Other, getControlRoot(), Cond, DAG.getBasicBlock(Small.MBB)); // Insert the false branch. BrCond = DAG.getNode(ISD::BR, DL, MVT::Other, BrCond, DAG.getBasicBlock(DefaultMBB)); DAG.setRoot(BrCond); return; } } } if (TM.getOptLevel() != CodeGenOptLevel::None) { // Here, we order cases by probability so the most likely case will be // checked first. However, two clusters can have the same probability in // which case their relative ordering is non-deterministic. So we use Low // as a tie-breaker as clusters are guaranteed to never overlap. llvm::sort(W.FirstCluster, W.LastCluster + 1, [](const CaseCluster &a, const CaseCluster &b) { return a.Prob != b.Prob ? a.Prob > b.Prob : a.Low->getValue().slt(b.Low->getValue()); }); // Rearrange the case blocks so that the last one falls through if possible // without changing the order of probabilities. for (CaseClusterIt I = W.LastCluster; I > W.FirstCluster; ) { --I; if (I->Prob > W.LastCluster->Prob) break; if (I->Kind == CC_Range && I->MBB == NextMBB) { std::swap(*I, *W.LastCluster); break; } } } // Compute total probability. BranchProbability DefaultProb = W.DefaultProb; BranchProbability UnhandledProbs = DefaultProb; for (CaseClusterIt I = W.FirstCluster; I <= W.LastCluster; ++I) UnhandledProbs += I->Prob; MachineBasicBlock *CurMBB = W.MBB; for (CaseClusterIt I = W.FirstCluster, E = W.LastCluster; I <= E; ++I) { bool FallthroughUnreachable = false; MachineBasicBlock *Fallthrough; if (I == W.LastCluster) { // For the last cluster, fall through to the default destination. Fallthrough = DefaultMBB; FallthroughUnreachable = isa( DefaultMBB->getBasicBlock()->getFirstNonPHIOrDbg()); } else { Fallthrough = CurMF->CreateMachineBasicBlock(CurMBB->getBasicBlock()); CurMF->insert(BBI, Fallthrough); // Put Cond in a virtual register to make it available from the new blocks. ExportFromCurrentBlock(Cond); } UnhandledProbs -= I->Prob; switch (I->Kind) { case CC_JumpTable: { // FIXME: Optimize away range check based on pivot comparisons. JumpTableHeader *JTH = &SL->JTCases[I->JTCasesIndex].first; SwitchCG::JumpTable *JT = &SL->JTCases[I->JTCasesIndex].second; // The jump block hasn't been inserted yet; insert it here. MachineBasicBlock *JumpMBB = JT->MBB; CurMF->insert(BBI, JumpMBB); auto JumpProb = I->Prob; auto FallthroughProb = UnhandledProbs; // If the default statement is a target of the jump table, we evenly // distribute the default probability to successors of CurMBB. Also // update the probability on the edge from JumpMBB to Fallthrough. for (MachineBasicBlock::succ_iterator SI = JumpMBB->succ_begin(), SE = JumpMBB->succ_end(); SI != SE; ++SI) { if (*SI == DefaultMBB) { JumpProb += DefaultProb / 2; FallthroughProb -= DefaultProb / 2; JumpMBB->setSuccProbability(SI, DefaultProb / 2); JumpMBB->normalizeSuccProbs(); break; } } // If the default clause is unreachable, propagate that knowledge into // JTH->FallthroughUnreachable which will use it to suppress the range // check. // // However, don't do this if we're doing branch target enforcement, // because a table branch _without_ a range check can be a tempting JOP // gadget - out-of-bounds inputs that are impossible in correct // execution become possible again if an attacker can influence the // control flow. So if an attacker doesn't already have a BTI bypass // available, we don't want them to be able to get one out of this // table branch. 
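// The sort above puts the most probable cluster first and, because clusters
// never overlap, breaks probability ties on the low bound so the order stays
// deterministic. A sketch of the comparator on a simplified cluster record:
#include <algorithm>
#include <cstdio>
#include <vector>

struct Cluster {
  long Low;     // lowest case value in the cluster (clusters never overlap)
  double Prob;  // branch probability of the cluster
};

int main() {
  std::vector<Cluster> Clusters = {{10, 0.25}, {3, 0.50}, {7, 0.25}};

  // Most probable first; equal probabilities fall back to the smaller Low.
  std::sort(Clusters.begin(), Clusters.end(),
            [](const Cluster &A, const Cluster &B) {
              return A.Prob != B.Prob ? A.Prob > B.Prob : A.Low < B.Low;
            });

  for (const Cluster &C : Clusters)
    std::printf("Low=%ld Prob=%.2f\n", C.Low, C.Prob);   // 3, then 7, then 10
}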
if (FallthroughUnreachable) { Function &CurFunc = CurMF->getFunction(); if (!CurFunc.hasFnAttribute("branch-target-enforcement")) JTH->FallthroughUnreachable = true; } if (!JTH->FallthroughUnreachable) addSuccessorWithProb(CurMBB, Fallthrough, FallthroughProb); addSuccessorWithProb(CurMBB, JumpMBB, JumpProb); CurMBB->normalizeSuccProbs(); // The jump table header will be inserted in our current block, do the // range check, and fall through to our fallthrough block. JTH->HeaderBB = CurMBB; JT->Default = Fallthrough; // FIXME: Move Default to JumpTableHeader. // If we're in the right place, emit the jump table header right now. if (CurMBB == SwitchMBB) { visitJumpTableHeader(*JT, *JTH, SwitchMBB); JTH->Emitted = true; } break; } case CC_BitTests: { // FIXME: Optimize away range check based on pivot comparisons. BitTestBlock *BTB = &SL->BitTestCases[I->BTCasesIndex]; // The bit test blocks haven't been inserted yet; insert them here. for (BitTestCase &BTC : BTB->Cases) CurMF->insert(BBI, BTC.ThisBB); // Fill in fields of the BitTestBlock. BTB->Parent = CurMBB; BTB->Default = Fallthrough; BTB->DefaultProb = UnhandledProbs; // If the cases in bit test don't form a contiguous range, we evenly // distribute the probability on the edge to Fallthrough to two // successors of CurMBB. if (!BTB->ContiguousRange) { BTB->Prob += DefaultProb / 2; BTB->DefaultProb -= DefaultProb / 2; } if (FallthroughUnreachable) BTB->FallthroughUnreachable = true; // If we're in the right place, emit the bit test header right now. if (CurMBB == SwitchMBB) { visitBitTestHeader(*BTB, SwitchMBB); BTB->Emitted = true; } break; } case CC_Range: { const Value *RHS, *LHS, *MHS; ISD::CondCode CC; if (I->Low == I->High) { // Check Cond == I->Low. CC = ISD::SETEQ; LHS = Cond; RHS=I->Low; MHS = nullptr; } else { // Check I->Low <= Cond <= I->High. CC = ISD::SETLE; LHS = I->Low; MHS = Cond; RHS = I->High; } // If Fallthrough is unreachable, fold away the comparison. if (FallthroughUnreachable) CC = ISD::SETTRUE; // The false probability is the sum of all unhandled cases. CaseBlock CB(CC, LHS, RHS, MHS, I->MBB, Fallthrough, CurMBB, getCurSDLoc(), I->Prob, UnhandledProbs); if (CurMBB == SwitchMBB) visitSwitchCase(CB, SwitchMBB); else SL->SwitchCases.push_back(CB); break; } } CurMBB = Fallthrough; } } void SelectionDAGBuilder::splitWorkItem(SwitchWorkList &WorkList, const SwitchWorkListItem &W, Value *Cond, MachineBasicBlock *SwitchMBB) { assert(W.FirstCluster->Low->getValue().slt(W.LastCluster->Low->getValue()) && "Clusters not sorted?"); assert(W.LastCluster - W.FirstCluster + 1 >= 2 && "Too small to split!"); auto [LastLeft, FirstRight, LeftProb, RightProb] = SL->computeSplitWorkItemInfo(W); // Use the first element on the right as pivot since we will make less-than // comparisons against it. CaseClusterIt PivotCluster = FirstRight; assert(PivotCluster > W.FirstCluster); assert(PivotCluster <= W.LastCluster); CaseClusterIt FirstLeft = W.FirstCluster; CaseClusterIt LastRight = W.LastCluster; const ConstantInt *Pivot = PivotCluster->Low; // New blocks will be inserted immediately after the current one. MachineFunction::iterator BBI(W.MBB); ++BBI; // We will branch to the LHS if Value < Pivot. If LHS is a single cluster, // we can branch to its destination directly if it's squeezed exactly in // between the known lower bound and Pivot - 1. 
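// A CC_Range cluster is emitted as either an equality test (Low == High) or a
// Low <= Cond <= High range check, falling through on failure. A small
// interpreter-style sketch of that decision chain, with the probability
// bookkeeping omitted (types and names are simplified stand-ins):
#include <cstdio>
#include <vector>

struct RangeCluster {
  long Low, High;   // inclusive case range
  int Dest;         // index of the destination "block"
};

// Walk the clusters the way the emitted chain of range checks would: take the
// first cluster that matches, otherwise fall through to the default.
static int dispatch(long Cond, const std::vector<RangeCluster> &Clusters,
                    int DefaultDest) {
  for (const RangeCluster &C : Clusters) {
    bool Match = (C.Low == C.High) ? (Cond == C.Low)
                                   : (C.Low <= Cond && Cond <= C.High);
    if (Match)
      return C.Dest;
  }
  return DefaultDest;
}

int main() {
  std::vector<RangeCluster> Clusters = {{1, 1, 100}, {5, 9, 200}};
  std::printf("%d %d %d\n", dispatch(1, Clusters, 0),   // 100
              dispatch(7, Clusters, 0),                 // 200
              dispatch(42, Clusters, 0));               // 0 (default)
}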
MachineBasicBlock *LeftMBB; if (FirstLeft == LastLeft && FirstLeft->Kind == CC_Range && FirstLeft->Low == W.GE && (FirstLeft->High->getValue() + 1LL) == Pivot->getValue()) { LeftMBB = FirstLeft->MBB; } else { LeftMBB = FuncInfo.MF->CreateMachineBasicBlock(W.MBB->getBasicBlock()); FuncInfo.MF->insert(BBI, LeftMBB); WorkList.push_back( {LeftMBB, FirstLeft, LastLeft, W.GE, Pivot, W.DefaultProb / 2}); // Put Cond in a virtual register to make it available from the new blocks. ExportFromCurrentBlock(Cond); } // Similarly, we will branch to the RHS if Value >= Pivot. If RHS is a // single cluster, RHS.Low == Pivot, and we can branch to its destination // directly if RHS.High equals the current upper bound. MachineBasicBlock *RightMBB; if (FirstRight == LastRight && FirstRight->Kind == CC_Range && W.LT && (FirstRight->High->getValue() + 1ULL) == W.LT->getValue()) { RightMBB = FirstRight->MBB; } else { RightMBB = FuncInfo.MF->CreateMachineBasicBlock(W.MBB->getBasicBlock()); FuncInfo.MF->insert(BBI, RightMBB); WorkList.push_back( {RightMBB, FirstRight, LastRight, Pivot, W.LT, W.DefaultProb / 2}); // Put Cond in a virtual register to make it available from the new blocks. ExportFromCurrentBlock(Cond); } // Create the CaseBlock record that will be used to lower the branch. CaseBlock CB(ISD::SETLT, Cond, Pivot, nullptr, LeftMBB, RightMBB, W.MBB, getCurSDLoc(), LeftProb, RightProb); if (W.MBB == SwitchMBB) visitSwitchCase(CB, SwitchMBB); else SL->SwitchCases.push_back(CB); } // Scale CaseProb after peeling a case with the probablity of PeeledCaseProb // from the swith statement. static BranchProbability scaleCaseProbality(BranchProbability CaseProb, BranchProbability PeeledCaseProb) { if (PeeledCaseProb == BranchProbability::getOne()) return BranchProbability::getZero(); BranchProbability SwitchProb = PeeledCaseProb.getCompl(); uint32_t Numerator = CaseProb.getNumerator(); uint32_t Denominator = SwitchProb.scale(CaseProb.getDenominator()); return BranchProbability(Numerator, std::max(Numerator, Denominator)); } // Try to peel the top probability case if it exceeds the threshold. // Return current MachineBasicBlock for the switch statement if the peeling // does not occur. // If the peeling is performed, return the newly created MachineBasicBlock // for the peeled switch statement. Also update Clusters to remove the peeled // case. PeeledCaseProb is the BranchProbability for the peeled case. MachineBasicBlock *SelectionDAGBuilder::peelDominantCaseCluster( const SwitchInst &SI, CaseClusterVector &Clusters, BranchProbability &PeeledCaseProb) { MachineBasicBlock *SwitchMBB = FuncInfo.MBB; // Don't perform if there is only one cluster or optimizing for size. if (SwitchPeelThreshold > 100 || !FuncInfo.BPI || Clusters.size() < 2 || TM.getOptLevel() == CodeGenOptLevel::None || SwitchMBB->getParent()->getFunction().hasMinSize()) return SwitchMBB; BranchProbability TopCaseProb = BranchProbability(SwitchPeelThreshold, 100); unsigned PeeledCaseIndex = 0; bool SwitchPeeled = false; for (unsigned Index = 0; Index < Clusters.size(); ++Index) { CaseCluster &CC = Clusters[Index]; if (CC.Prob < TopCaseProb) continue; TopCaseProb = CC.Prob; PeeledCaseIndex = Index; SwitchPeeled = true; } if (!SwitchPeeled) return SwitchMBB; LLVM_DEBUG(dbgs() << "Peeled one top case in switch stmt, prob: " << TopCaseProb << "\n"); // Record the MBB for the peeled switch statement. 
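// scaleCaseProbality above rescales each remaining cluster to the probability
// mass left after peeling, i.e. CaseProb / (1 - PeeledCaseProb); the real code
// does this in fixed-point BranchProbability arithmetic with the denominator
// clamped so it never drops below the numerator. A floating-point sketch of
// the same rescaling (names are illustrative):
#include <cstdio>

static double scaleAfterPeel(double CaseProb, double PeeledCaseProb) {
  if (PeeledCaseProb >= 1.0)
    return 0.0;                      // everything went to the peeled case
  double Scaled = CaseProb / (1.0 - PeeledCaseProb);
  return Scaled > 1.0 ? 1.0 : Scaled;   // mirror the clamp to at most one
}

int main() {
  // A 70% case was peeled; a case that had 15% of the whole switch now owns
  // half of what remains.
  std::printf("%.2f\n", scaleAfterPeel(0.15, 0.70));  // 0.50
  std::printf("%.2f\n", scaleAfterPeel(0.10, 1.00));  // 0.00
}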
MachineFunction::iterator BBI(SwitchMBB); ++BBI; MachineBasicBlock *PeeledSwitchMBB = FuncInfo.MF->CreateMachineBasicBlock(SwitchMBB->getBasicBlock()); FuncInfo.MF->insert(BBI, PeeledSwitchMBB); ExportFromCurrentBlock(SI.getCondition()); auto PeeledCaseIt = Clusters.begin() + PeeledCaseIndex; SwitchWorkListItem W = {SwitchMBB, PeeledCaseIt, PeeledCaseIt, nullptr, nullptr, TopCaseProb.getCompl()}; lowerWorkItem(W, SI.getCondition(), SwitchMBB, PeeledSwitchMBB); Clusters.erase(PeeledCaseIt); for (CaseCluster &CC : Clusters) { LLVM_DEBUG( dbgs() << "Scale the probablity for one cluster, before scaling: " << CC.Prob << "\n"); CC.Prob = scaleCaseProbality(CC.Prob, TopCaseProb); LLVM_DEBUG(dbgs() << "After scaling: " << CC.Prob << "\n"); } PeeledCaseProb = TopCaseProb; return PeeledSwitchMBB; } void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) { // Extract cases from the switch. BranchProbabilityInfo *BPI = FuncInfo.BPI; CaseClusterVector Clusters; Clusters.reserve(SI.getNumCases()); for (auto I : SI.cases()) { MachineBasicBlock *Succ = FuncInfo.MBBMap[I.getCaseSuccessor()]; const ConstantInt *CaseVal = I.getCaseValue(); BranchProbability Prob = BPI ? BPI->getEdgeProbability(SI.getParent(), I.getSuccessorIndex()) : BranchProbability(1, SI.getNumCases() + 1); Clusters.push_back(CaseCluster::range(CaseVal, CaseVal, Succ, Prob)); } MachineBasicBlock *DefaultMBB = FuncInfo.MBBMap[SI.getDefaultDest()]; // Cluster adjacent cases with the same destination. We do this at all // optimization levels because it's cheap to do and will make codegen faster // if there are many clusters. sortAndRangeify(Clusters); // The branch probablity of the peeled case. BranchProbability PeeledCaseProb = BranchProbability::getZero(); MachineBasicBlock *PeeledSwitchMBB = peelDominantCaseCluster(SI, Clusters, PeeledCaseProb); // If there is only the default destination, jump there directly. MachineBasicBlock *SwitchMBB = FuncInfo.MBB; if (Clusters.empty()) { assert(PeeledSwitchMBB == SwitchMBB); SwitchMBB->addSuccessor(DefaultMBB); if (DefaultMBB != NextBlock(SwitchMBB)) { DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, getControlRoot(), DAG.getBasicBlock(DefaultMBB))); } return; } SL->findJumpTables(Clusters, &SI, getCurSDLoc(), DefaultMBB, DAG.getPSI(), DAG.getBFI()); SL->findBitTestClusters(Clusters, &SI); LLVM_DEBUG({ dbgs() << "Case clusters: "; for (const CaseCluster &C : Clusters) { if (C.Kind == CC_JumpTable) dbgs() << "JT:"; if (C.Kind == CC_BitTests) dbgs() << "BT:"; C.Low->getValue().print(dbgs(), true); if (C.Low != C.High) { dbgs() << '-'; C.High->getValue().print(dbgs(), true); } dbgs() << ' '; } dbgs() << '\n'; }); assert(!Clusters.empty()); SwitchWorkList WorkList; CaseClusterIt First = Clusters.begin(); CaseClusterIt Last = Clusters.end() - 1; auto DefaultProb = getEdgeProbability(PeeledSwitchMBB, DefaultMBB); // Scale the branchprobability for DefaultMBB if the peel occurs and // DefaultMBB is not replaced. if (PeeledCaseProb != BranchProbability::getZero() && DefaultMBB == FuncInfo.MBBMap[SI.getDefaultDest()]) DefaultProb = scaleCaseProbality(DefaultProb, PeeledCaseProb); WorkList.push_back( {PeeledSwitchMBB, First, Last, nullptr, nullptr, DefaultProb}); while (!WorkList.empty()) { SwitchWorkListItem W = WorkList.pop_back_val(); unsigned NumClusters = W.LastCluster - W.FirstCluster + 1; if (NumClusters > 3 && TM.getOptLevel() != CodeGenOptLevel::None && !DefaultMBB->getParent()->getFunction().hasMinSize()) { // For optimized builds, lower large range as a balanced binary tree. 
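// sortAndRangeify, called above, sorts the case values and merges adjacent
// ones that share a destination into ranges, accumulating their probabilities.
// A rough sketch of that merge on a plain (value, destination, probability)
// list (types and names are simplified stand-ins, not the real helper):
#include <algorithm>
#include <cstdio>
#include <vector>

struct Case { long Value; int Dest; double Prob; };
struct Range { long Low, High; int Dest; double Prob; };

static std::vector<Range> sortAndRangeify(std::vector<Case> Cases) {
  std::sort(Cases.begin(), Cases.end(),
            [](const Case &A, const Case &B) { return A.Value < B.Value; });
  std::vector<Range> Clusters;
  for (const Case &C : Cases) {
    // Extend the previous cluster when the value is contiguous and the
    // destination matches; otherwise start a new cluster.
    if (!Clusters.empty() && Clusters.back().Dest == C.Dest &&
        Clusters.back().High + 1 == C.Value) {
      Clusters.back().High = C.Value;
      Clusters.back().Prob += C.Prob;
    } else {
      Clusters.push_back({C.Value, C.Value, C.Dest, C.Prob});
    }
  }
  return Clusters;
}

int main() {
  auto Clusters = sortAndRangeify(
      {{3, 1, 0.1}, {1, 1, 0.1}, {2, 1, 0.1}, {7, 2, 0.2}, {8, 1, 0.1}});
  for (const Range &R : Clusters)
    std::printf("[%ld,%ld] -> %d (p=%.1f)\n", R.Low, R.High, R.Dest, R.Prob);
  // [1,3] -> 1, [7,7] -> 2, [8,8] -> 1
}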
splitWorkItem(WorkList, W, SI.getCondition(), SwitchMBB); continue; } lowerWorkItem(W, SI.getCondition(), SwitchMBB, DefaultMBB); } } void SelectionDAGBuilder::visitStepVector(const CallInst &I) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); auto DL = getCurSDLoc(); EVT ResultVT = TLI.getValueType(DAG.getDataLayout(), I.getType()); setValue(&I, DAG.getStepVector(DL, ResultVT)); } void SelectionDAGBuilder::visitVectorReverse(const CallInst &I) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); SDLoc DL = getCurSDLoc(); SDValue V = getValue(I.getOperand(0)); assert(VT == V.getValueType() && "Malformed vector.reverse!"); if (VT.isScalableVector()) { setValue(&I, DAG.getNode(ISD::VECTOR_REVERSE, DL, VT, V)); return; } // Use VECTOR_SHUFFLE for the fixed-length vector // to maintain existing behavior. SmallVector Mask; unsigned NumElts = VT.getVectorMinNumElements(); for (unsigned i = 0; i != NumElts; ++i) Mask.push_back(NumElts - 1 - i); setValue(&I, DAG.getVectorShuffle(VT, DL, V, DAG.getUNDEF(VT), Mask)); } void SelectionDAGBuilder::visitVectorDeinterleave(const CallInst &I) { auto DL = getCurSDLoc(); SDValue InVec = getValue(I.getOperand(0)); EVT OutVT = InVec.getValueType().getHalfNumVectorElementsVT(*DAG.getContext()); unsigned OutNumElts = OutVT.getVectorMinNumElements(); // ISD Node needs the input vectors split into two equal parts SDValue Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, OutVT, InVec, DAG.getVectorIdxConstant(0, DL)); SDValue Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, OutVT, InVec, DAG.getVectorIdxConstant(OutNumElts, DL)); // Use VECTOR_SHUFFLE for fixed-length vectors to benefit from existing // legalisation and combines. if (OutVT.isFixedLengthVector()) { SDValue Even = DAG.getVectorShuffle(OutVT, DL, Lo, Hi, createStrideMask(0, 2, OutNumElts)); SDValue Odd = DAG.getVectorShuffle(OutVT, DL, Lo, Hi, createStrideMask(1, 2, OutNumElts)); SDValue Res = DAG.getMergeValues({Even, Odd}, getCurSDLoc()); setValue(&I, Res); return; } SDValue Res = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL, DAG.getVTList(OutVT, OutVT), Lo, Hi); setValue(&I, Res); } void SelectionDAGBuilder::visitVectorInterleave(const CallInst &I) { auto DL = getCurSDLoc(); EVT InVT = getValue(I.getOperand(0)).getValueType(); SDValue InVec0 = getValue(I.getOperand(0)); SDValue InVec1 = getValue(I.getOperand(1)); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); EVT OutVT = TLI.getValueType(DAG.getDataLayout(), I.getType()); // Use VECTOR_SHUFFLE for fixed-length vectors to benefit from existing // legalisation and combines. 
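// For fixed-length vectors, vector.reverse and vector.deinterleave above are
// lowered to ordinary shuffles: a descending identity mask for reverse, and
// stride-2 masks starting at 0 and 1 for the even/odd halves. A sketch that
// builds those masks and applies them to plain std::vector data (the shuffle
// helper models a two-input VECTOR_SHUFFLE indexing V1 ++ V2):
#include <cstdio>
#include <vector>

static std::vector<int> shuffle(const std::vector<int> &V1,
                                const std::vector<int> &V2,
                                const std::vector<unsigned> &Mask) {
  std::vector<int> Out;
  for (unsigned Idx : Mask)
    Out.push_back(Idx < V1.size() ? V1[Idx] : V2[Idx - V1.size()]);
  return Out;
}

int main() {
  std::vector<int> V = {10, 20, 30, 40, 50, 60, 70, 80};
  unsigned N = V.size();

  std::vector<unsigned> Reverse, Even, Odd;
  for (unsigned i = 0; i != N; ++i)
    Reverse.push_back(N - 1 - i);          // reverse: N-1, N-2, ..., 0
  for (unsigned i = 0; i != N / 2; ++i) {
    Even.push_back(2 * i);                 // stride-2 mask starting at 0
    Odd.push_back(2 * i + 1);              // stride-2 mask starting at 1
  }

  for (int X : shuffle(V, V, Reverse)) std::printf("%d ", X);  // 80 ... 10
  std::printf("\n");
  for (int X : shuffle(V, V, Even)) std::printf("%d ", X);     // 10 30 50 70
  std::printf("\n");
  for (int X : shuffle(V, V, Odd)) std::printf("%d ", X);      // 20 40 60 80
  std::printf("\n");
}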
if (OutVT.isFixedLengthVector()) { unsigned NumElts = InVT.getVectorMinNumElements(); SDValue V = DAG.getNode(ISD::CONCAT_VECTORS, DL, OutVT, InVec0, InVec1); setValue(&I, DAG.getVectorShuffle(OutVT, DL, V, DAG.getUNDEF(OutVT), createInterleaveMask(NumElts, 2))); return; } SDValue Res = DAG.getNode(ISD::VECTOR_INTERLEAVE, DL, DAG.getVTList(InVT, InVT), InVec0, InVec1); Res = DAG.getNode(ISD::CONCAT_VECTORS, DL, OutVT, Res.getValue(0), Res.getValue(1)); setValue(&I, Res); } void SelectionDAGBuilder::visitFreeze(const FreezeInst &I) { SmallVector ValueVTs; ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), I.getType(), ValueVTs); unsigned NumValues = ValueVTs.size(); if (NumValues == 0) return; SmallVector Values(NumValues); SDValue Op = getValue(I.getOperand(0)); for (unsigned i = 0; i != NumValues; ++i) Values[i] = DAG.getNode(ISD::FREEZE, getCurSDLoc(), ValueVTs[i], SDValue(Op.getNode(), Op.getResNo() + i)); setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(), DAG.getVTList(ValueVTs), Values)); } void SelectionDAGBuilder::visitVectorSplice(const CallInst &I) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); SDLoc DL = getCurSDLoc(); SDValue V1 = getValue(I.getOperand(0)); SDValue V2 = getValue(I.getOperand(1)); int64_t Imm = cast(I.getOperand(2))->getSExtValue(); // VECTOR_SHUFFLE doesn't support a scalable mask so use a dedicated node. if (VT.isScalableVector()) { setValue(&I, DAG.getNode(ISD::VECTOR_SPLICE, DL, VT, V1, V2, DAG.getVectorIdxConstant(Imm, DL))); return; } unsigned NumElts = VT.getVectorNumElements(); uint64_t Idx = (NumElts + Imm) % NumElts; // Use VECTOR_SHUFFLE to maintain original behaviour for fixed-length vectors. SmallVector Mask; for (unsigned i = 0; i < NumElts; ++i) Mask.push_back(Idx + i); setValue(&I, DAG.getVectorShuffle(VT, DL, V1, V2, Mask)); } // Consider the following MIR after SelectionDAG, which produces output in // phyregs in the first case or virtregs in the second case. // // INLINEASM_BR ..., implicit-def $ebx, ..., implicit-def $edx // %5:gr32 = COPY $ebx // %6:gr32 = COPY $edx // %1:gr32 = COPY %6:gr32 // %0:gr32 = COPY %5:gr32 // // INLINEASM_BR ..., def %5:gr32, ..., def %6:gr32 // %1:gr32 = COPY %6:gr32 // %0:gr32 = COPY %5:gr32 // // Given %0, we'd like to return $ebx in the first case and %5 in the second. // Given %1, we'd like to return $edx in the first case and %6 in the second. // // If a callbr has outputs, it will have a single mapping in FuncInfo.ValueMap // to a single virtreg (such as %0). The remaining outputs monotonically // increase in virtreg number from there. If a callbr has no outputs, then it // should not have a corresponding callbr landingpad; in fact, the callbr // landingpad would not even be able to refer to such a callbr. static Register FollowCopyChain(MachineRegisterInfo &MRI, Register Reg) { MachineInstr *MI = MRI.def_begin(Reg)->getParent(); // There is definitely at least one copy. assert(MI->getOpcode() == TargetOpcode::COPY && "start of copy chain MUST be COPY"); Reg = MI->getOperand(1).getReg(); MI = MRI.def_begin(Reg)->getParent(); // There may be an optional second copy. if (MI->getOpcode() == TargetOpcode::COPY) { assert(Reg.isVirtual() && "expected COPY of virtual register"); Reg = MI->getOperand(1).getReg(); assert(Reg.isPhysical() && "expected COPY of physical register"); MI = MRI.def_begin(Reg)->getParent(); } // The start of the chain must be an INLINEASM_BR. 
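// The fixed-length vector.splice lowering above builds a shuffle mask of
// NumElts consecutive indices into V1 ++ V2 starting at (NumElts + Imm) %
// NumElts, so a negative Imm takes a tail of V1 followed by a head of V2.
// A sketch emulating that mask on plain std::vector data (helper name is
// illustrative):
#include <cstdint>
#include <cstdio>
#include <vector>

static std::vector<int> splice(const std::vector<int> &V1,
                               const std::vector<int> &V2, int64_t Imm) {
  const int64_t NumElts = (int64_t)V1.size();
  const int64_t Idx = (NumElts + Imm) % NumElts;   // rotation start point
  std::vector<int> Out;
  for (int64_t i = 0; i != NumElts; ++i) {
    int64_t MaskElt = Idx + i;                     // index into V1 ++ V2
    Out.push_back(MaskElt < NumElts ? V1[MaskElt] : V2[MaskElt - NumElts]);
  }
  return Out;
}

int main() {
  std::vector<int> A = {1, 2, 3, 4}, B = {5, 6, 7, 8};
  for (int X : splice(A, B, 1)) std::printf("%d ", X);   // 2 3 4 5
  std::printf("\n");
  for (int X : splice(A, B, -1)) std::printf("%d ", X);  // 4 5 6 7
  std::printf("\n");
}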
assert(MI->getOpcode() == TargetOpcode::INLINEASM_BR && "end of copy chain MUST be INLINEASM_BR"); return Reg; } // We must do this walk rather than the simpler // setValue(&I, getCopyFromRegs(CBR, CBR->getType())); // otherwise we will end up with copies of virtregs only valid along direct // edges. void SelectionDAGBuilder::visitCallBrLandingPad(const CallInst &I) { SmallVector ResultVTs; SmallVector ResultValues; const auto *CBR = cast(I.getParent()->getUniquePredecessor()->getTerminator()); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); const TargetRegisterInfo *TRI = DAG.getSubtarget().getRegisterInfo(); MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo(); unsigned InitialDef = FuncInfo.ValueMap[CBR]; SDValue Chain = DAG.getRoot(); // Re-parse the asm constraints string. TargetLowering::AsmOperandInfoVector TargetConstraints = TLI.ParseConstraints(DAG.getDataLayout(), TRI, *CBR); for (auto &T : TargetConstraints) { SDISelAsmOperandInfo OpInfo(T); if (OpInfo.Type != InlineAsm::isOutput) continue; // Pencil in OpInfo.ConstraintType and OpInfo.ConstraintVT based on the // individual constraint. TLI.ComputeConstraintToUse(OpInfo, OpInfo.CallOperand, &DAG); switch (OpInfo.ConstraintType) { case TargetLowering::C_Register: case TargetLowering::C_RegisterClass: { // Fill in OpInfo.AssignedRegs.Regs. getRegistersForValue(DAG, getCurSDLoc(), OpInfo, OpInfo); // getRegistersForValue may produce 1 to many registers based on whether // the OpInfo.ConstraintVT is legal on the target or not. for (unsigned &Reg : OpInfo.AssignedRegs.Regs) { Register OriginalDef = FollowCopyChain(MRI, InitialDef++); if (Register::isPhysicalRegister(OriginalDef)) FuncInfo.MBB->addLiveIn(OriginalDef); // Update the assigned registers to use the original defs. Reg = OriginalDef; } SDValue V = OpInfo.AssignedRegs.getCopyFromRegs( DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, CBR); ResultValues.push_back(V); ResultVTs.push_back(OpInfo.ConstraintVT); break; } case TargetLowering::C_Other: { SDValue Flag; SDValue V = TLI.LowerAsmOutputForConstraint(Chain, Flag, getCurSDLoc(), OpInfo, DAG); ++InitialDef; ResultValues.push_back(V); ResultVTs.push_back(OpInfo.ConstraintVT); break; } default: break; } } SDValue V = DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(), DAG.getVTList(ResultVTs), ResultValues); setValue(&I, V); } diff --git a/llvm/lib/Demangle/MicrosoftDemangle.cpp b/llvm/lib/Demangle/MicrosoftDemangle.cpp index c5835e8c2e98..d35902a33376 100644 --- a/llvm/lib/Demangle/MicrosoftDemangle.cpp +++ b/llvm/lib/Demangle/MicrosoftDemangle.cpp @@ -1,2465 +1,2484 @@ //===- MicrosoftDemangle.cpp ----------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file defines a demangler for MSVC-style mangled symbols. // // This file has no dependencies on the rest of LLVM so that it can be // easily reused in other programs such as libcxxabi. 
// //===----------------------------------------------------------------------===// #include "llvm/Demangle/MicrosoftDemangle.h" #include "llvm/Demangle/Demangle.h" #include "llvm/Demangle/DemangleConfig.h" #include "llvm/Demangle/MicrosoftDemangleNodes.h" #include "llvm/Demangle/StringViewExtras.h" #include "llvm/Demangle/Utility.h" #include #include #include +#include #include #include using namespace llvm; using namespace ms_demangle; static bool startsWithDigit(std::string_view S) { return !S.empty() && std::isdigit(S.front()); } struct NodeList { Node *N = nullptr; NodeList *Next = nullptr; }; static bool consumeFront(std::string_view &S, char C) { if (!llvm::itanium_demangle::starts_with(S, C)) return false; S.remove_prefix(1); return true; } static bool consumeFront(std::string_view &S, std::string_view C) { if (!llvm::itanium_demangle::starts_with(S, C)) return false; S.remove_prefix(C.size()); return true; } static bool consumeFront(std::string_view &S, std::string_view PrefixA, std::string_view PrefixB, bool A) { const std::string_view &Prefix = A ? PrefixA : PrefixB; return consumeFront(S, Prefix); } static bool startsWith(std::string_view S, std::string_view PrefixA, std::string_view PrefixB, bool A) { const std::string_view &Prefix = A ? PrefixA : PrefixB; return llvm::itanium_demangle::starts_with(S, Prefix); } static bool isMemberPointer(std::string_view MangledName, bool &Error) { Error = false; const char F = MangledName.front(); MangledName.remove_prefix(1); switch (F) { case '$': // This is probably an rvalue reference (e.g. $$Q), and you cannot have an // rvalue reference to a member. return false; case 'A': // 'A' indicates a reference, and you cannot have a reference to a member // function or member. return false; case 'P': case 'Q': case 'R': case 'S': // These 4 values indicate some kind of pointer, but we still don't know // what. break; default: // isMemberPointer() is called only if isPointerType() returns true, // and it rejects other prefixes. DEMANGLE_UNREACHABLE; } // If it starts with a number, then 6 indicates a non-member function // pointer, and 8 indicates a member function pointer. if (startsWithDigit(MangledName)) { if (MangledName[0] != '6' && MangledName[0] != '8') { Error = true; return false; } return (MangledName[0] == '8'); } // Remove ext qualifiers since those can appear on either type and are // therefore not indicative. consumeFront(MangledName, 'E'); // 64-bit consumeFront(MangledName, 'I'); // restrict consumeFront(MangledName, 'F'); // unaligned if (MangledName.empty()) { Error = true; return false; } // The next value should be either ABCD (non-member) or QRST (member). 
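// The demangler is built around tiny consumeFront helpers like the ones above:
// they strip a known prefix from the std::string_view cursor and report
// whether they did, so the parse position only advances on a match. A
// standalone sketch of the same pattern with a small usage check:
#include <cassert>
#include <string_view>

// Strip C from the front of S if present; report whether anything was eaten.
static bool consumeFront(std::string_view &S, char C) {
  if (S.empty() || S.front() != C)
    return false;
  S.remove_prefix(1);
  return true;
}

static bool consumeFront(std::string_view &S, std::string_view Prefix) {
  if (S.substr(0, Prefix.size()) != Prefix)
    return false;
  S.remove_prefix(Prefix.size());
  return true;
}

int main() {
  std::string_view Mangled = "?_7Foo@@6B@";
  assert(consumeFront(Mangled, "?_7"));   // special-name prefix recognized
  assert(!consumeFront(Mangled, '@'));    // no match: cursor is untouched
  assert(Mangled == "Foo@@6B@");          // only the matched prefix was eaten
}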
switch (MangledName.front()) { case 'A': case 'B': case 'C': case 'D': return false; case 'Q': case 'R': case 'S': case 'T': return true; default: Error = true; return false; } } static SpecialIntrinsicKind consumeSpecialIntrinsicKind(std::string_view &MangledName) { if (consumeFront(MangledName, "?_7")) return SpecialIntrinsicKind::Vftable; if (consumeFront(MangledName, "?_8")) return SpecialIntrinsicKind::Vbtable; if (consumeFront(MangledName, "?_9")) return SpecialIntrinsicKind::VcallThunk; if (consumeFront(MangledName, "?_A")) return SpecialIntrinsicKind::Typeof; if (consumeFront(MangledName, "?_B")) return SpecialIntrinsicKind::LocalStaticGuard; if (consumeFront(MangledName, "?_C")) return SpecialIntrinsicKind::StringLiteralSymbol; if (consumeFront(MangledName, "?_P")) return SpecialIntrinsicKind::UdtReturning; if (consumeFront(MangledName, "?_R0")) return SpecialIntrinsicKind::RttiTypeDescriptor; if (consumeFront(MangledName, "?_R1")) return SpecialIntrinsicKind::RttiBaseClassDescriptor; if (consumeFront(MangledName, "?_R2")) return SpecialIntrinsicKind::RttiBaseClassArray; if (consumeFront(MangledName, "?_R3")) return SpecialIntrinsicKind::RttiClassHierarchyDescriptor; if (consumeFront(MangledName, "?_R4")) return SpecialIntrinsicKind::RttiCompleteObjLocator; if (consumeFront(MangledName, "?_S")) return SpecialIntrinsicKind::LocalVftable; if (consumeFront(MangledName, "?__E")) return SpecialIntrinsicKind::DynamicInitializer; if (consumeFront(MangledName, "?__F")) return SpecialIntrinsicKind::DynamicAtexitDestructor; if (consumeFront(MangledName, "?__J")) return SpecialIntrinsicKind::LocalStaticThreadGuard; return SpecialIntrinsicKind::None; } static bool startsWithLocalScopePattern(std::string_view S) { if (!consumeFront(S, '?')) return false; size_t End = S.find('?'); if (End == std::string_view::npos) return false; std::string_view Candidate = S.substr(0, End); if (Candidate.empty()) return false; // \?[0-9]\? // ?@? is the discriminator 0. if (Candidate.size() == 1) return Candidate[0] == '@' || (Candidate[0] >= '0' && Candidate[0] <= '9'); // If it's not 0-9, then it's an encoded number terminated with an @ if (Candidate.back() != '@') return false; Candidate.remove_suffix(1); // An encoded number starts with B-P and all subsequent digits are in A-P. // Note that the reason the first digit cannot be A is two fold. First, it // would create an ambiguity with ?A which delimits the beginning of an // anonymous namespace. Second, A represents 0, and you don't start a multi // digit number with a leading 0. Presumably the anonymous namespace // ambiguity is also why single digit encoded numbers use 0-9 rather than A-J. 
if (Candidate[0] < 'B' || Candidate[0] > 'P') return false; Candidate.remove_prefix(1); while (!Candidate.empty()) { if (Candidate[0] < 'A' || Candidate[0] > 'P') return false; Candidate.remove_prefix(1); } return true; } static bool isTagType(std::string_view S) { switch (S.front()) { case 'T': // union case 'U': // struct case 'V': // class case 'W': // enum return true; } return false; } static bool isCustomType(std::string_view S) { return S[0] == '?'; } static bool isPointerType(std::string_view S) { if (llvm::itanium_demangle::starts_with(S, "$$Q")) // foo && return true; switch (S.front()) { case 'A': // foo & case 'P': // foo * case 'Q': // foo *const case 'R': // foo *volatile case 'S': // foo *const volatile return true; } return false; } static bool isArrayType(std::string_view S) { return S[0] == 'Y'; } static bool isFunctionType(std::string_view S) { return llvm::itanium_demangle::starts_with(S, "$$A8@@") || llvm::itanium_demangle::starts_with(S, "$$A6"); } static FunctionRefQualifier demangleFunctionRefQualifier(std::string_view &MangledName) { if (consumeFront(MangledName, 'G')) return FunctionRefQualifier::Reference; else if (consumeFront(MangledName, 'H')) return FunctionRefQualifier::RValueReference; return FunctionRefQualifier::None; } static std::pair demanglePointerCVQualifiers(std::string_view &MangledName) { if (consumeFront(MangledName, "$$Q")) return std::make_pair(Q_None, PointerAffinity::RValueReference); const char F = MangledName.front(); MangledName.remove_prefix(1); switch (F) { case 'A': return std::make_pair(Q_None, PointerAffinity::Reference); case 'P': return std::make_pair(Q_None, PointerAffinity::Pointer); case 'Q': return std::make_pair(Q_Const, PointerAffinity::Pointer); case 'R': return std::make_pair(Q_Volatile, PointerAffinity::Pointer); case 'S': return std::make_pair(Qualifiers(Q_Const | Q_Volatile), PointerAffinity::Pointer); } // This function is only called if isPointerType() returns true, // and it only returns true for the six cases listed above. DEMANGLE_UNREACHABLE; } std::string_view Demangler::copyString(std::string_view Borrowed) { char *Stable = Arena.allocUnalignedBuffer(Borrowed.size()); // This is not a micro-optimization, it avoids UB, should Borrowed be an null // buffer. 
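// The size check guarding the memcpy below matters because passing a null
// source pointer to std::memcpy is undefined behaviour even when the count is
// zero, and an empty string_view may carry a null data() pointer. A minimal
// sketch of the same guard outside the arena, with std::vector standing in for
// the arena buffer:
#include <cstdio>
#include <cstring>
#include <string_view>
#include <vector>

static std::vector<char> copyString(std::string_view Borrowed) {
  std::vector<char> Stable(Borrowed.size());
  // Only touch memcpy when there are bytes to copy; Borrowed.data() may be
  // null for an empty view.
  if (Borrowed.size())
    std::memcpy(Stable.data(), Borrowed.data(), Borrowed.size());
  return Stable;
}

int main() {
  std::vector<char> A = copyString("vftable");
  std::vector<char> B = copyString(std::string_view{});   // empty: no memcpy
  std::printf("%zu %zu\n", A.size(), B.size());            // 7 0
}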
if (Borrowed.size()) std::memcpy(Stable, Borrowed.data(), Borrowed.size()); return {Stable, Borrowed.size()}; } SpecialTableSymbolNode * Demangler::demangleSpecialTableSymbolNode(std::string_view &MangledName, SpecialIntrinsicKind K) { NamedIdentifierNode *NI = Arena.alloc(); switch (K) { case SpecialIntrinsicKind::Vftable: NI->Name = "`vftable'"; break; case SpecialIntrinsicKind::Vbtable: NI->Name = "`vbtable'"; break; case SpecialIntrinsicKind::LocalVftable: NI->Name = "`local vftable'"; break; case SpecialIntrinsicKind::RttiCompleteObjLocator: NI->Name = "`RTTI Complete Object Locator'"; break; default: DEMANGLE_UNREACHABLE; } QualifiedNameNode *QN = demangleNameScopeChain(MangledName, NI); SpecialTableSymbolNode *STSN = Arena.alloc(); STSN->Name = QN; bool IsMember = false; if (MangledName.empty()) { Error = true; return nullptr; } char Front = MangledName.front(); MangledName.remove_prefix(1); if (Front != '6' && Front != '7') { Error = true; return nullptr; } std::tie(STSN->Quals, IsMember) = demangleQualifiers(MangledName); if (!consumeFront(MangledName, '@')) STSN->TargetName = demangleFullyQualifiedTypeName(MangledName); return STSN; } LocalStaticGuardVariableNode * Demangler::demangleLocalStaticGuard(std::string_view &MangledName, bool IsThread) { LocalStaticGuardIdentifierNode *LSGI = Arena.alloc(); LSGI->IsThread = IsThread; QualifiedNameNode *QN = demangleNameScopeChain(MangledName, LSGI); LocalStaticGuardVariableNode *LSGVN = Arena.alloc(); LSGVN->Name = QN; if (consumeFront(MangledName, "4IA")) LSGVN->IsVisible = false; else if (consumeFront(MangledName, "5")) LSGVN->IsVisible = true; else { Error = true; return nullptr; } if (!MangledName.empty()) LSGI->ScopeIndex = demangleUnsigned(MangledName); return LSGVN; } static NamedIdentifierNode *synthesizeNamedIdentifier(ArenaAllocator &Arena, std::string_view Name) { NamedIdentifierNode *Id = Arena.alloc(); Id->Name = Name; return Id; } static QualifiedNameNode *synthesizeQualifiedName(ArenaAllocator &Arena, IdentifierNode *Identifier) { QualifiedNameNode *QN = Arena.alloc(); QN->Components = Arena.alloc(); QN->Components->Count = 1; QN->Components->Nodes = Arena.allocArray(1); QN->Components->Nodes[0] = Identifier; return QN; } static QualifiedNameNode *synthesizeQualifiedName(ArenaAllocator &Arena, std::string_view Name) { NamedIdentifierNode *Id = synthesizeNamedIdentifier(Arena, Name); return synthesizeQualifiedName(Arena, Id); } static VariableSymbolNode *synthesizeVariable(ArenaAllocator &Arena, TypeNode *Type, std::string_view VariableName) { VariableSymbolNode *VSN = Arena.alloc(); VSN->Type = Type; VSN->Name = synthesizeQualifiedName(Arena, VariableName); return VSN; } VariableSymbolNode * Demangler::demangleUntypedVariable(ArenaAllocator &Arena, std::string_view &MangledName, std::string_view VariableName) { NamedIdentifierNode *NI = synthesizeNamedIdentifier(Arena, VariableName); QualifiedNameNode *QN = demangleNameScopeChain(MangledName, NI); VariableSymbolNode *VSN = Arena.alloc(); VSN->Name = QN; if (consumeFront(MangledName, "8")) return VSN; Error = true; return nullptr; } VariableSymbolNode * Demangler::demangleRttiBaseClassDescriptorNode(ArenaAllocator &Arena, std::string_view &MangledName) { RttiBaseClassDescriptorNode *RBCDN = Arena.alloc(); RBCDN->NVOffset = demangleUnsigned(MangledName); RBCDN->VBPtrOffset = demangleSigned(MangledName); RBCDN->VBTableOffset = demangleUnsigned(MangledName); RBCDN->Flags = demangleUnsigned(MangledName); if (Error) return nullptr; VariableSymbolNode *VSN = Arena.alloc(); 
VSN->Name = demangleNameScopeChain(MangledName, RBCDN); consumeFront(MangledName, '8'); return VSN; } FunctionSymbolNode * Demangler::demangleInitFiniStub(std::string_view &MangledName, bool IsDestructor) { DynamicStructorIdentifierNode *DSIN = Arena.alloc(); DSIN->IsDestructor = IsDestructor; bool IsKnownStaticDataMember = false; if (consumeFront(MangledName, '?')) IsKnownStaticDataMember = true; SymbolNode *Symbol = demangleDeclarator(MangledName); if (Error) return nullptr; FunctionSymbolNode *FSN = nullptr; if (Symbol->kind() == NodeKind::VariableSymbol) { DSIN->Variable = static_cast(Symbol); // Older versions of clang mangled this type of symbol incorrectly. They // would omit the leading ? and they would only emit a single @ at the end. // The correct mangling is a leading ? and 2 trailing @ signs. Handle // both cases. int AtCount = IsKnownStaticDataMember ? 2 : 1; for (int I = 0; I < AtCount; ++I) { if (consumeFront(MangledName, '@')) continue; Error = true; return nullptr; } FSN = demangleFunctionEncoding(MangledName); if (FSN) FSN->Name = synthesizeQualifiedName(Arena, DSIN); } else { if (IsKnownStaticDataMember) { // This was supposed to be a static data member, but we got a function. Error = true; return nullptr; } FSN = static_cast(Symbol); DSIN->Name = Symbol->Name; FSN->Name = synthesizeQualifiedName(Arena, DSIN); } return FSN; } SymbolNode *Demangler::demangleSpecialIntrinsic(std::string_view &MangledName) { SpecialIntrinsicKind SIK = consumeSpecialIntrinsicKind(MangledName); switch (SIK) { case SpecialIntrinsicKind::None: return nullptr; case SpecialIntrinsicKind::StringLiteralSymbol: return demangleStringLiteral(MangledName); case SpecialIntrinsicKind::Vftable: case SpecialIntrinsicKind::Vbtable: case SpecialIntrinsicKind::LocalVftable: case SpecialIntrinsicKind::RttiCompleteObjLocator: return demangleSpecialTableSymbolNode(MangledName, SIK); case SpecialIntrinsicKind::VcallThunk: return demangleVcallThunkNode(MangledName); case SpecialIntrinsicKind::LocalStaticGuard: return demangleLocalStaticGuard(MangledName, /*IsThread=*/false); case SpecialIntrinsicKind::LocalStaticThreadGuard: return demangleLocalStaticGuard(MangledName, /*IsThread=*/true); case SpecialIntrinsicKind::RttiTypeDescriptor: { TypeNode *T = demangleType(MangledName, QualifierMangleMode::Result); if (Error) break; if (!consumeFront(MangledName, "@8")) break; if (!MangledName.empty()) break; return synthesizeVariable(Arena, T, "`RTTI Type Descriptor'"); } case SpecialIntrinsicKind::RttiBaseClassArray: return demangleUntypedVariable(Arena, MangledName, "`RTTI Base Class Array'"); case SpecialIntrinsicKind::RttiClassHierarchyDescriptor: return demangleUntypedVariable(Arena, MangledName, "`RTTI Class Hierarchy Descriptor'"); case SpecialIntrinsicKind::RttiBaseClassDescriptor: return demangleRttiBaseClassDescriptorNode(Arena, MangledName); case SpecialIntrinsicKind::DynamicInitializer: return demangleInitFiniStub(MangledName, /*IsDestructor=*/false); case SpecialIntrinsicKind::DynamicAtexitDestructor: return demangleInitFiniStub(MangledName, /*IsDestructor=*/true); case SpecialIntrinsicKind::Typeof: case SpecialIntrinsicKind::UdtReturning: // It's unclear which tools produces these manglings, so demangling // support is not (yet?) implemented. break; case SpecialIntrinsicKind::Unknown: DEMANGLE_UNREACHABLE; // Never returned by consumeSpecialIntrinsicKind. 
} Error = true; return nullptr; } IdentifierNode * Demangler::demangleFunctionIdentifierCode(std::string_view &MangledName) { assert(llvm::itanium_demangle::starts_with(MangledName, '?')); MangledName.remove_prefix(1); if (MangledName.empty()) { Error = true; return nullptr; } if (consumeFront(MangledName, "__")) return demangleFunctionIdentifierCode( MangledName, FunctionIdentifierCodeGroup::DoubleUnder); if (consumeFront(MangledName, "_")) return demangleFunctionIdentifierCode(MangledName, FunctionIdentifierCodeGroup::Under); return demangleFunctionIdentifierCode(MangledName, FunctionIdentifierCodeGroup::Basic); } StructorIdentifierNode * Demangler::demangleStructorIdentifier(std::string_view &MangledName, bool IsDestructor) { StructorIdentifierNode *N = Arena.alloc(); N->IsDestructor = IsDestructor; return N; } ConversionOperatorIdentifierNode * Demangler::demangleConversionOperatorIdentifier(std::string_view &MangledName) { ConversionOperatorIdentifierNode *N = Arena.alloc(); return N; } LiteralOperatorIdentifierNode * Demangler::demangleLiteralOperatorIdentifier(std::string_view &MangledName) { LiteralOperatorIdentifierNode *N = Arena.alloc(); N->Name = demangleSimpleString(MangledName, /*Memorize=*/false); return N; } IntrinsicFunctionKind Demangler::translateIntrinsicFunctionCode(char CH, FunctionIdentifierCodeGroup Group) { using IFK = IntrinsicFunctionKind; if (!(CH >= '0' && CH <= '9') && !(CH >= 'A' && CH <= 'Z')) { Error = true; return IFK::None; } // Not all ? identifiers are intrinsics *functions*. This function only maps // operator codes for the special functions, all others are handled elsewhere, // hence the IFK::None entries in the table. static IFK Basic[36] = { IFK::None, // ?0 # Foo::Foo() IFK::None, // ?1 # Foo::~Foo() IFK::New, // ?2 # operator new IFK::Delete, // ?3 # operator delete IFK::Assign, // ?4 # operator= IFK::RightShift, // ?5 # operator>> IFK::LeftShift, // ?6 # operator<< IFK::LogicalNot, // ?7 # operator! 
IFK::Equals, // ?8 # operator== IFK::NotEquals, // ?9 # operator!= IFK::ArraySubscript, // ?A # operator[] IFK::None, // ?B # Foo::operator () IFK::Pointer, // ?C # operator-> IFK::Dereference, // ?D # operator* IFK::Increment, // ?E # operator++ IFK::Decrement, // ?F # operator-- IFK::Minus, // ?G # operator- IFK::Plus, // ?H # operator+ IFK::BitwiseAnd, // ?I # operator& IFK::MemberPointer, // ?J # operator->* IFK::Divide, // ?K # operator/ IFK::Modulus, // ?L # operator% IFK::LessThan, // ?M operator< IFK::LessThanEqual, // ?N operator<= IFK::GreaterThan, // ?O operator> IFK::GreaterThanEqual, // ?P operator>= IFK::Comma, // ?Q operator, IFK::Parens, // ?R operator() IFK::BitwiseNot, // ?S operator~ IFK::BitwiseXor, // ?T operator^ IFK::BitwiseOr, // ?U operator| IFK::LogicalAnd, // ?V operator&& IFK::LogicalOr, // ?W operator|| IFK::TimesEqual, // ?X operator*= IFK::PlusEqual, // ?Y operator+= IFK::MinusEqual, // ?Z operator-= }; static IFK Under[36] = { IFK::DivEqual, // ?_0 operator/= IFK::ModEqual, // ?_1 operator%= IFK::RshEqual, // ?_2 operator>>= IFK::LshEqual, // ?_3 operator<<= IFK::BitwiseAndEqual, // ?_4 operator&= IFK::BitwiseOrEqual, // ?_5 operator|= IFK::BitwiseXorEqual, // ?_6 operator^= IFK::None, // ?_7 # vftable IFK::None, // ?_8 # vbtable IFK::None, // ?_9 # vcall IFK::None, // ?_A # typeof IFK::None, // ?_B # local static guard IFK::None, // ?_C # string literal IFK::VbaseDtor, // ?_D # vbase destructor IFK::VecDelDtor, // ?_E # vector deleting destructor IFK::DefaultCtorClosure, // ?_F # default constructor closure IFK::ScalarDelDtor, // ?_G # scalar deleting destructor IFK::VecCtorIter, // ?_H # vector constructor iterator IFK::VecDtorIter, // ?_I # vector destructor iterator IFK::VecVbaseCtorIter, // ?_J # vector vbase constructor iterator IFK::VdispMap, // ?_K # virtual displacement map IFK::EHVecCtorIter, // ?_L # eh vector constructor iterator IFK::EHVecDtorIter, // ?_M # eh vector destructor iterator IFK::EHVecVbaseCtorIter, // ?_N # eh vector vbase constructor iterator IFK::CopyCtorClosure, // ?_O # copy constructor closure IFK::None, // ?_P # udt returning IFK::None, // ?_Q # IFK::None, // ?_R0 - ?_R4 # RTTI Codes IFK::None, // ?_S # local vftable IFK::LocalVftableCtorClosure, // ?_T # local vftable constructor closure IFK::ArrayNew, // ?_U operator new[] IFK::ArrayDelete, // ?_V operator delete[] IFK::None, // ?_W IFK::None, // ?_X IFK::None, // ?_Y IFK::None, // ?_Z }; static IFK DoubleUnder[36] = { IFK::None, // ?__0 IFK::None, // ?__1 IFK::None, // ?__2 IFK::None, // ?__3 IFK::None, // ?__4 IFK::None, // ?__5 IFK::None, // ?__6 IFK::None, // ?__7 IFK::None, // ?__8 IFK::None, // ?__9 IFK::ManVectorCtorIter, // ?__A managed vector ctor iterator IFK::ManVectorDtorIter, // ?__B managed vector dtor iterator IFK::EHVectorCopyCtorIter, // ?__C EH vector copy ctor iterator IFK::EHVectorVbaseCopyCtorIter, // ?__D EH vector vbase copy ctor iter IFK::None, // ?__E dynamic initializer for `T' IFK::None, // ?__F dynamic atexit destructor for `T' IFK::VectorCopyCtorIter, // ?__G vector copy constructor iter IFK::VectorVbaseCopyCtorIter, // ?__H vector vbase copy ctor iter IFK::ManVectorVbaseCopyCtorIter, // ?__I managed vector vbase copy ctor // iter IFK::None, // ?__J local static thread guard IFK::None, // ?__K operator ""_name IFK::CoAwait, // ?__L operator co_await IFK::Spaceship, // ?__M operator<=> IFK::None, // ?__N IFK::None, // ?__O IFK::None, // ?__P IFK::None, // ?__Q IFK::None, // ?__R IFK::None, // ?__S IFK::None, // ?__T IFK::None, // ?__U IFK::None, 
// ?__V IFK::None, // ?__W IFK::None, // ?__X IFK::None, // ?__Y IFK::None, // ?__Z }; int Index = (CH >= '0' && CH <= '9') ? (CH - '0') : (CH - 'A' + 10); switch (Group) { case FunctionIdentifierCodeGroup::Basic: return Basic[Index]; case FunctionIdentifierCodeGroup::Under: return Under[Index]; case FunctionIdentifierCodeGroup::DoubleUnder: return DoubleUnder[Index]; } DEMANGLE_UNREACHABLE; } IdentifierNode * Demangler::demangleFunctionIdentifierCode(std::string_view &MangledName, FunctionIdentifierCodeGroup Group) { if (MangledName.empty()) { Error = true; return nullptr; } const char CH = MangledName.front(); switch (Group) { case FunctionIdentifierCodeGroup::Basic: MangledName.remove_prefix(1); switch (CH) { case '0': case '1': return demangleStructorIdentifier(MangledName, CH == '1'); case 'B': return demangleConversionOperatorIdentifier(MangledName); default: return Arena.alloc( translateIntrinsicFunctionCode(CH, Group)); } case FunctionIdentifierCodeGroup::Under: MangledName.remove_prefix(1); return Arena.alloc( translateIntrinsicFunctionCode(CH, Group)); case FunctionIdentifierCodeGroup::DoubleUnder: MangledName.remove_prefix(1); switch (CH) { case 'K': return demangleLiteralOperatorIdentifier(MangledName); default: return Arena.alloc( translateIntrinsicFunctionCode(CH, Group)); } } DEMANGLE_UNREACHABLE; } SymbolNode *Demangler::demangleEncodedSymbol(std::string_view &MangledName, QualifiedNameNode *Name) { if (MangledName.empty()) { Error = true; return nullptr; } // Read a variable. switch (MangledName.front()) { case '0': case '1': case '2': case '3': case '4': { StorageClass SC = demangleVariableStorageClass(MangledName); return demangleVariableEncoding(MangledName, SC); } } FunctionSymbolNode *FSN = demangleFunctionEncoding(MangledName); IdentifierNode *UQN = Name->getUnqualifiedIdentifier(); if (UQN->kind() == NodeKind::ConversionOperatorIdentifier) { ConversionOperatorIdentifierNode *COIN = static_cast(UQN); if (FSN) COIN->TargetType = FSN->Signature->ReturnType; } return FSN; } SymbolNode *Demangler::demangleDeclarator(std::string_view &MangledName) { // What follows is a main symbol name. This may include namespaces or class // back references. QualifiedNameNode *QN = demangleFullyQualifiedSymbolName(MangledName); if (Error) return nullptr; SymbolNode *Symbol = demangleEncodedSymbol(MangledName, QN); if (Error) return nullptr; Symbol->Name = QN; IdentifierNode *UQN = QN->getUnqualifiedIdentifier(); if (UQN->kind() == NodeKind::ConversionOperatorIdentifier) { ConversionOperatorIdentifierNode *COIN = static_cast(UQN); if (!COIN->TargetType) { Error = true; return nullptr; } } return Symbol; } SymbolNode *Demangler::demangleMD5Name(std::string_view &MangledName) { assert(llvm::itanium_demangle::starts_with(MangledName, "??@")); // This is an MD5 mangled name. We can't demangle it, just return the // mangled name. // An MD5 mangled name is ??@ followed by 32 characters and a terminating @. size_t MD5Last = MangledName.find('@', strlen("??@")); if (MD5Last == std::string_view::npos) { Error = true; return nullptr; } const char *Start = MangledName.data(); const size_t StartSize = MangledName.size(); MangledName.remove_prefix(MD5Last + 1); // There are two additional special cases for MD5 names: // 1. For complete object locators where the object name is long enough // for the object to have an MD5 name, the complete object locator is // called ??@...@??_R4@ (with a trailing "??_R4@" instead of the usual // leading "??_R4". This is handled here. // 2. 
For catchable types, in versions of MSVC before 2015 (<1900) or after // 2017.2 (>= 1914), the catchable type mangling is _CT??@...@??@...@8 // instead of_CT??@...@8 with just one MD5 name. Since we don't yet // demangle catchable types anywhere, this isn't handled for MD5 names // either. consumeFront(MangledName, "??_R4@"); assert(MangledName.size() < StartSize); const size_t Count = StartSize - MangledName.size(); std::string_view MD5(Start, Count); SymbolNode *S = Arena.alloc(NodeKind::Md5Symbol); S->Name = synthesizeQualifiedName(Arena, MD5); return S; } SymbolNode *Demangler::demangleTypeinfoName(std::string_view &MangledName) { assert(llvm::itanium_demangle::starts_with(MangledName, '.')); consumeFront(MangledName, '.'); TypeNode *T = demangleType(MangledName, QualifierMangleMode::Result); if (Error || !MangledName.empty()) { Error = true; return nullptr; } return synthesizeVariable(Arena, T, "`RTTI Type Descriptor Name'"); } // Parser entry point. SymbolNode *Demangler::parse(std::string_view &MangledName) { // Typeinfo names are strings stored in RTTI data. They're not symbol names. // It's still useful to demangle them. They're the only demangled entity // that doesn't start with a "?" but a ".". if (llvm::itanium_demangle::starts_with(MangledName, '.')) return demangleTypeinfoName(MangledName); if (llvm::itanium_demangle::starts_with(MangledName, "??@")) return demangleMD5Name(MangledName); // MSVC-style mangled symbols must start with '?'. if (!llvm::itanium_demangle::starts_with(MangledName, '?')) { Error = true; return nullptr; } consumeFront(MangledName, '?'); // ?$ is a template instantiation, but all other names that start with ? are // operators / special names. if (SymbolNode *SI = demangleSpecialIntrinsic(MangledName)) return SI; return demangleDeclarator(MangledName); } TagTypeNode *Demangler::parseTagUniqueName(std::string_view &MangledName) { if (!consumeFront(MangledName, ".?A")) { Error = true; return nullptr; } consumeFront(MangledName, ".?A"); if (MangledName.empty()) { Error = true; return nullptr; } return demangleClassType(MangledName); } // ::= // ::= 0 # private static member // ::= 1 # protected static member // ::= 2 # public static member // ::= 3 # global // ::= 4 # static local VariableSymbolNode * Demangler::demangleVariableEncoding(std::string_view &MangledName, StorageClass SC) { VariableSymbolNode *VSN = Arena.alloc(); VSN->Type = demangleType(MangledName, QualifierMangleMode::Drop); VSN->SC = SC; if (Error) return nullptr; // ::= // ::= # pointers, references switch (VSN->Type->kind()) { case NodeKind::PointerType: { PointerTypeNode *PTN = static_cast(VSN->Type); Qualifiers ExtraChildQuals = Q_None; PTN->Quals = Qualifiers(VSN->Type->Quals | demanglePointerExtQualifiers(MangledName)); bool IsMember = false; std::tie(ExtraChildQuals, IsMember) = demangleQualifiers(MangledName); if (PTN->ClassParent) { QualifiedNameNode *BackRefName = demangleFullyQualifiedTypeName(MangledName); (void)BackRefName; } PTN->Pointee->Quals = Qualifiers(PTN->Pointee->Quals | ExtraChildQuals); break; } default: VSN->Type->Quals = demangleQualifiers(MangledName).first; break; } return VSN; } // Sometimes numbers are encoded in mangled symbols. For example, // "int (*x)[20]" is a valid C type (x is a pointer to an array of // length 20), so we need some way to embed numbers as part of symbols. // This function parses it. // // ::= [?] // // ::= # when 1 <= Number <= 10 // ::= + @ # when Number == 0 or >= 10 // // ::= [A-P] # A = 0, B = 1, ... 
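// Worked examples (illustrative): "0" encodes 1 and "9" encodes 10; "A@"
// encodes 0 and "BE@" encodes 20 (hex digits rebased onto 'A'..'P'); a
// leading '?' negates the value, so "?4" encodes -5.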
std::pair Demangler::demangleNumber(std::string_view &MangledName) { bool IsNegative = consumeFront(MangledName, '?'); if (startsWithDigit(MangledName)) { uint64_t Ret = MangledName[0] - '0' + 1; MangledName.remove_prefix(1); return {Ret, IsNegative}; } uint64_t Ret = 0; for (size_t i = 0; i < MangledName.size(); ++i) { char C = MangledName[i]; if (C == '@') { MangledName.remove_prefix(i + 1); return {Ret, IsNegative}; } if ('A' <= C && C <= 'P') { Ret = (Ret << 4) + (C - 'A'); continue; } break; } Error = true; return {0ULL, false}; } uint64_t Demangler::demangleUnsigned(std::string_view &MangledName) { bool IsNegative = false; uint64_t Number = 0; std::tie(Number, IsNegative) = demangleNumber(MangledName); if (IsNegative) Error = true; return Number; } int64_t Demangler::demangleSigned(std::string_view &MangledName) { bool IsNegative = false; uint64_t Number = 0; std::tie(Number, IsNegative) = demangleNumber(MangledName); if (Number > INT64_MAX) Error = true; int64_t I = static_cast(Number); return IsNegative ? -I : I; } // First 10 strings can be referenced by special BackReferences ?0, ?1, ..., ?9. // Memorize it. void Demangler::memorizeString(std::string_view S) { if (Backrefs.NamesCount >= BackrefContext::Max) return; for (size_t i = 0; i < Backrefs.NamesCount; ++i) if (S == Backrefs.Names[i]->Name) return; NamedIdentifierNode *N = Arena.alloc(); N->Name = S; Backrefs.Names[Backrefs.NamesCount++] = N; } NamedIdentifierNode * Demangler::demangleBackRefName(std::string_view &MangledName) { assert(startsWithDigit(MangledName)); size_t I = MangledName[0] - '0'; if (I >= Backrefs.NamesCount) { Error = true; return nullptr; } MangledName.remove_prefix(1); return Backrefs.Names[I]; } void Demangler::memorizeIdentifier(IdentifierNode *Identifier) { // Render this class template name into a string buffer so that we can // memorize it for the purpose of back-referencing. OutputBuffer OB; Identifier->output(OB, OF_Default); std::string_view Owned = copyString(OB); memorizeString(Owned); std::free(OB.getBuffer()); } IdentifierNode * Demangler::demangleTemplateInstantiationName(std::string_view &MangledName, NameBackrefBehavior NBB) { assert(llvm::itanium_demangle::starts_with(MangledName, "?$")); consumeFront(MangledName, "?$"); BackrefContext OuterContext; std::swap(OuterContext, Backrefs); IdentifierNode *Identifier = demangleUnqualifiedSymbolName(MangledName, NBB_Simple); if (!Error) Identifier->TemplateParams = demangleTemplateParameterList(MangledName); std::swap(OuterContext, Backrefs); if (Error) return nullptr; if (NBB & NBB_Template) { // NBB_Template is only set for types and non-leaf names ("a::" in "a::b"). // Structors and conversion operators only makes sense in a leaf name, so // reject them in NBB_Template contexts. if (Identifier->kind() == NodeKind::ConversionOperatorIdentifier || Identifier->kind() == NodeKind::StructorIdentifier) { Error = true; return nullptr; } memorizeIdentifier(Identifier); } return Identifier; } NamedIdentifierNode * Demangler::demangleSimpleName(std::string_view &MangledName, bool Memorize) { std::string_view S = demangleSimpleString(MangledName, Memorize); if (Error) return nullptr; NamedIdentifierNode *Name = Arena.alloc(); Name->Name = S; return Name; } static bool isRebasedHexDigit(char C) { return (C >= 'A' && C <= 'P'); } static uint8_t rebasedHexDigitToNumber(char C) { assert(isRebasedHexDigit(C)); return (C <= 'J') ? 
(C - 'A') : (10 + C - 'K'); } uint8_t Demangler::demangleCharLiteral(std::string_view &MangledName) { assert(!MangledName.empty()); if (!llvm::itanium_demangle::starts_with(MangledName, '?')) { const uint8_t F = MangledName.front(); MangledName.remove_prefix(1); return F; } MangledName.remove_prefix(1); if (MangledName.empty()) goto CharLiteralError; if (consumeFront(MangledName, '$')) { // Two hex digits if (MangledName.size() < 2) goto CharLiteralError; std::string_view Nibbles = MangledName.substr(0, 2); if (!isRebasedHexDigit(Nibbles[0]) || !isRebasedHexDigit(Nibbles[1])) goto CharLiteralError; // Don't append the null terminator. uint8_t C1 = rebasedHexDigitToNumber(Nibbles[0]); uint8_t C2 = rebasedHexDigitToNumber(Nibbles[1]); MangledName.remove_prefix(2); return (C1 << 4) | C2; } if (startsWithDigit(MangledName)) { const char *Lookup = ",/\\:. \n\t'-"; char C = Lookup[MangledName[0] - '0']; MangledName.remove_prefix(1); return C; } if (MangledName[0] >= 'a' && MangledName[0] <= 'z') { char Lookup[26] = {'\xE1', '\xE2', '\xE3', '\xE4', '\xE5', '\xE6', '\xE7', '\xE8', '\xE9', '\xEA', '\xEB', '\xEC', '\xED', '\xEE', '\xEF', '\xF0', '\xF1', '\xF2', '\xF3', '\xF4', '\xF5', '\xF6', '\xF7', '\xF8', '\xF9', '\xFA'}; char C = Lookup[MangledName[0] - 'a']; MangledName.remove_prefix(1); return C; } if (MangledName[0] >= 'A' && MangledName[0] <= 'Z') { char Lookup[26] = {'\xC1', '\xC2', '\xC3', '\xC4', '\xC5', '\xC6', '\xC7', '\xC8', '\xC9', '\xCA', '\xCB', '\xCC', '\xCD', '\xCE', '\xCF', '\xD0', '\xD1', '\xD2', '\xD3', '\xD4', '\xD5', '\xD6', '\xD7', '\xD8', '\xD9', '\xDA'}; char C = Lookup[MangledName[0] - 'A']; MangledName.remove_prefix(1); return C; } CharLiteralError: Error = true; return '\0'; } wchar_t Demangler::demangleWcharLiteral(std::string_view &MangledName) { uint8_t C1, C2; C1 = demangleCharLiteral(MangledName); if (Error || MangledName.empty()) goto WCharLiteralError; C2 = demangleCharLiteral(MangledName); if (Error) goto WCharLiteralError; return ((wchar_t)C1 << 8) | (wchar_t)C2; WCharLiteralError: Error = true; return L'\0'; } static void writeHexDigit(char *Buffer, uint8_t Digit) { assert(Digit <= 15); *Buffer = (Digit < 10) ? ('0' + Digit) : ('A' + Digit - 10); } static void outputHex(OutputBuffer &OB, unsigned C) { assert (C != 0); // It's easier to do the math if we can work from right to left, but we need // to print the numbers from left to right. So render this into a temporary // buffer first, then output the temporary buffer. Each byte is of the form // \xAB, which means that each byte needs 4 characters. Since there are at // most 4 bytes, we need a 4*4+1 = 17 character temporary buffer. char TempBuffer[17]; ::memset(TempBuffer, 0, sizeof(TempBuffer)); constexpr int MaxPos = sizeof(TempBuffer) - 1; int Pos = MaxPos - 1; // TempBuffer[MaxPos] is the terminating \0. 
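// For example, C == 0x2603 is rendered as "\x2603": the loop below writes
// hex digits two at a time from the least significant end, then 'x' and a
// backslash are prepended before the buffer is printed.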
while (C != 0) { for (int I = 0; I < 2; ++I) { writeHexDigit(&TempBuffer[Pos--], C % 16); C /= 16; } } TempBuffer[Pos--] = 'x'; assert(Pos >= 0); TempBuffer[Pos--] = '\\'; OB << std::string_view(&TempBuffer[Pos + 1]); } static void outputEscapedChar(OutputBuffer &OB, unsigned C) { switch (C) { case '\0': // nul OB << "\\0"; return; case '\'': // single quote OB << "\\\'"; return; case '\"': // double quote OB << "\\\""; return; case '\\': // backslash OB << "\\\\"; return; case '\a': // bell OB << "\\a"; return; case '\b': // backspace OB << "\\b"; return; case '\f': // form feed OB << "\\f"; return; case '\n': // new line OB << "\\n"; return; case '\r': // carriage return OB << "\\r"; return; case '\t': // tab OB << "\\t"; return; case '\v': // vertical tab OB << "\\v"; return; default: break; } if (C > 0x1F && C < 0x7F) { // Standard ascii char. OB << (char)C; return; } outputHex(OB, C); } static unsigned countTrailingNullBytes(const uint8_t *StringBytes, int Length) { const uint8_t *End = StringBytes + Length - 1; unsigned Count = 0; while (Length > 0 && *End == 0) { --Length; --End; ++Count; } return Count; } static unsigned countEmbeddedNulls(const uint8_t *StringBytes, unsigned Length) { unsigned Result = 0; for (unsigned I = 0; I < Length; ++I) { if (*StringBytes++ == 0) ++Result; } return Result; } // A mangled (non-wide) string literal stores the total length of the string it // refers to (passed in NumBytes), and it contains up to 32 bytes of actual text // (passed in StringBytes, NumChars). static unsigned guessCharByteSize(const uint8_t *StringBytes, unsigned NumChars, uint64_t NumBytes) { assert(NumBytes > 0); // If the number of bytes is odd, this is guaranteed to be a char string. if (NumBytes % 2 == 1) return 1; // All strings can encode at most 32 bytes of data. If it's less than that, // then we encoded the entire string. In this case we check for a 1-byte, // 2-byte, or 4-byte null terminator. if (NumBytes < 32) { unsigned TrailingNulls = countTrailingNullBytes(StringBytes, NumChars); if (TrailingNulls >= 4 && NumBytes % 4 == 0) return 4; if (TrailingNulls >= 2) return 2; return 1; } // The whole string was not able to be encoded. Try to look at embedded null // terminators to guess. The heuristic is that we count all embedded null // terminators. If more than 2/3 are null, it's a char32. If more than 1/3 // are null, it's a char16. Otherwise it's a char8. This obviously isn't // perfect and is biased towards languages that have ascii alphabets, but this // was always going to be best effort since the encoding is lossy. 
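// For example (illustrative), truncated UTF-16 text that is mostly ASCII has
// roughly every other byte equal to zero, so about half the bytes are nulls
// and the 1/3 threshold below classifies it as char16_t.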
unsigned Nulls = countEmbeddedNulls(StringBytes, NumChars); if (Nulls >= 2 * NumChars / 3 && NumBytes % 4 == 0) return 4; if (Nulls >= NumChars / 3) return 2; return 1; } static unsigned decodeMultiByteChar(const uint8_t *StringBytes, unsigned CharIndex, unsigned CharBytes) { assert(CharBytes == 1 || CharBytes == 2 || CharBytes == 4); unsigned Offset = CharIndex * CharBytes; unsigned Result = 0; StringBytes = StringBytes + Offset; for (unsigned I = 0; I < CharBytes; ++I) { unsigned C = static_cast(StringBytes[I]); Result |= C << (8 * I); } return Result; } FunctionSymbolNode * Demangler::demangleVcallThunkNode(std::string_view &MangledName) { FunctionSymbolNode *FSN = Arena.alloc(); VcallThunkIdentifierNode *VTIN = Arena.alloc(); FSN->Signature = Arena.alloc(); FSN->Signature->FunctionClass = FC_NoParameterList; FSN->Name = demangleNameScopeChain(MangledName, VTIN); if (!Error) Error = !consumeFront(MangledName, "$B"); if (!Error) VTIN->OffsetInVTable = demangleUnsigned(MangledName); if (!Error) Error = !consumeFront(MangledName, 'A'); if (!Error) FSN->Signature->CallConvention = demangleCallingConvention(MangledName); return (Error) ? nullptr : FSN; } EncodedStringLiteralNode * Demangler::demangleStringLiteral(std::string_view &MangledName) { // This function uses goto, so declare all variables up front. OutputBuffer OB; std::string_view CRC; uint64_t StringByteSize; bool IsWcharT = false; bool IsNegative = false; size_t CrcEndPos = 0; char F; EncodedStringLiteralNode *Result = Arena.alloc(); // Prefix indicating the beginning of a string literal if (!consumeFront(MangledName, "@_")) goto StringLiteralError; if (MangledName.empty()) goto StringLiteralError; // Char Type (regular or wchar_t) F = MangledName.front(); MangledName.remove_prefix(1); switch (F) { case '1': IsWcharT = true; DEMANGLE_FALLTHROUGH; case '0': break; default: goto StringLiteralError; } // Encoded Length std::tie(StringByteSize, IsNegative) = demangleNumber(MangledName); if (Error || IsNegative || StringByteSize < (IsWcharT ? 2 : 1)) goto StringLiteralError; // CRC 32 (always 8 characters plus a terminator) CrcEndPos = MangledName.find('@'); if (CrcEndPos == std::string_view::npos) goto StringLiteralError; CRC = MangledName.substr(0, CrcEndPos); MangledName.remove_prefix(CrcEndPos + 1); if (MangledName.empty()) goto StringLiteralError; if (IsWcharT) { Result->Char = CharKind::Wchar; if (StringByteSize > 64) Result->IsTruncated = true; while (!consumeFront(MangledName, '@')) { if (MangledName.size() < 2) goto StringLiteralError; wchar_t W = demangleWcharLiteral(MangledName); if (StringByteSize != 2 || Result->IsTruncated) outputEscapedChar(OB, W); StringByteSize -= 2; if (Error) goto StringLiteralError; } } else { // The max byte length is actually 32, but some compilers mangled strings // incorrectly, so we have to assume it can go higher. 
constexpr unsigned MaxStringByteLength = 32 * 4; uint8_t StringBytes[MaxStringByteLength]; unsigned BytesDecoded = 0; while (!consumeFront(MangledName, '@')) { if (MangledName.size() < 1 || BytesDecoded >= MaxStringByteLength) goto StringLiteralError; StringBytes[BytesDecoded++] = demangleCharLiteral(MangledName); } if (StringByteSize > BytesDecoded) Result->IsTruncated = true; unsigned CharBytes = guessCharByteSize(StringBytes, BytesDecoded, StringByteSize); assert(StringByteSize % CharBytes == 0); switch (CharBytes) { case 1: Result->Char = CharKind::Char; break; case 2: Result->Char = CharKind::Char16; break; case 4: Result->Char = CharKind::Char32; break; default: DEMANGLE_UNREACHABLE; } const unsigned NumChars = BytesDecoded / CharBytes; for (unsigned CharIndex = 0; CharIndex < NumChars; ++CharIndex) { unsigned NextChar = decodeMultiByteChar(StringBytes, CharIndex, CharBytes); if (CharIndex + 1 < NumChars || Result->IsTruncated) outputEscapedChar(OB, NextChar); } } Result->DecodedString = copyString(OB); std::free(OB.getBuffer()); return Result; StringLiteralError: Error = true; std::free(OB.getBuffer()); return nullptr; } // Returns MangledName's prefix before the first '@', or an error if // MangledName contains no '@' or the prefix has length 0. std::string_view Demangler::demangleSimpleString(std::string_view &MangledName, bool Memorize) { std::string_view S; for (size_t i = 0; i < MangledName.size(); ++i) { if (MangledName[i] != '@') continue; if (i == 0) break; S = MangledName.substr(0, i); MangledName.remove_prefix(i + 1); if (Memorize) memorizeString(S); return S; } Error = true; return {}; } NamedIdentifierNode * Demangler::demangleAnonymousNamespaceName(std::string_view &MangledName) { assert(llvm::itanium_demangle::starts_with(MangledName, "?A")); consumeFront(MangledName, "?A"); NamedIdentifierNode *Node = Arena.alloc(); Node->Name = "`anonymous namespace'"; size_t EndPos = MangledName.find('@'); if (EndPos == std::string_view::npos) { Error = true; return nullptr; } std::string_view NamespaceKey = MangledName.substr(0, EndPos); memorizeString(NamespaceKey); MangledName = MangledName.substr(EndPos + 1); return Node; } NamedIdentifierNode * Demangler::demangleLocallyScopedNamePiece(std::string_view &MangledName) { assert(startsWithLocalScopePattern(MangledName)); NamedIdentifierNode *Identifier = Arena.alloc(); consumeFront(MangledName, '?'); uint64_t Number = 0; bool IsNegative = false; std::tie(Number, IsNegative) = demangleNumber(MangledName); assert(!IsNegative); // One ? to terminate the number consumeFront(MangledName, '?'); assert(!Error); Node *Scope = parse(MangledName); if (Error) return nullptr; // Render the parent symbol's name into a buffer. OutputBuffer OB; OB << '`'; Scope->output(OB, OF_Default); OB << '\''; OB << "::`" << Number << "'"; Identifier->Name = copyString(OB); std::free(OB.getBuffer()); return Identifier; } // Parses a type name in the form of A@B@C@@ which represents C::B::A. QualifiedNameNode * Demangler::demangleFullyQualifiedTypeName(std::string_view &MangledName) { IdentifierNode *Identifier = demangleUnqualifiedTypeName(MangledName, /*Memorize=*/true); if (Error) return nullptr; assert(Identifier); QualifiedNameNode *QN = demangleNameScopeChain(MangledName, Identifier); if (Error) return nullptr; assert(QN); return QN; } // Parses a symbol name in the form of A@B@C@@ which represents C::B::A. // Symbol names have slightly different rules regarding what can appear // so we separate out the implementations for flexibility. 
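// For example (illustrative), the leading "bar@foo@@" of a mangled symbol
// such as "?bar@foo@@3HA" demangles to the qualified name foo::bar.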
QualifiedNameNode * Demangler::demangleFullyQualifiedSymbolName(std::string_view &MangledName) { // This is the final component of a symbol name (i.e. the leftmost component // of a mangled name. Since the only possible template instantiation that // can appear in this context is a function template, and since those are // not saved for the purposes of name backreferences, only backref simple // names. IdentifierNode *Identifier = demangleUnqualifiedSymbolName(MangledName, NBB_Simple); if (Error) return nullptr; QualifiedNameNode *QN = demangleNameScopeChain(MangledName, Identifier); if (Error) return nullptr; if (Identifier->kind() == NodeKind::StructorIdentifier) { if (QN->Components->Count < 2) { Error = true; return nullptr; } StructorIdentifierNode *SIN = static_cast(Identifier); Node *ClassNode = QN->Components->Nodes[QN->Components->Count - 2]; SIN->Class = static_cast(ClassNode); } assert(QN); return QN; } IdentifierNode * Demangler::demangleUnqualifiedTypeName(std::string_view &MangledName, bool Memorize) { // An inner-most name can be a back-reference, because a fully-qualified name // (e.g. Scope + Inner) can contain other fully qualified names inside of // them (for example template parameters), and these nested parameters can // refer to previously mangled types. if (startsWithDigit(MangledName)) return demangleBackRefName(MangledName); if (llvm::itanium_demangle::starts_with(MangledName, "?$")) return demangleTemplateInstantiationName(MangledName, NBB_Template); return demangleSimpleName(MangledName, Memorize); } IdentifierNode * Demangler::demangleUnqualifiedSymbolName(std::string_view &MangledName, NameBackrefBehavior NBB) { if (startsWithDigit(MangledName)) return demangleBackRefName(MangledName); if (llvm::itanium_demangle::starts_with(MangledName, "?$")) return demangleTemplateInstantiationName(MangledName, NBB); if (llvm::itanium_demangle::starts_with(MangledName, '?')) return demangleFunctionIdentifierCode(MangledName); return demangleSimpleName(MangledName, /*Memorize=*/(NBB & NBB_Simple) != 0); } IdentifierNode * Demangler::demangleNameScopePiece(std::string_view &MangledName) { if (startsWithDigit(MangledName)) return demangleBackRefName(MangledName); if (llvm::itanium_demangle::starts_with(MangledName, "?$")) return demangleTemplateInstantiationName(MangledName, NBB_Template); if (llvm::itanium_demangle::starts_with(MangledName, "?A")) return demangleAnonymousNamespaceName(MangledName); if (startsWithLocalScopePattern(MangledName)) return demangleLocallyScopedNamePiece(MangledName); return demangleSimpleName(MangledName, /*Memorize=*/true); } static NodeArrayNode *nodeListToNodeArray(ArenaAllocator &Arena, NodeList *Head, size_t Count) { NodeArrayNode *N = Arena.alloc(); N->Count = Count; N->Nodes = Arena.allocArray(Count); for (size_t I = 0; I < Count; ++I) { N->Nodes[I] = Head->N; Head = Head->Next; } return N; } QualifiedNameNode * Demangler::demangleNameScopeChain(std::string_view &MangledName, IdentifierNode *UnqualifiedName) { NodeList *Head = Arena.alloc(); Head->N = UnqualifiedName; size_t Count = 1; while (!consumeFront(MangledName, "@")) { ++Count; NodeList *NewHead = Arena.alloc(); NewHead->Next = Head; Head = NewHead; if (MangledName.empty()) { Error = true; return nullptr; } assert(!Error); IdentifierNode *Elem = demangleNameScopePiece(MangledName); if (Error) return nullptr; Head->N = Elem; } QualifiedNameNode *QN = Arena.alloc(); QN->Components = nodeListToNodeArray(Arena, Head, Count); return QN; } FuncClass 
Demangler::demangleFunctionClass(std::string_view &MangledName) { const char F = MangledName.front(); MangledName.remove_prefix(1); switch (F) { case '9': return FuncClass(FC_ExternC | FC_NoParameterList); case 'A': return FC_Private; case 'B': return FuncClass(FC_Private | FC_Far); case 'C': return FuncClass(FC_Private | FC_Static); case 'D': return FuncClass(FC_Private | FC_Static | FC_Far); case 'E': return FuncClass(FC_Private | FC_Virtual); case 'F': return FuncClass(FC_Private | FC_Virtual | FC_Far); case 'G': return FuncClass(FC_Private | FC_StaticThisAdjust); case 'H': return FuncClass(FC_Private | FC_StaticThisAdjust | FC_Far); case 'I': return FuncClass(FC_Protected); case 'J': return FuncClass(FC_Protected | FC_Far); case 'K': return FuncClass(FC_Protected | FC_Static); case 'L': return FuncClass(FC_Protected | FC_Static | FC_Far); case 'M': return FuncClass(FC_Protected | FC_Virtual); case 'N': return FuncClass(FC_Protected | FC_Virtual | FC_Far); case 'O': return FuncClass(FC_Protected | FC_Virtual | FC_StaticThisAdjust); case 'P': return FuncClass(FC_Protected | FC_Virtual | FC_StaticThisAdjust | FC_Far); case 'Q': return FuncClass(FC_Public); case 'R': return FuncClass(FC_Public | FC_Far); case 'S': return FuncClass(FC_Public | FC_Static); case 'T': return FuncClass(FC_Public | FC_Static | FC_Far); case 'U': return FuncClass(FC_Public | FC_Virtual); case 'V': return FuncClass(FC_Public | FC_Virtual | FC_Far); case 'W': return FuncClass(FC_Public | FC_Virtual | FC_StaticThisAdjust); case 'X': return FuncClass(FC_Public | FC_Virtual | FC_StaticThisAdjust | FC_Far); case 'Y': return FuncClass(FC_Global); case 'Z': return FuncClass(FC_Global | FC_Far); case '$': { FuncClass VFlag = FC_VirtualThisAdjust; if (consumeFront(MangledName, 'R')) VFlag = FuncClass(VFlag | FC_VirtualThisAdjustEx); if (MangledName.empty()) break; const char F = MangledName.front(); MangledName.remove_prefix(1); switch (F) { case '0': return FuncClass(FC_Private | FC_Virtual | VFlag); case '1': return FuncClass(FC_Private | FC_Virtual | VFlag | FC_Far); case '2': return FuncClass(FC_Protected | FC_Virtual | VFlag); case '3': return FuncClass(FC_Protected | FC_Virtual | VFlag | FC_Far); case '4': return FuncClass(FC_Public | FC_Virtual | VFlag); case '5': return FuncClass(FC_Public | FC_Virtual | VFlag | FC_Far); } } } Error = true; return FC_Public; } CallingConv Demangler::demangleCallingConvention(std::string_view &MangledName) { if (MangledName.empty()) { Error = true; return CallingConv::None; } const char F = MangledName.front(); MangledName.remove_prefix(1); switch (F) { case 'A': case 'B': return CallingConv::Cdecl; case 'C': case 'D': return CallingConv::Pascal; case 'E': case 'F': return CallingConv::Thiscall; case 'G': case 'H': return CallingConv::Stdcall; case 'I': case 'J': return CallingConv::Fastcall; case 'M': case 'N': return CallingConv::Clrcall; case 'O': case 'P': return CallingConv::Eabi; case 'Q': return CallingConv::Vectorcall; case 'S': return CallingConv::Swift; case 'W': return CallingConv::SwiftAsync; } return CallingConv::None; } StorageClass Demangler::demangleVariableStorageClass(std::string_view &MangledName) { assert(MangledName.front() >= '0' && MangledName.front() <= '4'); const char F = MangledName.front(); MangledName.remove_prefix(1); switch (F) { case '0': return StorageClass::PrivateStatic; case '1': return StorageClass::ProtectedStatic; case '2': return StorageClass::PublicStatic; case '3': return StorageClass::Global; case '4': return 
StorageClass::FunctionLocalStatic; } DEMANGLE_UNREACHABLE; } std::pair Demangler::demangleQualifiers(std::string_view &MangledName) { if (MangledName.empty()) { Error = true; return std::make_pair(Q_None, false); } const char F = MangledName.front(); MangledName.remove_prefix(1); switch (F) { // Member qualifiers case 'Q': return std::make_pair(Q_None, true); case 'R': return std::make_pair(Q_Const, true); case 'S': return std::make_pair(Q_Volatile, true); case 'T': return std::make_pair(Qualifiers(Q_Const | Q_Volatile), true); // Non-Member qualifiers case 'A': return std::make_pair(Q_None, false); case 'B': return std::make_pair(Q_Const, false); case 'C': return std::make_pair(Q_Volatile, false); case 'D': return std::make_pair(Qualifiers(Q_Const | Q_Volatile), false); } Error = true; return std::make_pair(Q_None, false); } // ::= // ::= # pointers, references TypeNode *Demangler::demangleType(std::string_view &MangledName, QualifierMangleMode QMM) { Qualifiers Quals = Q_None; bool IsMember = false; if (QMM == QualifierMangleMode::Mangle) { std::tie(Quals, IsMember) = demangleQualifiers(MangledName); } else if (QMM == QualifierMangleMode::Result) { if (consumeFront(MangledName, '?')) std::tie(Quals, IsMember) = demangleQualifiers(MangledName); } if (MangledName.empty()) { Error = true; return nullptr; } TypeNode *Ty = nullptr; if (isTagType(MangledName)) Ty = demangleClassType(MangledName); else if (isPointerType(MangledName)) { if (isMemberPointer(MangledName, Error)) Ty = demangleMemberPointerType(MangledName); else if (!Error) Ty = demanglePointerType(MangledName); else return nullptr; } else if (isArrayType(MangledName)) Ty = demangleArrayType(MangledName); else if (isFunctionType(MangledName)) { if (consumeFront(MangledName, "$$A8@@")) Ty = demangleFunctionType(MangledName, true); else { assert(llvm::itanium_demangle::starts_with(MangledName, "$$A6")); consumeFront(MangledName, "$$A6"); Ty = demangleFunctionType(MangledName, false); } } else if (isCustomType(MangledName)) { Ty = demangleCustomType(MangledName); } else { Ty = demanglePrimitiveType(MangledName); } if (!Ty || Error) return Ty; Ty->Quals = Qualifiers(Ty->Quals | Quals); return Ty; } bool Demangler::demangleThrowSpecification(std::string_view &MangledName) { if (consumeFront(MangledName, "_E")) return true; if (consumeFront(MangledName, 'Z')) return false; Error = true; return false; } FunctionSignatureNode * Demangler::demangleFunctionType(std::string_view &MangledName, bool HasThisQuals) { FunctionSignatureNode *FTy = Arena.alloc(); if (HasThisQuals) { FTy->Quals = demanglePointerExtQualifiers(MangledName); FTy->RefQualifier = demangleFunctionRefQualifier(MangledName); FTy->Quals = Qualifiers(FTy->Quals | demangleQualifiers(MangledName).first); } // Fields that appear on both member and non-member functions. 
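// For example (illustrative), for "?f@@YAHH@Z" the portion decoded here is
// "AHH@Z": __cdecl ('A'), an int return type ('H'), an (int) parameter list
// ("H@"), and a 'Z' throw specification.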
FTy->CallConvention = demangleCallingConvention(MangledName); // ::= // ::= @ # structors (they have no declared return type) bool IsStructor = consumeFront(MangledName, '@'); if (!IsStructor) FTy->ReturnType = demangleType(MangledName, QualifierMangleMode::Result); FTy->Params = demangleFunctionParameterList(MangledName, FTy->IsVariadic); FTy->IsNoexcept = demangleThrowSpecification(MangledName); return FTy; } FunctionSymbolNode * Demangler::demangleFunctionEncoding(std::string_view &MangledName) { FuncClass ExtraFlags = FC_None; if (consumeFront(MangledName, "$$J0")) ExtraFlags = FC_ExternC; if (MangledName.empty()) { Error = true; return nullptr; } FuncClass FC = demangleFunctionClass(MangledName); FC = FuncClass(ExtraFlags | FC); FunctionSignatureNode *FSN = nullptr; ThunkSignatureNode *TTN = nullptr; if (FC & FC_StaticThisAdjust) { TTN = Arena.alloc(); TTN->ThisAdjust.StaticOffset = demangleSigned(MangledName); } else if (FC & FC_VirtualThisAdjust) { TTN = Arena.alloc(); if (FC & FC_VirtualThisAdjustEx) { TTN->ThisAdjust.VBPtrOffset = demangleSigned(MangledName); TTN->ThisAdjust.VBOffsetOffset = demangleSigned(MangledName); } TTN->ThisAdjust.VtordispOffset = demangleSigned(MangledName); TTN->ThisAdjust.StaticOffset = demangleSigned(MangledName); } if (FC & FC_NoParameterList) { // This is an extern "C" function whose full signature hasn't been mangled. // This happens when we need to mangle a local symbol inside of an extern // "C" function. FSN = Arena.alloc(); } else { bool HasThisQuals = !(FC & (FC_Global | FC_Static)); FSN = demangleFunctionType(MangledName, HasThisQuals); } if (Error) return nullptr; if (TTN) { *static_cast(TTN) = *FSN; FSN = TTN; } FSN->FunctionClass = FC; FunctionSymbolNode *Symbol = Arena.alloc(); Symbol->Signature = FSN; return Symbol; } CustomTypeNode *Demangler::demangleCustomType(std::string_view &MangledName) { assert(llvm::itanium_demangle::starts_with(MangledName, '?')); MangledName.remove_prefix(1); CustomTypeNode *CTN = Arena.alloc(); CTN->Identifier = demangleUnqualifiedTypeName(MangledName, /*Memorize=*/true); if (!consumeFront(MangledName, '@')) Error = true; if (Error) return nullptr; return CTN; } // Reads a primitive type. 
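// For example, 'H' decodes to int, 'N' to double, "_N" to bool, and "_W" to
// wchar_t; the full mapping is in the switch below.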
PrimitiveTypeNode * Demangler::demanglePrimitiveType(std::string_view &MangledName) { if (consumeFront(MangledName, "$$T")) return Arena.alloc(PrimitiveKind::Nullptr); const char F = MangledName.front(); MangledName.remove_prefix(1); switch (F) { case 'X': return Arena.alloc(PrimitiveKind::Void); case 'D': return Arena.alloc(PrimitiveKind::Char); case 'C': return Arena.alloc(PrimitiveKind::Schar); case 'E': return Arena.alloc(PrimitiveKind::Uchar); case 'F': return Arena.alloc(PrimitiveKind::Short); case 'G': return Arena.alloc(PrimitiveKind::Ushort); case 'H': return Arena.alloc(PrimitiveKind::Int); case 'I': return Arena.alloc(PrimitiveKind::Uint); case 'J': return Arena.alloc(PrimitiveKind::Long); case 'K': return Arena.alloc(PrimitiveKind::Ulong); case 'M': return Arena.alloc(PrimitiveKind::Float); case 'N': return Arena.alloc(PrimitiveKind::Double); case 'O': return Arena.alloc(PrimitiveKind::Ldouble); case '_': { if (MangledName.empty()) { Error = true; return nullptr; } const char F = MangledName.front(); MangledName.remove_prefix(1); switch (F) { case 'N': return Arena.alloc(PrimitiveKind::Bool); case 'J': return Arena.alloc(PrimitiveKind::Int64); case 'K': return Arena.alloc(PrimitiveKind::Uint64); case 'W': return Arena.alloc(PrimitiveKind::Wchar); case 'Q': return Arena.alloc(PrimitiveKind::Char8); case 'S': return Arena.alloc(PrimitiveKind::Char16); case 'U': return Arena.alloc(PrimitiveKind::Char32); } break; } } Error = true; return nullptr; } TagTypeNode *Demangler::demangleClassType(std::string_view &MangledName) { TagTypeNode *TT = nullptr; const char F = MangledName.front(); MangledName.remove_prefix(1); switch (F) { case 'T': TT = Arena.alloc(TagKind::Union); break; case 'U': TT = Arena.alloc(TagKind::Struct); break; case 'V': TT = Arena.alloc(TagKind::Class); break; case 'W': if (!consumeFront(MangledName, '4')) { Error = true; return nullptr; } TT = Arena.alloc(TagKind::Enum); break; default: assert(false); } TT->QualifiedName = demangleFullyQualifiedTypeName(MangledName); return TT; } // ::= E? // # the E is required for 64-bit non-static pointers PointerTypeNode *Demangler::demanglePointerType(std::string_view &MangledName) { PointerTypeNode *Pointer = Arena.alloc(); std::tie(Pointer->Quals, Pointer->Affinity) = demanglePointerCVQualifiers(MangledName); if (consumeFront(MangledName, "6")) { Pointer->Pointee = demangleFunctionType(MangledName, false); return Pointer; } Qualifiers ExtQuals = demanglePointerExtQualifiers(MangledName); Pointer->Quals = Qualifiers(Pointer->Quals | ExtQuals); Pointer->Pointee = demangleType(MangledName, QualifierMangleMode::Mangle); return Pointer; } PointerTypeNode * Demangler::demangleMemberPointerType(std::string_view &MangledName) { PointerTypeNode *Pointer = Arena.alloc(); std::tie(Pointer->Quals, Pointer->Affinity) = demanglePointerCVQualifiers(MangledName); assert(Pointer->Affinity == PointerAffinity::Pointer); Qualifiers ExtQuals = demanglePointerExtQualifiers(MangledName); Pointer->Quals = Qualifiers(Pointer->Quals | ExtQuals); // isMemberPointer() only returns true if there is at least one character // after the qualifiers. 
if (consumeFront(MangledName, "8")) { Pointer->ClassParent = demangleFullyQualifiedTypeName(MangledName); Pointer->Pointee = demangleFunctionType(MangledName, true); } else { Qualifiers PointeeQuals = Q_None; bool IsMember = false; std::tie(PointeeQuals, IsMember) = demangleQualifiers(MangledName); assert(IsMember || Error); Pointer->ClassParent = demangleFullyQualifiedTypeName(MangledName); Pointer->Pointee = demangleType(MangledName, QualifierMangleMode::Drop); if (Pointer->Pointee) Pointer->Pointee->Quals = PointeeQuals; } return Pointer; } Qualifiers Demangler::demanglePointerExtQualifiers(std::string_view &MangledName) { Qualifiers Quals = Q_None; if (consumeFront(MangledName, 'E')) Quals = Qualifiers(Quals | Q_Pointer64); if (consumeFront(MangledName, 'I')) Quals = Qualifiers(Quals | Q_Restrict); if (consumeFront(MangledName, 'F')) Quals = Qualifiers(Quals | Q_Unaligned); return Quals; } ArrayTypeNode *Demangler::demangleArrayType(std::string_view &MangledName) { assert(MangledName.front() == 'Y'); MangledName.remove_prefix(1); uint64_t Rank = 0; bool IsNegative = false; std::tie(Rank, IsNegative) = demangleNumber(MangledName); if (IsNegative || Rank == 0) { Error = true; return nullptr; } ArrayTypeNode *ATy = Arena.alloc(); NodeList *Head = Arena.alloc(); NodeList *Tail = Head; for (uint64_t I = 0; I < Rank; ++I) { uint64_t D = 0; std::tie(D, IsNegative) = demangleNumber(MangledName); if (Error || IsNegative) { Error = true; return nullptr; } Tail->N = Arena.alloc(D, IsNegative); if (I + 1 < Rank) { Tail->Next = Arena.alloc(); Tail = Tail->Next; } } ATy->Dimensions = nodeListToNodeArray(Arena, Head, Rank); if (consumeFront(MangledName, "$$C")) { bool IsMember = false; std::tie(ATy->Quals, IsMember) = demangleQualifiers(MangledName); if (IsMember) { Error = true; return nullptr; } } ATy->ElementType = demangleType(MangledName, QualifierMangleMode::Drop); return ATy; } // Reads a function's parameters. NodeArrayNode * Demangler::demangleFunctionParameterList(std::string_view &MangledName, bool &IsVariadic) { // Empty parameter list. if (consumeFront(MangledName, 'X')) return nullptr; NodeList *Head = Arena.alloc(); NodeList **Current = &Head; size_t Count = 0; while (!Error && !llvm::itanium_demangle::starts_with(MangledName, '@') && !llvm::itanium_demangle::starts_with(MangledName, 'Z')) { ++Count; if (startsWithDigit(MangledName)) { size_t N = MangledName[0] - '0'; if (N >= Backrefs.FunctionParamCount) { Error = true; return nullptr; } MangledName.remove_prefix(1); *Current = Arena.alloc(); (*Current)->N = Backrefs.FunctionParams[N]; Current = &(*Current)->Next; continue; } size_t OldSize = MangledName.size(); *Current = Arena.alloc(); TypeNode *TN = demangleType(MangledName, QualifierMangleMode::Drop); if (!TN || Error) return nullptr; (*Current)->N = TN; size_t CharsConsumed = OldSize - MangledName.size(); assert(CharsConsumed != 0); // Single-letter types are ignored for backreferences because memorizing // them doesn't save anything. if (Backrefs.FunctionParamCount <= 9 && CharsConsumed > 1) Backrefs.FunctionParams[Backrefs.FunctionParamCount++] = TN; Current = &(*Current)->Next; } if (Error) return nullptr; NodeArrayNode *NA = nodeListToNodeArray(Arena, Head, Count); // A non-empty parameter list is terminated by either 'Z' (variadic) parameter // list or '@' (non variadic). Careful not to consume "@Z", as in that case // the following Z could be a throw specifier. 
if (consumeFront(MangledName, '@')) return NA; if (consumeFront(MangledName, 'Z')) { IsVariadic = true; return NA; } DEMANGLE_UNREACHABLE; } NodeArrayNode * Demangler::demangleTemplateParameterList(std::string_view &MangledName) { NodeList *Head = nullptr; NodeList **Current = &Head; size_t Count = 0; while (!llvm::itanium_demangle::starts_with(MangledName, '@')) { if (consumeFront(MangledName, "$S") || consumeFront(MangledName, "$$V") || consumeFront(MangledName, "$$$V") || consumeFront(MangledName, "$$Z")) { // parameter pack separator continue; } ++Count; // Template parameter lists don't participate in back-referencing. *Current = Arena.alloc(); NodeList &TP = **Current; // ::= $ M const bool IsAutoNTTP = consumeFront(MangledName, "$M"); if (IsAutoNTTP) { // The deduced type of the auto NTTP parameter isn't printed so // we want to ignore the AST created from demangling the type. // // TODO: Avoid the extra allocations to the bump allocator in this case. (void)demangleType(MangledName, QualifierMangleMode::Drop); if (Error) return nullptr; } TemplateParameterReferenceNode *TPRN = nullptr; if (consumeFront(MangledName, "$$Y")) { // Template alias TP.N = demangleFullyQualifiedTypeName(MangledName); } else if (consumeFront(MangledName, "$$B")) { // Array TP.N = demangleType(MangledName, QualifierMangleMode::Drop); } else if (consumeFront(MangledName, "$$C")) { // Type has qualifiers. TP.N = demangleType(MangledName, QualifierMangleMode::Mangle); } else if (startsWith(MangledName, "$1", "1", !IsAutoNTTP) || startsWith(MangledName, "$H", "H", !IsAutoNTTP) || startsWith(MangledName, "$I", "I", !IsAutoNTTP) || startsWith(MangledName, "$J", "J", !IsAutoNTTP)) { // Pointer to member TP.N = TPRN = Arena.alloc(); TPRN->IsMemberPointer = true; if (!IsAutoNTTP) MangledName.remove_prefix(1); // Remove leading '$' // 1 - single inheritance // H - multiple inheritance // I - virtual inheritance // J - unspecified inheritance char InheritanceSpecifier = MangledName.front(); MangledName.remove_prefix(1); SymbolNode *S = nullptr; if (llvm::itanium_demangle::starts_with(MangledName, '?')) { S = parse(MangledName); if (Error || !S->Name) { Error = true; return nullptr; } memorizeIdentifier(S->Name->getUnqualifiedIdentifier()); } switch (InheritanceSpecifier) { case 'J': TPRN->ThunkOffsets[TPRN->ThunkOffsetCount++] = demangleSigned(MangledName); DEMANGLE_FALLTHROUGH; case 'I': TPRN->ThunkOffsets[TPRN->ThunkOffsetCount++] = demangleSigned(MangledName); DEMANGLE_FALLTHROUGH; case 'H': TPRN->ThunkOffsets[TPRN->ThunkOffsetCount++] = demangleSigned(MangledName); DEMANGLE_FALLTHROUGH; case '1': break; default: DEMANGLE_UNREACHABLE; } TPRN->Affinity = PointerAffinity::Pointer; TPRN->Symbol = S; } else if (llvm::itanium_demangle::starts_with(MangledName, "$E?")) { consumeFront(MangledName, "$E"); // Reference to symbol TP.N = TPRN = Arena.alloc(); TPRN->Symbol = parse(MangledName); TPRN->Affinity = PointerAffinity::Reference; } else if (startsWith(MangledName, "$F", "F", !IsAutoNTTP) || startsWith(MangledName, "$G", "G", !IsAutoNTTP)) { TP.N = TPRN = Arena.alloc(); // Data member pointer. 
if (!IsAutoNTTP) MangledName.remove_prefix(1); // Remove leading '$' char InheritanceSpecifier = MangledName.front(); MangledName.remove_prefix(1); switch (InheritanceSpecifier) { case 'G': TPRN->ThunkOffsets[TPRN->ThunkOffsetCount++] = demangleSigned(MangledName); DEMANGLE_FALLTHROUGH; case 'F': TPRN->ThunkOffsets[TPRN->ThunkOffsetCount++] = demangleSigned(MangledName); TPRN->ThunkOffsets[TPRN->ThunkOffsetCount++] = demangleSigned(MangledName); break; default: DEMANGLE_UNREACHABLE; } TPRN->IsMemberPointer = true; } else if (consumeFront(MangledName, "$0", "0", !IsAutoNTTP)) { // Integral non-type template parameter bool IsNegative = false; uint64_t Value = 0; std::tie(Value, IsNegative) = demangleNumber(MangledName); TP.N = Arena.alloc(Value, IsNegative); } else { TP.N = demangleType(MangledName, QualifierMangleMode::Drop); } if (Error) return nullptr; Current = &TP.Next; } // The loop above returns nullptr on Error. assert(!Error); // Template parameter lists cannot be variadic, so it can only be terminated // by @ (as opposed to 'Z' in the function parameter case). assert(llvm::itanium_demangle::starts_with( MangledName, '@')); // The above loop exits only on '@'. consumeFront(MangledName, '@'); return nodeListToNodeArray(Arena, Head, Count); } void Demangler::dumpBackReferences() { std::printf("%d function parameter backreferences\n", (int)Backrefs.FunctionParamCount); // Create an output stream so we can render each type. OutputBuffer OB; for (size_t I = 0; I < Backrefs.FunctionParamCount; ++I) { OB.setCurrentPosition(0); TypeNode *T = Backrefs.FunctionParams[I]; T->output(OB, OF_Default); std::string_view B = OB; std::printf(" [%d] - %.*s\n", (int)I, (int)B.size(), B.data()); } std::free(OB.getBuffer()); if (Backrefs.FunctionParamCount > 0) std::printf("\n"); std::printf("%d name backreferences\n", (int)Backrefs.NamesCount); for (size_t I = 0; I < Backrefs.NamesCount; ++I) { std::printf(" [%d] - %.*s\n", (int)I, (int)Backrefs.Names[I]->Name.size(), Backrefs.Names[I]->Name.data()); } if (Backrefs.NamesCount > 0) std::printf("\n"); } +std::optional +llvm::getArm64ECInsertionPointInMangledName(std::string_view MangledName) { + std::string_view ProcessedName{MangledName}; + + // We only support this for MSVC-style C++ symbols. + if (!consumeFront(ProcessedName, '?')) + return std::nullopt; + + // The insertion point is just after the name of the symbol, so parse that to + // remove it from the processed name. 
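+  // For example (illustrative), for "?foo@@YAHXZ" the qualified name ends
+  // after "?foo@@", so the returned offset is where "$$h" gets inserted to
+  // form the ARM64EC name "?foo@@$$hYAHXZ".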
+ Demangler D; + D.demangleFullyQualifiedSymbolName(ProcessedName); + if (D.Error) + return std::nullopt; + + return MangledName.length() - ProcessedName.length(); +} + char *llvm::microsoftDemangle(std::string_view MangledName, size_t *NMangled, int *Status, MSDemangleFlags Flags) { Demangler D; std::string_view Name{MangledName}; SymbolNode *AST = D.parse(Name); if (!D.Error && NMangled) *NMangled = MangledName.size() - Name.size(); if (Flags & MSDF_DumpBackrefs) D.dumpBackReferences(); OutputFlags OF = OF_Default; if (Flags & MSDF_NoCallingConvention) OF = OutputFlags(OF | OF_NoCallingConvention); if (Flags & MSDF_NoAccessSpecifier) OF = OutputFlags(OF | OF_NoAccessSpecifier); if (Flags & MSDF_NoReturnType) OF = OutputFlags(OF | OF_NoReturnType); if (Flags & MSDF_NoMemberType) OF = OutputFlags(OF | OF_NoMemberType); if (Flags & MSDF_NoVariableType) OF = OutputFlags(OF | OF_NoVariableType); int InternalStatus = demangle_success; char *Buf; if (D.Error) InternalStatus = demangle_invalid_mangled_name; else { OutputBuffer OB; AST->output(OB, OF); OB += '\0'; Buf = OB.getBuffer(); } if (Status) *Status = InternalStatus; return InternalStatus == demangle_success ? Buf : nullptr; } diff --git a/llvm/lib/IR/Mangler.cpp b/llvm/lib/IR/Mangler.cpp index e6c3ea9d5688..884739b3212c 100644 --- a/llvm/lib/IR/Mangler.cpp +++ b/llvm/lib/IR/Mangler.cpp @@ -1,331 +1,327 @@ //===-- Mangler.cpp - Self-contained c/asm llvm name mangler --------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // Unified name mangler for assembly backends. // //===----------------------------------------------------------------------===// #include "llvm/IR/Mangler.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Twine.h" +#include "llvm/Demangle/Demangle.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/Module.h" #include "llvm/Support/raw_ostream.h" #include "llvm/TargetParser/Triple.h" using namespace llvm; namespace { enum ManglerPrefixTy { Default, ///< Emit default string before each symbol. Private, ///< Emit "private" prefix before each symbol. LinkerPrivate ///< Emit "linker private" prefix before each symbol. }; } static void getNameWithPrefixImpl(raw_ostream &OS, const Twine &GVName, ManglerPrefixTy PrefixTy, const DataLayout &DL, char Prefix) { SmallString<256> TmpData; StringRef Name = GVName.toStringRef(TmpData); assert(!Name.empty() && "getNameWithPrefix requires non-empty name"); // No need to do anything special if the global has the special "do not // mangle" flag in the name. if (Name[0] == '\1') { OS << Name.substr(1); return; } if (DL.doNotMangleLeadingQuestionMark() && Name[0] == '?') Prefix = '\0'; if (PrefixTy == Private) OS << DL.getPrivateGlobalPrefix(); else if (PrefixTy == LinkerPrivate) OS << DL.getLinkerPrivateGlobalPrefix(); if (Prefix != '\0') OS << Prefix; // If this is a simple string that doesn't need escaping, just append it. 
OS << Name; } static void getNameWithPrefixImpl(raw_ostream &OS, const Twine &GVName, const DataLayout &DL, ManglerPrefixTy PrefixTy) { char Prefix = DL.getGlobalPrefix(); return getNameWithPrefixImpl(OS, GVName, PrefixTy, DL, Prefix); } void Mangler::getNameWithPrefix(raw_ostream &OS, const Twine &GVName, const DataLayout &DL) { return getNameWithPrefixImpl(OS, GVName, DL, Default); } void Mangler::getNameWithPrefix(SmallVectorImpl &OutName, const Twine &GVName, const DataLayout &DL) { raw_svector_ostream OS(OutName); char Prefix = DL.getGlobalPrefix(); return getNameWithPrefixImpl(OS, GVName, Default, DL, Prefix); } static bool hasByteCountSuffix(CallingConv::ID CC) { switch (CC) { case CallingConv::X86_FastCall: case CallingConv::X86_StdCall: case CallingConv::X86_VectorCall: return true; default: return false; } } /// Microsoft fastcall and stdcall functions require a suffix on their name /// indicating the number of words of arguments they take. static void addByteCountSuffix(raw_ostream &OS, const Function *F, const DataLayout &DL) { // Calculate arguments size total. unsigned ArgWords = 0; const unsigned PtrSize = DL.getPointerSize(); for (const Argument &A : F->args()) { // For the purposes of the byte count suffix, structs returned by pointer // do not count as function arguments. if (A.hasStructRetAttr()) continue; // 'Dereference' type in case of byval or inalloca parameter attribute. uint64_t AllocSize = A.hasPassPointeeByValueCopyAttr() ? A.getPassPointeeByValueCopySize(DL) : DL.getTypeAllocSize(A.getType()); // Size should be aligned to pointer size. ArgWords += alignTo(AllocSize, PtrSize); } OS << '@' << ArgWords; } void Mangler::getNameWithPrefix(raw_ostream &OS, const GlobalValue *GV, bool CannotUsePrivateLabel) const { ManglerPrefixTy PrefixTy = Default; assert(GV != nullptr && "Invalid Global Value"); if (GV->hasPrivateLinkage()) { if (CannotUsePrivateLabel) PrefixTy = LinkerPrivate; else PrefixTy = Private; } const DataLayout &DL = GV->getDataLayout(); if (!GV->hasName()) { // Get the ID for the global, assigning a new one if we haven't got one // already. unsigned &ID = AnonGlobalIDs[GV]; if (ID == 0) ID = AnonGlobalIDs.size(); // Must mangle the global into a unique ID. getNameWithPrefixImpl(OS, "__unnamed_" + Twine(ID), DL, PrefixTy); return; } StringRef Name = GV->getName(); char Prefix = DL.getGlobalPrefix(); // Mangle functions with Microsoft calling conventions specially. Only do // this mangling for x86_64 vectorcall and 32-bit x86. const Function *MSFunc = dyn_cast_or_null(GV->getAliaseeObject()); // Don't add byte count suffixes when '\01' or '?' are in the first // character. if (Name.starts_with("\01") || (DL.doNotMangleLeadingQuestionMark() && Name.starts_with("?"))) MSFunc = nullptr; CallingConv::ID CC = MSFunc ? MSFunc->getCallingConv() : (unsigned)CallingConv::C; if (!DL.hasMicrosoftFastStdCallMangling() && CC != CallingConv::X86_VectorCall) MSFunc = nullptr; if (MSFunc) { if (CC == CallingConv::X86_FastCall) Prefix = '@'; // fastcall functions have an @ prefix instead of _. else if (CC == CallingConv::X86_VectorCall) Prefix = '\0'; // vectorcall functions have no prefix. } getNameWithPrefixImpl(OS, Name, PrefixTy, DL, Prefix); if (!MSFunc) return; // If we are supposed to add a microsoft-style suffix for stdcall, fastcall, // or vectorcall, add it. These functions have a suffix of @N where N is the // cumulative byte size of all of the parameters to the function in decimal. 
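// For example (illustrative), on 32-bit x86 "int __stdcall f(int, int)" is
// emitted as "_f@8" and "int __fastcall f(int, int)" as "@f@8".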
if (CC == CallingConv::X86_VectorCall) OS << '@'; // vectorcall functions use a double @ suffix. FunctionType *FT = MSFunc->getFunctionType(); if (hasByteCountSuffix(CC) && // "Pure" variadic functions do not receive @0 suffix. (!FT->isVarArg() || FT->getNumParams() == 0 || (FT->getNumParams() == 1 && MSFunc->hasStructRetAttr()))) addByteCountSuffix(OS, MSFunc, DL); } void Mangler::getNameWithPrefix(SmallVectorImpl &OutName, const GlobalValue *GV, bool CannotUsePrivateLabel) const { raw_svector_ostream OS(OutName); getNameWithPrefix(OS, GV, CannotUsePrivateLabel); } // Check if the name needs quotes to be safe for the linker to interpret. static bool canBeUnquotedInDirective(char C) { return isAlnum(C) || C == '_' || C == '@' || C == '#'; } static bool canBeUnquotedInDirective(StringRef Name) { if (Name.empty()) return false; // If any of the characters in the string is an unacceptable character, force // quotes. for (char C : Name) { if (!canBeUnquotedInDirective(C)) return false; } return true; } void llvm::emitLinkerFlagsForGlobalCOFF(raw_ostream &OS, const GlobalValue *GV, const Triple &TT, Mangler &Mangler) { if (GV->hasDLLExportStorageClass() && !GV->isDeclaration()) { if (TT.isWindowsMSVCEnvironment()) OS << " /EXPORT:"; else OS << " -export:"; bool NeedQuotes = GV->hasName() && !canBeUnquotedInDirective(GV->getName()); if (NeedQuotes) OS << "\""; if (TT.isWindowsGNUEnvironment() || TT.isWindowsCygwinEnvironment()) { std::string Flag; raw_string_ostream FlagOS(Flag); Mangler.getNameWithPrefix(FlagOS, GV, false); FlagOS.flush(); if (Flag[0] == GV->getDataLayout().getGlobalPrefix()) OS << Flag.substr(1); else OS << Flag; } else { Mangler.getNameWithPrefix(OS, GV, false); } if (TT.isWindowsArm64EC()) { // Use EXPORTAS for mangled ARM64EC symbols. // FIXME: During LTO, we're invoked prior to the EC lowering pass, // so symbols are not yet mangled. Emitting the unmangled name // typically functions correctly; the linker can resolve the export // with the demangled alias. if (std::optional demangledName = getArm64ECDemangledFunctionName(GV->getName())) OS << ",EXPORTAS," << *demangledName; } if (NeedQuotes) OS << "\""; if (!GV->getValueType()->isFunctionTy()) { if (TT.isWindowsMSVCEnvironment()) OS << ",DATA"; else OS << ",data"; } } if (GV->hasHiddenVisibility() && !GV->isDeclaration() && TT.isOSCygMing()) { OS << " -exclude-symbols:"; bool NeedQuotes = GV->hasName() && !canBeUnquotedInDirective(GV->getName()); if (NeedQuotes) OS << "\""; std::string Flag; raw_string_ostream FlagOS(Flag); Mangler.getNameWithPrefix(FlagOS, GV, false); FlagOS.flush(); if (Flag[0] == GV->getDataLayout().getGlobalPrefix()) OS << Flag.substr(1); else OS << Flag; if (NeedQuotes) OS << "\""; } } void llvm::emitLinkerFlagsForUsedCOFF(raw_ostream &OS, const GlobalValue *GV, const Triple &T, Mangler &M) { if (!T.isWindowsMSVCEnvironment()) return; OS << " /INCLUDE:"; bool NeedQuotes = GV->hasName() && !canBeUnquotedInDirective(GV->getName()); if (NeedQuotes) OS << "\""; M.getNameWithPrefix(OS, GV, false); if (NeedQuotes) OS << "\""; } std::optional llvm::getArm64ECMangledFunctionName(StringRef Name) { - bool IsCppFn = Name[0] == '?'; - if (IsCppFn && Name.contains("$$h")) - return std::nullopt; - if (!IsCppFn && Name[0] == '#') + if (Name[0] != '?') { + // For non-C++ symbols, prefix the name with "#" unless it's already + // mangled. + if (Name[0] == '#') + return std::nullopt; + return std::optional(("#" + Name).str()); + } + + // If the name contains $$h, then it is already mangled. 
+ if (Name.contains("$$h")) return std::nullopt; - StringRef Prefix = "$$h"; - size_t InsertIdx = 0; - if (IsCppFn) { - InsertIdx = Name.find("@@"); - size_t ThreeAtSignsIdx = Name.find("@@@"); - if (InsertIdx != std::string::npos && InsertIdx != ThreeAtSignsIdx) { - InsertIdx += 2; - } else { - InsertIdx = Name.find("@"); - if (InsertIdx != std::string::npos) - InsertIdx++; - } - } else { - Prefix = "#"; - } + // Ask the demangler where we should insert "$$h". + auto InsertIdx = getArm64ECInsertionPointInMangledName(Name); + if (!InsertIdx) + return std::nullopt; return std::optional( - (Name.substr(0, InsertIdx) + Prefix + Name.substr(InsertIdx)).str()); + (Name.substr(0, *InsertIdx) + "$$h" + Name.substr(*InsertIdx)).str()); } std::optional llvm::getArm64ECDemangledFunctionName(StringRef Name) { if (Name[0] == '#') return std::optional(Name.substr(1)); if (Name[0] != '?') return std::nullopt; std::pair Pair = Name.split("$$h"); if (Pair.second.empty()) return std::nullopt; return std::optional((Pair.first + Pair.second).str()); } diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index 1053ba924276..95f2f91f82bd 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -1,10268 +1,10268 @@ //=- AArch64InstrInfo.td - Describe the AArch64 Instructions -*- tablegen -*-=// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // AArch64 Instruction definitions. // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // ARM Instruction Predicate Definitions. 
// class AssemblerPredicateWithAll : AssemblerPredicate<(any_of FeatureAll, cond), name>; def HasV8_0a : Predicate<"Subtarget->hasV8_0aOps()">, AssemblerPredicate<(all_of HasV8_0aOps), "armv8.0a">; def HasV8_1a : Predicate<"Subtarget->hasV8_1aOps()">, AssemblerPredicateWithAll<(all_of HasV8_1aOps), "armv8.1a">; def HasV8_2a : Predicate<"Subtarget->hasV8_2aOps()">, AssemblerPredicateWithAll<(all_of HasV8_2aOps), "armv8.2a">; def HasV8_3a : Predicate<"Subtarget->hasV8_3aOps()">, AssemblerPredicateWithAll<(all_of HasV8_3aOps), "armv8.3a">; def HasV8_4a : Predicate<"Subtarget->hasV8_4aOps()">, AssemblerPredicateWithAll<(all_of HasV8_4aOps), "armv8.4a">; def HasV8_5a : Predicate<"Subtarget->hasV8_5aOps()">, AssemblerPredicateWithAll<(all_of HasV8_5aOps), "armv8.5a">; def HasV8_6a : Predicate<"Subtarget->hasV8_6aOps()">, AssemblerPredicateWithAll<(all_of HasV8_6aOps), "armv8.6a">; def HasV8_7a : Predicate<"Subtarget->hasV8_7aOps()">, AssemblerPredicateWithAll<(all_of HasV8_7aOps), "armv8.7a">; def HasV8_8a : Predicate<"Subtarget->hasV8_8aOps()">, AssemblerPredicateWithAll<(all_of HasV8_8aOps), "armv8.8a">; def HasV8_9a : Predicate<"Subtarget->hasV8_9aOps()">, AssemblerPredicateWithAll<(all_of HasV8_9aOps), "armv8.9a">; def HasV9_0a : Predicate<"Subtarget->hasV9_0aOps()">, AssemblerPredicateWithAll<(all_of HasV9_0aOps), "armv9-a">; def HasV9_1a : Predicate<"Subtarget->hasV9_1aOps()">, AssemblerPredicateWithAll<(all_of HasV9_1aOps), "armv9.1a">; def HasV9_2a : Predicate<"Subtarget->hasV9_2aOps()">, AssemblerPredicateWithAll<(all_of HasV9_2aOps), "armv9.2a">; def HasV9_3a : Predicate<"Subtarget->hasV9_3aOps()">, AssemblerPredicateWithAll<(all_of HasV9_3aOps), "armv9.3a">; def HasV9_4a : Predicate<"Subtarget->hasV9_4aOps()">, AssemblerPredicateWithAll<(all_of HasV9_4aOps), "armv9.4a">; def HasV8_0r : Predicate<"Subtarget->hasV8_0rOps()">, AssemblerPredicateWithAll<(all_of HasV8_0rOps), "armv8-r">; def HasEL2VMSA : Predicate<"Subtarget->hasEL2VMSA()">, AssemblerPredicateWithAll<(all_of FeatureEL2VMSA), "el2vmsa">; def HasEL3 : Predicate<"Subtarget->hasEL3()">, AssemblerPredicateWithAll<(all_of FeatureEL3), "el3">; def HasVH : Predicate<"Subtarget->hasVH()">, AssemblerPredicateWithAll<(all_of FeatureVH), "vh">; def HasLOR : Predicate<"Subtarget->hasLOR()">, AssemblerPredicateWithAll<(all_of FeatureLOR), "lor">; def HasPAuth : Predicate<"Subtarget->hasPAuth()">, AssemblerPredicateWithAll<(all_of FeaturePAuth), "pauth">; def HasPAuthLR : Predicate<"Subtarget->hasPAuthLR()">, AssemblerPredicateWithAll<(all_of FeaturePAuthLR), "pauth-lr">; def HasJS : Predicate<"Subtarget->hasJS()">, AssemblerPredicateWithAll<(all_of FeatureJS), "jsconv">; def HasCCIDX : Predicate<"Subtarget->hasCCIDX()">, AssemblerPredicateWithAll<(all_of FeatureCCIDX), "ccidx">; def HasComplxNum : Predicate<"Subtarget->hasComplxNum()">, AssemblerPredicateWithAll<(all_of FeatureComplxNum), "complxnum">; def HasNV : Predicate<"Subtarget->hasNV()">, AssemblerPredicateWithAll<(all_of FeatureNV), "nv">; def HasMPAM : Predicate<"Subtarget->hasMPAM()">, AssemblerPredicateWithAll<(all_of FeatureMPAM), "mpam">; def HasDIT : Predicate<"Subtarget->hasDIT()">, AssemblerPredicateWithAll<(all_of FeatureDIT), "dit">; def HasTRACEV8_4 : Predicate<"Subtarget->hasTRACEV8_4()">, AssemblerPredicateWithAll<(all_of FeatureTRACEV8_4), "tracev8.4">; def HasAM : Predicate<"Subtarget->hasAM()">, AssemblerPredicateWithAll<(all_of FeatureAM), "am">; def HasSEL2 : Predicate<"Subtarget->hasSEL2()">, AssemblerPredicateWithAll<(all_of FeatureSEL2), "sel2">; def 
HasTLB_RMI : Predicate<"Subtarget->hasTLB_RMI()">, AssemblerPredicateWithAll<(all_of FeatureTLB_RMI), "tlb-rmi">; def HasFlagM : Predicate<"Subtarget->hasFlagM()">, AssemblerPredicateWithAll<(all_of FeatureFlagM), "flagm">; def HasRCPC_IMMO : Predicate<"Subtarget->hasRCPC_IMMO()">, AssemblerPredicateWithAll<(all_of FeatureRCPC_IMMO), "rcpc-immo">; def HasFPARMv8 : Predicate<"Subtarget->hasFPARMv8()">, AssemblerPredicateWithAll<(all_of FeatureFPARMv8), "fp-armv8">; def HasNEON : Predicate<"Subtarget->isNeonAvailable()">, AssemblerPredicateWithAll<(all_of FeatureNEON), "neon">; def HasSM4 : Predicate<"Subtarget->hasSM4()">, AssemblerPredicateWithAll<(all_of FeatureSM4), "sm4">; def HasSHA3 : Predicate<"Subtarget->hasSHA3()">, AssemblerPredicateWithAll<(all_of FeatureSHA3), "sha3">; def HasSHA2 : Predicate<"Subtarget->hasSHA2()">, AssemblerPredicateWithAll<(all_of FeatureSHA2), "sha2">; def HasAES : Predicate<"Subtarget->hasAES()">, AssemblerPredicateWithAll<(all_of FeatureAES), "aes">; def HasDotProd : Predicate<"Subtarget->hasDotProd()">, AssemblerPredicateWithAll<(all_of FeatureDotProd), "dotprod">; def HasCRC : Predicate<"Subtarget->hasCRC()">, AssemblerPredicateWithAll<(all_of FeatureCRC), "crc">; def HasCSSC : Predicate<"Subtarget->hasCSSC()">, AssemblerPredicateWithAll<(all_of FeatureCSSC), "cssc">; def HasNoCSSC : Predicate<"!Subtarget->hasCSSC()">; def HasLSE : Predicate<"Subtarget->hasLSE()">, AssemblerPredicateWithAll<(all_of FeatureLSE), "lse">; def HasNoLSE : Predicate<"!Subtarget->hasLSE()">; def HasRAS : Predicate<"Subtarget->hasRAS()">, AssemblerPredicateWithAll<(all_of FeatureRAS), "ras">; def HasRDM : Predicate<"Subtarget->hasRDM()">, AssemblerPredicateWithAll<(all_of FeatureRDM), "rdm">; def HasFullFP16 : Predicate<"Subtarget->hasFullFP16()">, AssemblerPredicateWithAll<(all_of FeatureFullFP16), "fullfp16">; def HasNoFullFP16 : Predicate<"!Subtarget->hasFullFP16()">; def HasFP16FML : Predicate<"Subtarget->hasFP16FML()">, AssemblerPredicateWithAll<(all_of FeatureFP16FML), "fp16fml">; def HasSPE : Predicate<"Subtarget->hasSPE()">, AssemblerPredicateWithAll<(all_of FeatureSPE), "spe">; def HasFuseAES : Predicate<"Subtarget->hasFuseAES()">, AssemblerPredicateWithAll<(all_of FeatureFuseAES), "fuse-aes">; def HasSVE : Predicate<"Subtarget->isSVEAvailable()">, AssemblerPredicateWithAll<(all_of FeatureSVE), "sve">; def HasSVE2 : Predicate<"Subtarget->isSVEAvailable() && Subtarget->hasSVE2()">, AssemblerPredicateWithAll<(all_of FeatureSVE2), "sve2">; def HasSVE2p1 : Predicate<"Subtarget->isSVEAvailable() && Subtarget->hasSVE2p1()">, AssemblerPredicateWithAll<(all_of FeatureSVE2p1), "sve2p1">; def HasSVE2AES : Predicate<"Subtarget->isSVEAvailable() && Subtarget->hasSVE2AES()">, AssemblerPredicateWithAll<(all_of FeatureSVE2AES), "sve2-aes">; def HasSVE2SM4 : Predicate<"Subtarget->isSVEAvailable() && Subtarget->hasSVE2SM4()">, AssemblerPredicateWithAll<(all_of FeatureSVE2SM4), "sve2-sm4">; def HasSVE2SHA3 : Predicate<"Subtarget->isSVEAvailable() && Subtarget->hasSVE2SHA3()">, AssemblerPredicateWithAll<(all_of FeatureSVE2SHA3), "sve2-sha3">; def HasSVE2BitPerm : Predicate<"Subtarget->isSVEAvailable() && Subtarget->hasSVE2BitPerm()">, AssemblerPredicateWithAll<(all_of FeatureSVE2BitPerm), "sve2-bitperm">; def HasB16B16 : Predicate<"Subtarget->hasB16B16()">, AssemblerPredicateWithAll<(all_of FeatureB16B16), "b16b16">; def HasSMEandIsNonStreamingSafe : Predicate<"Subtarget->hasSME()">, AssemblerPredicateWithAll<(all_of FeatureSME), "sme">; def HasSME : Predicate<"Subtarget->isStreaming() && 
Subtarget->hasSME()">, AssemblerPredicateWithAll<(all_of FeatureSME), "sme">; def HasSMEF64F64 : Predicate<"Subtarget->isStreaming() && Subtarget->hasSMEF64F64()">, AssemblerPredicateWithAll<(all_of FeatureSMEF64F64), "sme-f64f64">; def HasSMEF16F16 : Predicate<"Subtarget->isStreaming() && Subtarget->hasSMEF16F16()">, AssemblerPredicateWithAll<(all_of FeatureSMEF16F16), "sme-f16f16">; def HasSMEFA64 : Predicate<"Subtarget->isStreaming() && Subtarget->hasSMEFA64()">, AssemblerPredicateWithAll<(all_of FeatureSMEFA64), "sme-fa64">; def HasSMEI16I64 : Predicate<"Subtarget->isStreaming() && Subtarget->hasSMEI16I64()">, AssemblerPredicateWithAll<(all_of FeatureSMEI16I64), "sme-i16i64">; def HasSME2andIsNonStreamingSafe : Predicate<"Subtarget->hasSME2()">, AssemblerPredicateWithAll<(all_of FeatureSME2), "sme2">; def HasSME2 : Predicate<"Subtarget->isStreaming() && Subtarget->hasSME2()">, AssemblerPredicateWithAll<(all_of FeatureSME2), "sme2">; def HasSME2p1 : Predicate<"Subtarget->isStreaming() && Subtarget->hasSME2p1()">, AssemblerPredicateWithAll<(all_of FeatureSME2p1), "sme2p1">; def HasFP8 : Predicate<"Subtarget->hasFP8()">, AssemblerPredicateWithAll<(all_of FeatureFP8), "fp8">; def HasFAMINMAX : Predicate<"Subtarget->hasFAMINMAX()">, AssemblerPredicateWithAll<(all_of FeatureFAMINMAX), "faminmax">; def HasFP8FMA : Predicate<"Subtarget->hasFP8FMA()">, AssemblerPredicateWithAll<(all_of FeatureFP8FMA), "fp8fma">; def HasSSVE_FP8FMA : Predicate<"Subtarget->hasSSVE_FP8FMA() || " "(Subtarget->hasSVE2() && Subtarget->hasFP8FMA())">, AssemblerPredicateWithAll<(any_of FeatureSSVE_FP8FMA, (all_of FeatureSVE2, FeatureFP8FMA)), "ssve-fp8fma or (sve2 and fp8fma)">; def HasFP8DOT2 : Predicate<"Subtarget->hasFP8DOT2()">, AssemblerPredicateWithAll<(all_of FeatureFP8DOT2), "fp8dot2">; def HasSSVE_FP8DOT2 : Predicate<"Subtarget->hasSSVE_FP8DOT2() || " "(Subtarget->hasSVE2() && Subtarget->hasFP8DOT2())">, AssemblerPredicateWithAll<(any_of FeatureSSVE_FP8DOT2, (all_of FeatureSVE2, FeatureFP8DOT2)), "ssve-fp8dot2 or (sve2 and fp8dot2)">; def HasFP8DOT4 : Predicate<"Subtarget->hasFP8DOT4()">, AssemblerPredicateWithAll<(all_of FeatureFP8DOT4), "fp8dot4">; def HasSSVE_FP8DOT4 : Predicate<"Subtarget->hasSSVE_FP8DOT4() || " "(Subtarget->hasSVE2() && Subtarget->hasFP8DOT4())">, AssemblerPredicateWithAll<(any_of FeatureSSVE_FP8DOT4, (all_of FeatureSVE2, FeatureFP8DOT4)), "ssve-fp8dot4 or (sve2 and fp8dot4)">; def HasLUT : Predicate<"Subtarget->hasLUT()">, AssemblerPredicateWithAll<(all_of FeatureLUT), "lut">; def HasSME_LUTv2 : Predicate<"Subtarget->isStreaming() && Subtarget->hasSME_LUTv2()">, AssemblerPredicateWithAll<(all_of FeatureSME_LUTv2), "sme-lutv2">; def HasSMEF8F16 : Predicate<"Subtarget->isStreaming() && Subtarget->hasSMEF8F16()">, AssemblerPredicateWithAll<(all_of FeatureSMEF8F16), "sme-f8f16">; def HasSMEF8F32 : Predicate<"Subtarget->isStreaming() && Subtarget->hasSMEF8F32()">, AssemblerPredicateWithAll<(all_of FeatureSMEF8F32), "sme-f8f32">; // A subset of SVE(2) instructions are legal in Streaming SVE execution mode, // they should be enabled if either has been specified. 
def HasSVEorSME : Predicate<"Subtarget->hasSVE() || (Subtarget->isStreaming() && Subtarget->hasSME())">, AssemblerPredicateWithAll<(any_of FeatureSVE, FeatureSME), "sve or sme">; def HasSVE2orSME : Predicate<"Subtarget->hasSVE2() || (Subtarget->isStreaming() && Subtarget->hasSME())">, AssemblerPredicateWithAll<(any_of FeatureSVE2, FeatureSME), "sve2 or sme">; def HasSVE2orSME2 : Predicate<"Subtarget->hasSVE2() || (Subtarget->isStreaming() && Subtarget->hasSME2())">, AssemblerPredicateWithAll<(any_of FeatureSVE2, FeatureSME2), "sve2 or sme2">; def HasSVE2p1_or_HasSME : Predicate<"Subtarget->hasSVE2p1() || (Subtarget->isStreaming() && Subtarget->hasSME())">, AssemblerPredicateWithAll<(any_of FeatureSME, FeatureSVE2p1), "sme or sve2p1">; def HasSVE2p1_or_HasSME2 : Predicate<"Subtarget->hasSVE2p1() || (Subtarget->isStreaming() && Subtarget->hasSME2())">, AssemblerPredicateWithAll<(any_of FeatureSME2, FeatureSVE2p1), "sme2 or sve2p1">; def HasSVE2p1_or_HasSME2p1 : Predicate<"Subtarget->hasSVE2p1() || (Subtarget->isStreaming() && Subtarget->hasSME2p1())">, AssemblerPredicateWithAll<(any_of FeatureSME2p1, FeatureSVE2p1), "sme2p1 or sve2p1">; def HasSMEF16F16orSMEF8F16 : Predicate<"Subtarget->isStreaming() && (Subtarget->hasSMEF16F16() || Subtarget->hasSMEF8F16())">, AssemblerPredicateWithAll<(any_of FeatureSMEF16F16, FeatureSMEF8F16), "sme-f16f16 or sme-f8f16">; // A subset of NEON instructions are legal in Streaming SVE execution mode, // so don't need the additional check for 'isNeonAvailable'. def HasNEONandIsStreamingSafe : Predicate<"Subtarget->hasNEON()">, AssemblerPredicateWithAll<(any_of FeatureNEON), "neon">; def HasRCPC : Predicate<"Subtarget->hasRCPC()">, AssemblerPredicateWithAll<(all_of FeatureRCPC), "rcpc">; def HasAltNZCV : Predicate<"Subtarget->hasAlternativeNZCV()">, AssemblerPredicateWithAll<(all_of FeatureAltFPCmp), "altnzcv">; def HasFRInt3264 : Predicate<"Subtarget->hasFRInt3264()">, AssemblerPredicateWithAll<(all_of FeatureFRInt3264), "frint3264">; def HasSB : Predicate<"Subtarget->hasSB()">, AssemblerPredicateWithAll<(all_of FeatureSB), "sb">; def HasPredRes : Predicate<"Subtarget->hasPredRes()">, AssemblerPredicateWithAll<(all_of FeaturePredRes), "predres">; def HasCCDP : Predicate<"Subtarget->hasCCDP()">, AssemblerPredicateWithAll<(all_of FeatureCacheDeepPersist), "ccdp">; def HasBTI : Predicate<"Subtarget->hasBTI()">, AssemblerPredicateWithAll<(all_of FeatureBranchTargetId), "bti">; def HasMTE : Predicate<"Subtarget->hasMTE()">, AssemblerPredicateWithAll<(all_of FeatureMTE), "mte">; def HasTME : Predicate<"Subtarget->hasTME()">, AssemblerPredicateWithAll<(all_of FeatureTME), "tme">; def HasETE : Predicate<"Subtarget->hasETE()">, AssemblerPredicateWithAll<(all_of FeatureETE), "ete">; def HasTRBE : Predicate<"Subtarget->hasTRBE()">, AssemblerPredicateWithAll<(all_of FeatureTRBE), "trbe">; def HasBF16 : Predicate<"Subtarget->hasBF16()">, AssemblerPredicateWithAll<(all_of FeatureBF16), "bf16">; def HasNoBF16 : Predicate<"!Subtarget->hasBF16()">; def HasMatMulInt8 : Predicate<"Subtarget->hasMatMulInt8()">, AssemblerPredicateWithAll<(all_of FeatureMatMulInt8), "i8mm">; def HasMatMulFP32 : Predicate<"Subtarget->hasMatMulFP32()">, AssemblerPredicateWithAll<(all_of FeatureMatMulFP32), "f32mm">; def HasMatMulFP64 : Predicate<"Subtarget->hasMatMulFP64()">, AssemblerPredicateWithAll<(all_of FeatureMatMulFP64), "f64mm">; def HasFPAC : Predicate<"Subtarget->hasFPAC())">, AssemblerPredicateWithAll<(all_of FeatureFPAC), "fpac">; def HasXS : Predicate<"Subtarget->hasXS()">, 
AssemblerPredicateWithAll<(all_of FeatureXS), "xs">; def HasWFxT : Predicate<"Subtarget->hasWFxT()">, AssemblerPredicateWithAll<(all_of FeatureWFxT), "wfxt">; def HasLS64 : Predicate<"Subtarget->hasLS64()">, AssemblerPredicateWithAll<(all_of FeatureLS64), "ls64">; def HasBRBE : Predicate<"Subtarget->hasBRBE()">, AssemblerPredicateWithAll<(all_of FeatureBRBE), "brbe">; def HasSPE_EEF : Predicate<"Subtarget->hasSPE_EEF()">, AssemblerPredicateWithAll<(all_of FeatureSPE_EEF), "spe-eef">; def HasHBC : Predicate<"Subtarget->hasHBC()">, AssemblerPredicateWithAll<(all_of FeatureHBC), "hbc">; def HasMOPS : Predicate<"Subtarget->hasMOPS()">, AssemblerPredicateWithAll<(all_of FeatureMOPS), "mops">; def HasCLRBHB : Predicate<"Subtarget->hasCLRBHB()">, AssemblerPredicateWithAll<(all_of FeatureCLRBHB), "clrbhb">; def HasSPECRES2 : Predicate<"Subtarget->hasSPECRES2()">, AssemblerPredicateWithAll<(all_of FeatureSPECRES2), "specres2">; def HasITE : Predicate<"Subtarget->hasITE()">, AssemblerPredicateWithAll<(all_of FeatureITE), "ite">; def HasTHE : Predicate<"Subtarget->hasTHE()">, AssemblerPredicateWithAll<(all_of FeatureTHE), "the">; def HasRCPC3 : Predicate<"Subtarget->hasRCPC3()">, AssemblerPredicateWithAll<(all_of FeatureRCPC3), "rcpc3">; def HasLSE128 : Predicate<"Subtarget->hasLSE128()">, AssemblerPredicateWithAll<(all_of FeatureLSE128), "lse128">; def HasD128 : Predicate<"Subtarget->hasD128()">, AssemblerPredicateWithAll<(all_of FeatureD128), "d128">; def HasCHK : Predicate<"Subtarget->hasCHK()">, AssemblerPredicateWithAll<(all_of FeatureCHK), "chk">; def HasGCS : Predicate<"Subtarget->hasGCS()">, AssemblerPredicateWithAll<(all_of FeatureGCS), "gcs">; def HasCPA : Predicate<"Subtarget->hasCPA()">, AssemblerPredicateWithAll<(all_of FeatureCPA), "cpa">; def IsLE : Predicate<"Subtarget->isLittleEndian()">; def IsBE : Predicate<"!Subtarget->isLittleEndian()">; def IsWindows : Predicate<"Subtarget->isTargetWindows()">; def UseExperimentalZeroingPseudos : Predicate<"Subtarget->useExperimentalZeroingPseudos()">; def UseAlternateSExtLoadCVTF32 : Predicate<"Subtarget->useAlternateSExtLoadCVTF32Pattern()">; def UseNegativeImmediates : Predicate<"false">, AssemblerPredicate<(all_of (not FeatureNoNegativeImmediates)), "NegativeImmediates">; def UseScalarIncVL : Predicate<"Subtarget->useScalarIncVL()">; def NoUseScalarIncVL : Predicate<"!Subtarget->useScalarIncVL()">; def UseSVEFPLD1R : Predicate<"!Subtarget->noSVEFPLD1R()">; def AArch64LocalRecover : SDNode<"ISD::LOCAL_RECOVER", SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisInt<1>]>>; //===----------------------------------------------------------------------===// // AArch64-specific DAG Nodes. 
// // SDTBinaryArithWithFlagsOut - RES1, FLAGS = op LHS, RHS def SDTBinaryArithWithFlagsOut : SDTypeProfile<2, 2, [SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisInt<0>, SDTCisVT<1, i32>]>; // SDTBinaryArithWithFlagsIn - RES1, FLAGS = op LHS, RHS, FLAGS def SDTBinaryArithWithFlagsIn : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisInt<0>, SDTCisVT<3, i32>]>; // SDTBinaryArithWithFlagsInOut - RES1, FLAGS = op LHS, RHS, FLAGS def SDTBinaryArithWithFlagsInOut : SDTypeProfile<2, 3, [SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisInt<0>, SDTCisVT<1, i32>, SDTCisVT<4, i32>]>; def SDT_AArch64Brcond : SDTypeProfile<0, 3, [SDTCisVT<0, OtherVT>, SDTCisVT<1, i32>, SDTCisVT<2, i32>]>; def SDT_AArch64cbz : SDTypeProfile<0, 2, [SDTCisInt<0>, SDTCisVT<1, OtherVT>]>; def SDT_AArch64tbz : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>, SDTCisVT<2, OtherVT>]>; def SDT_AArch64CSel : SDTypeProfile<1, 4, [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisInt<3>, SDTCisVT<4, i32>]>; def SDT_AArch64CCMP : SDTypeProfile<1, 5, [SDTCisVT<0, i32>, SDTCisInt<1>, SDTCisSameAs<1, 2>, SDTCisInt<3>, SDTCisInt<4>, SDTCisVT<5, i32>]>; def SDT_AArch64FCCMP : SDTypeProfile<1, 5, [SDTCisVT<0, i32>, SDTCisFP<1>, SDTCisSameAs<1, 2>, SDTCisInt<3>, SDTCisInt<4>, SDTCisVT<5, i32>]>; def SDT_AArch64FCmp : SDTypeProfile<0, 2, [SDTCisFP<0>, SDTCisSameAs<0, 1>]>; def SDT_AArch64Dup : SDTypeProfile<1, 1, [SDTCisVec<0>]>; def SDT_AArch64DupLane : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisInt<2>]>; def SDT_AArch64Insr : SDTypeProfile<1, 2, [SDTCisVec<0>]>; def SDT_AArch64Zip : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>]>; def SDT_AArch64MOVIedit : SDTypeProfile<1, 1, [SDTCisInt<1>]>; def SDT_AArch64MOVIshift : SDTypeProfile<1, 2, [SDTCisInt<1>, SDTCisInt<2>]>; def SDT_AArch64vecimm : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>, SDTCisInt<2>, SDTCisInt<3>]>; def SDT_AArch64UnaryVec: SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>; def SDT_AArch64ExtVec: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>, SDTCisInt<3>]>; def SDT_AArch64vshift : SDTypeProfile<1, 2, [SDTCisSameAs<0,1>, SDTCisInt<2>]>; def SDT_AArch64Dot: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>, SDTCisVec<2>, SDTCisSameAs<2,3>]>; def SDT_AArch64vshiftinsert : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisInt<3>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>]>; def SDT_AArch64unvec : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>; def SDT_AArch64fcmpz : SDTypeProfile<1, 1, []>; def SDT_AArch64fcmp : SDTypeProfile<1, 2, [SDTCisSameAs<1,2>]>; def SDT_AArch64binvec : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>]>; def SDT_AArch64trivec : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>, SDTCisSameAs<0,3>]>; def SDT_AArch64TCRET : SDTypeProfile<0, 2, [SDTCisPtrTy<0>]>; def SDT_AArch64PREFETCH : SDTypeProfile<0, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<1>]>; def SDT_AArch64ITOF : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisSameAs<0,1>]>; def SDT_AArch64TLSDescCall : SDTypeProfile<0, -2, [SDTCisPtrTy<0>, SDTCisPtrTy<1>]>; def SDT_AArch64uaddlp : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>; def SDT_AArch64ldp : SDTypeProfile<2, 1, [SDTCisVT<0, i64>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>; def SDT_AArch64ldiapp : SDTypeProfile<2, 1, [SDTCisVT<0, i64>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>; def SDT_AArch64ldnp : SDTypeProfile<2, 1, [SDTCisVT<0, v4i32>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>; def SDT_AArch64stp : SDTypeProfile<0, 3, [SDTCisVT<0, i64>, 
SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>; def SDT_AArch64stilp : SDTypeProfile<0, 3, [SDTCisVT<0, i64>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>; def SDT_AArch64stnp : SDTypeProfile<0, 3, [SDTCisVT<0, v4i32>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>; // Generates the general dynamic sequences, i.e. // adrp x0, :tlsdesc:var // ldr x1, [x0, #:tlsdesc_lo12:var] // add x0, x0, #:tlsdesc_lo12:var // .tlsdesccall var // blr x1 // (the TPIDR_EL0 offset is put directly in X0, hence no "result" here) // number of operands (the variable) def SDT_AArch64TLSDescCallSeq : SDTypeProfile<0,1, [SDTCisPtrTy<0>]>; def SDT_AArch64WrapperLarge : SDTypeProfile<1, 4, [SDTCisVT<0, i64>, SDTCisVT<1, i32>, SDTCisSameAs<1, 2>, SDTCisSameAs<1, 3>, SDTCisSameAs<1, 4>]>; def SDT_AArch64TBL : SDTypeProfile<1, 2, [ SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisInt<2> ]>; // non-extending masked load fragment. def nonext_masked_load : PatFrag<(ops node:$ptr, node:$pred, node:$def), (masked_ld node:$ptr, undef, node:$pred, node:$def), [{ return cast(N)->getExtensionType() == ISD::NON_EXTLOAD && cast(N)->isUnindexed() && !cast(N)->isNonTemporal(); }]>; // Any/Zero extending masked load fragments. def azext_masked_load : PatFrag<(ops node:$ptr, node:$pred, node:$def), (masked_ld node:$ptr, undef, node:$pred, node:$def),[{ return (cast(N)->getExtensionType() == ISD::EXTLOAD || cast(N)->getExtensionType() == ISD::ZEXTLOAD) && cast(N)->isUnindexed(); }]>; def azext_masked_load_i8 : PatFrag<(ops node:$ptr, node:$pred, node:$def), (azext_masked_load node:$ptr, node:$pred, node:$def), [{ return cast(N)->getMemoryVT().getScalarType() == MVT::i8; }]>; def azext_masked_load_i16 : PatFrag<(ops node:$ptr, node:$pred, node:$def), (azext_masked_load node:$ptr, node:$pred, node:$def), [{ return cast(N)->getMemoryVT().getScalarType() == MVT::i16; }]>; def azext_masked_load_i32 : PatFrag<(ops node:$ptr, node:$pred, node:$def), (azext_masked_load node:$ptr, node:$pred, node:$def), [{ return cast(N)->getMemoryVT().getScalarType() == MVT::i32; }]>; // Sign extending masked load fragments. def sext_masked_load : PatFrag<(ops node:$ptr, node:$pred, node:$def), (masked_ld node:$ptr, undef, node:$pred, node:$def), [{ return cast(N)->getExtensionType() == ISD::SEXTLOAD && cast(N)->isUnindexed(); }]>; def sext_masked_load_i8 : PatFrag<(ops node:$ptr, node:$pred, node:$def), (sext_masked_load node:$ptr, node:$pred, node:$def), [{ return cast(N)->getMemoryVT().getScalarType() == MVT::i8; }]>; def sext_masked_load_i16 : PatFrag<(ops node:$ptr, node:$pred, node:$def), (sext_masked_load node:$ptr, node:$pred, node:$def), [{ return cast(N)->getMemoryVT().getScalarType() == MVT::i16; }]>; def sext_masked_load_i32 : PatFrag<(ops node:$ptr, node:$pred, node:$def), (sext_masked_load node:$ptr, node:$pred, node:$def), [{ return cast(N)->getMemoryVT().getScalarType() == MVT::i32; }]>; def non_temporal_load : PatFrag<(ops node:$ptr, node:$pred, node:$def), (masked_ld node:$ptr, undef, node:$pred, node:$def), [{ return cast(N)->getExtensionType() == ISD::NON_EXTLOAD && cast(N)->isUnindexed() && cast(N)->isNonTemporal(); }]>; // non-truncating masked store fragment. def nontrunc_masked_store : PatFrag<(ops node:$val, node:$ptr, node:$pred), (masked_st node:$val, node:$ptr, undef, node:$pred), [{ return !cast(N)->isTruncatingStore() && cast(N)->isUnindexed() && !cast(N)->isNonTemporal(); }]>; // truncating masked store fragments. 
def trunc_masked_store : PatFrag<(ops node:$val, node:$ptr, node:$pred), (masked_st node:$val, node:$ptr, undef, node:$pred), [{ return cast(N)->isTruncatingStore() && cast(N)->isUnindexed(); }]>; def trunc_masked_store_i8 : PatFrag<(ops node:$val, node:$ptr, node:$pred), (trunc_masked_store node:$val, node:$ptr, node:$pred), [{ return cast(N)->getMemoryVT().getScalarType() == MVT::i8; }]>; def trunc_masked_store_i16 : PatFrag<(ops node:$val, node:$ptr, node:$pred), (trunc_masked_store node:$val, node:$ptr, node:$pred), [{ return cast(N)->getMemoryVT().getScalarType() == MVT::i16; }]>; def trunc_masked_store_i32 : PatFrag<(ops node:$val, node:$ptr, node:$pred), (trunc_masked_store node:$val, node:$ptr, node:$pred), [{ return cast(N)->getMemoryVT().getScalarType() == MVT::i32; }]>; def non_temporal_store : PatFrag<(ops node:$val, node:$ptr, node:$pred), (masked_st node:$val, node:$ptr, undef, node:$pred), [{ return !cast(N)->isTruncatingStore() && cast(N)->isUnindexed() && cast(N)->isNonTemporal(); }]>; multiclass masked_gather_scatter { // offsets = (signed)Index << sizeof(elt) def NAME#_signed_scaled : PatFrag<(ops node:$val, node:$pred, node:$ptr, node:$idx), (GatherScatterOp node:$val, node:$pred, node:$ptr, node:$idx),[{ auto MGS = cast(N); bool Signed = MGS->isIndexSigned() || MGS->getIndex().getValueType().getVectorElementType() == MVT::i64; return Signed && MGS->isIndexScaled(); }]>; // offsets = (signed)Index def NAME#_signed_unscaled : PatFrag<(ops node:$val, node:$pred, node:$ptr, node:$idx), (GatherScatterOp node:$val, node:$pred, node:$ptr, node:$idx),[{ auto MGS = cast(N); bool Signed = MGS->isIndexSigned() || MGS->getIndex().getValueType().getVectorElementType() == MVT::i64; return Signed && !MGS->isIndexScaled(); }]>; // offsets = (unsigned)Index << sizeof(elt) def NAME#_unsigned_scaled : PatFrag<(ops node:$val, node:$pred, node:$ptr, node:$idx), (GatherScatterOp node:$val, node:$pred, node:$ptr, node:$idx),[{ auto MGS = cast(N); bool Signed = MGS->isIndexSigned() || MGS->getIndex().getValueType().getVectorElementType() == MVT::i64; return !Signed && MGS->isIndexScaled(); }]>; // offsets = (unsigned)Index def NAME#_unsigned_unscaled : PatFrag<(ops node:$val, node:$pred, node:$ptr, node:$idx), (GatherScatterOp node:$val, node:$pred, node:$ptr, node:$idx),[{ auto MGS = cast(N); bool Signed = MGS->isIndexSigned() || MGS->getIndex().getValueType().getVectorElementType() == MVT::i64; return !Signed && !MGS->isIndexScaled(); }]>; } defm nonext_masked_gather : masked_gather_scatter; defm azext_masked_gather_i8 : masked_gather_scatter; defm azext_masked_gather_i16 : masked_gather_scatter; defm azext_masked_gather_i32 : masked_gather_scatter; defm sext_masked_gather_i8 : masked_gather_scatter; defm sext_masked_gather_i16 : masked_gather_scatter; defm sext_masked_gather_i32 : masked_gather_scatter; defm nontrunc_masked_scatter : masked_gather_scatter; defm trunc_masked_scatter_i8 : masked_gather_scatter; defm trunc_masked_scatter_i16 : masked_gather_scatter; defm trunc_masked_scatter_i32 : masked_gather_scatter; // top16Zero - answer true if the upper 16 bits of $src are 0, false otherwise def top16Zero: PatLeaf<(i32 GPR32:$src), [{ return SDValue(N,0)->getValueType(0) == MVT::i32 && CurDAG->MaskedValueIsZero(SDValue(N,0), APInt::getHighBitsSet(32, 16)); }]>; // top32Zero - answer true if the upper 32 bits of $src are 0, false otherwise def top32Zero: PatLeaf<(i64 GPR64:$src), [{ return SDValue(N,0)->getValueType(0) == MVT::i64 && CurDAG->MaskedValueIsZero(SDValue(N,0), 
APInt::getHighBitsSet(64, 32)); }]>; // topbitsallzero - Return true if all bits except the lowest bit are known zero def topbitsallzero32: PatLeaf<(i32 GPR32:$src), [{ return SDValue(N,0)->getValueType(0) == MVT::i32 && CurDAG->MaskedValueIsZero(SDValue(N,0), APInt::getHighBitsSet(32, 31)); }]>; def topbitsallzero64: PatLeaf<(i64 GPR64:$src), [{ return SDValue(N,0)->getValueType(0) == MVT::i64 && CurDAG->MaskedValueIsZero(SDValue(N,0), APInt::getHighBitsSet(64, 63)); }]>; // Node definitions. def AArch64adrp : SDNode<"AArch64ISD::ADRP", SDTIntUnaryOp, []>; def AArch64adr : SDNode<"AArch64ISD::ADR", SDTIntUnaryOp, []>; def AArch64addlow : SDNode<"AArch64ISD::ADDlow", SDTIntBinOp, []>; def AArch64LOADgot : SDNode<"AArch64ISD::LOADgot", SDTIntUnaryOp>; def AArch64callseq_start : SDNode<"ISD::CALLSEQ_START", SDCallSeqStart<[ SDTCisVT<0, i32>, SDTCisVT<1, i32> ]>, [SDNPHasChain, SDNPOutGlue]>; def AArch64callseq_end : SDNode<"ISD::CALLSEQ_END", SDCallSeqEnd<[ SDTCisVT<0, i32>, SDTCisVT<1, i32> ]>, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; def AArch64call : SDNode<"AArch64ISD::CALL", SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; def AArch64call_bti : SDNode<"AArch64ISD::CALL_BTI", SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; def AArch64call_rvmarker: SDNode<"AArch64ISD::CALL_RVMARKER", SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; def AArch64call_arm64ec_to_x64 : SDNode<"AArch64ISD::CALL_ARM64EC_TO_X64", SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; def AArch64authcall : SDNode<"AArch64ISD::AUTH_CALL", SDTypeProfile<0, -1, [SDTCisPtrTy<0>, SDTCisVT<1, i32>, SDTCisVT<2, i64>, SDTCisVT<3, i64>]>, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; def AArch64authtcret: SDNode<"AArch64ISD::AUTH_TC_RETURN", SDTypeProfile<0, 5, [SDTCisPtrTy<0>, SDTCisVT<2, i32>, SDTCisVT<3, i64>, SDTCisVT<4, i64>]>, [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; def AArch64authcall_rvmarker : SDNode<"AArch64ISD::AUTH_CALL_RVMARKER", SDTypeProfile<0, -1, [SDTCisPtrTy<0>, SDTCisPtrTy<1>, SDTCisVT<2, i32>, SDTCisVT<3, i64>, SDTCisVT<4, i64>]>, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; def AArch64brcond : SDNode<"AArch64ISD::BRCOND", SDT_AArch64Brcond, [SDNPHasChain]>; def AArch64cbz : SDNode<"AArch64ISD::CBZ", SDT_AArch64cbz, [SDNPHasChain]>; def AArch64cbnz : SDNode<"AArch64ISD::CBNZ", SDT_AArch64cbz, [SDNPHasChain]>; def AArch64tbz : SDNode<"AArch64ISD::TBZ", SDT_AArch64tbz, [SDNPHasChain]>; def AArch64tbnz : SDNode<"AArch64ISD::TBNZ", SDT_AArch64tbz, [SDNPHasChain]>; def AArch64csel : SDNode<"AArch64ISD::CSEL", SDT_AArch64CSel>; def AArch64csinv : SDNode<"AArch64ISD::CSINV", SDT_AArch64CSel>; def AArch64csneg : SDNode<"AArch64ISD::CSNEG", SDT_AArch64CSel>; def AArch64csinc : SDNode<"AArch64ISD::CSINC", SDT_AArch64CSel>; def AArch64retglue : SDNode<"AArch64ISD::RET_GLUE", SDTNone, [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; def AArch64adc : SDNode<"AArch64ISD::ADC", SDTBinaryArithWithFlagsIn >; def AArch64sbc : SDNode<"AArch64ISD::SBC", SDTBinaryArithWithFlagsIn>; def AArch64add_flag : SDNode<"AArch64ISD::ADDS", SDTBinaryArithWithFlagsOut, [SDNPCommutative]>; def AArch64sub_flag : SDNode<"AArch64ISD::SUBS", SDTBinaryArithWithFlagsOut>; def AArch64and_flag : SDNode<"AArch64ISD::ANDS", SDTBinaryArithWithFlagsOut, [SDNPCommutative]>; def AArch64adc_flag : SDNode<"AArch64ISD::ADCS", 
SDTBinaryArithWithFlagsInOut>; def AArch64sbc_flag : SDNode<"AArch64ISD::SBCS", SDTBinaryArithWithFlagsInOut>; def AArch64ccmp : SDNode<"AArch64ISD::CCMP", SDT_AArch64CCMP>; def AArch64ccmn : SDNode<"AArch64ISD::CCMN", SDT_AArch64CCMP>; def AArch64fccmp : SDNode<"AArch64ISD::FCCMP", SDT_AArch64FCCMP>; def AArch64threadpointer : SDNode<"AArch64ISD::THREAD_POINTER", SDTPtrLeaf>; def AArch64fcmp : SDNode<"AArch64ISD::FCMP", SDT_AArch64FCmp>; def AArch64strict_fcmp : SDNode<"AArch64ISD::STRICT_FCMP", SDT_AArch64FCmp, [SDNPHasChain]>; def AArch64strict_fcmpe : SDNode<"AArch64ISD::STRICT_FCMPE", SDT_AArch64FCmp, [SDNPHasChain]>; def AArch64any_fcmp : PatFrags<(ops node:$lhs, node:$rhs), [(AArch64strict_fcmp node:$lhs, node:$rhs), (AArch64fcmp node:$lhs, node:$rhs)]>; def AArch64dup : SDNode<"AArch64ISD::DUP", SDT_AArch64Dup>; def AArch64duplane8 : SDNode<"AArch64ISD::DUPLANE8", SDT_AArch64DupLane>; def AArch64duplane16 : SDNode<"AArch64ISD::DUPLANE16", SDT_AArch64DupLane>; def AArch64duplane32 : SDNode<"AArch64ISD::DUPLANE32", SDT_AArch64DupLane>; def AArch64duplane64 : SDNode<"AArch64ISD::DUPLANE64", SDT_AArch64DupLane>; def AArch64duplane128 : SDNode<"AArch64ISD::DUPLANE128", SDT_AArch64DupLane>; def AArch64insr : SDNode<"AArch64ISD::INSR", SDT_AArch64Insr>; def AArch64zip1 : SDNode<"AArch64ISD::ZIP1", SDT_AArch64Zip>; def AArch64zip2 : SDNode<"AArch64ISD::ZIP2", SDT_AArch64Zip>; def AArch64uzp1 : SDNode<"AArch64ISD::UZP1", SDT_AArch64Zip>; def AArch64uzp2 : SDNode<"AArch64ISD::UZP2", SDT_AArch64Zip>; def AArch64trn1 : SDNode<"AArch64ISD::TRN1", SDT_AArch64Zip>; def AArch64trn2 : SDNode<"AArch64ISD::TRN2", SDT_AArch64Zip>; def AArch64movi_edit : SDNode<"AArch64ISD::MOVIedit", SDT_AArch64MOVIedit>; def AArch64movi_shift : SDNode<"AArch64ISD::MOVIshift", SDT_AArch64MOVIshift>; def AArch64movi_msl : SDNode<"AArch64ISD::MOVImsl", SDT_AArch64MOVIshift>; def AArch64mvni_shift : SDNode<"AArch64ISD::MVNIshift", SDT_AArch64MOVIshift>; def AArch64mvni_msl : SDNode<"AArch64ISD::MVNImsl", SDT_AArch64MOVIshift>; def AArch64movi : SDNode<"AArch64ISD::MOVI", SDT_AArch64MOVIedit>; def AArch64fmov : SDNode<"AArch64ISD::FMOV", SDT_AArch64MOVIedit>; def AArch64rev16 : SDNode<"AArch64ISD::REV16", SDT_AArch64UnaryVec>; def AArch64rev32 : SDNode<"AArch64ISD::REV32", SDT_AArch64UnaryVec>; def AArch64rev64 : SDNode<"AArch64ISD::REV64", SDT_AArch64UnaryVec>; def AArch64ext : SDNode<"AArch64ISD::EXT", SDT_AArch64ExtVec>; def AArch64vashr : SDNode<"AArch64ISD::VASHR", SDT_AArch64vshift>; def AArch64vashr_exact : PatFrag<(ops node:$lhs, node:$rhs), (AArch64vashr node:$lhs, node:$rhs), [{ return N->getFlags().hasExact(); }]>; def AArch64vlshr : SDNode<"AArch64ISD::VLSHR", SDT_AArch64vshift>; def AArch64vshl : SDNode<"AArch64ISD::VSHL", SDT_AArch64vshift>; def AArch64sqshli : SDNode<"AArch64ISD::SQSHL_I", SDT_AArch64vshift>; def AArch64uqshli : SDNode<"AArch64ISD::UQSHL_I", SDT_AArch64vshift>; def AArch64sqshlui : SDNode<"AArch64ISD::SQSHLU_I", SDT_AArch64vshift>; def AArch64srshri : SDNode<"AArch64ISD::SRSHR_I", SDT_AArch64vshift>; def AArch64urshri : SDNode<"AArch64ISD::URSHR_I", SDT_AArch64vshift>; def AArch64vsli : SDNode<"AArch64ISD::VSLI", SDT_AArch64vshiftinsert>; def AArch64vsri : SDNode<"AArch64ISD::VSRI", SDT_AArch64vshiftinsert>; def AArch64bsp: SDNode<"AArch64ISD::BSP", SDT_AArch64trivec>; def AArch64cmeq: SDNode<"AArch64ISD::CMEQ", SDT_AArch64binvec>; def AArch64cmge: SDNode<"AArch64ISD::CMGE", SDT_AArch64binvec>; def AArch64cmgt: SDNode<"AArch64ISD::CMGT", SDT_AArch64binvec>; def AArch64cmhi: 
SDNode<"AArch64ISD::CMHI", SDT_AArch64binvec>; def AArch64cmhs: SDNode<"AArch64ISD::CMHS", SDT_AArch64binvec>; def AArch64fcmeq: SDNode<"AArch64ISD::FCMEQ", SDT_AArch64fcmp>; def AArch64fcmge: SDNode<"AArch64ISD::FCMGE", SDT_AArch64fcmp>; def AArch64fcmgt: SDNode<"AArch64ISD::FCMGT", SDT_AArch64fcmp>; def AArch64cmeqz: SDNode<"AArch64ISD::CMEQz", SDT_AArch64unvec>; def AArch64cmgez: SDNode<"AArch64ISD::CMGEz", SDT_AArch64unvec>; def AArch64cmgtz: SDNode<"AArch64ISD::CMGTz", SDT_AArch64unvec>; def AArch64cmlez: SDNode<"AArch64ISD::CMLEz", SDT_AArch64unvec>; def AArch64cmltz: SDNode<"AArch64ISD::CMLTz", SDT_AArch64unvec>; def AArch64cmtst : PatFrag<(ops node:$LHS, node:$RHS), (vnot (AArch64cmeqz (and node:$LHS, node:$RHS)))>; def AArch64fcmeqz: SDNode<"AArch64ISD::FCMEQz", SDT_AArch64fcmpz>; def AArch64fcmgez: SDNode<"AArch64ISD::FCMGEz", SDT_AArch64fcmpz>; def AArch64fcmgtz: SDNode<"AArch64ISD::FCMGTz", SDT_AArch64fcmpz>; def AArch64fcmlez: SDNode<"AArch64ISD::FCMLEz", SDT_AArch64fcmpz>; def AArch64fcmltz: SDNode<"AArch64ISD::FCMLTz", SDT_AArch64fcmpz>; def AArch64fcvtxn_n: SDNode<"AArch64ISD::FCVTXN", SDTFPRoundOp>; def AArch64fcvtxnsdr: PatFrags<(ops node:$Rn), [(f32 (int_aarch64_sisd_fcvtxn (f64 node:$Rn))), (f32 (AArch64fcvtxn_n (f64 node:$Rn)))]>; def AArch64fcvtxnv: PatFrags<(ops node:$Rn), [(int_aarch64_neon_fcvtxn node:$Rn), (AArch64fcvtxn_n node:$Rn)]>; //def Aarch64softf32tobf16v8: SDNode<"AArch64ISD::", SDTFPRoundOp>; def AArch64bici: SDNode<"AArch64ISD::BICi", SDT_AArch64vecimm>; def AArch64orri: SDNode<"AArch64ISD::ORRi", SDT_AArch64vecimm>; def AArch64tcret: SDNode<"AArch64ISD::TC_RETURN", SDT_AArch64TCRET, [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; def AArch64Prefetch : SDNode<"AArch64ISD::PREFETCH", SDT_AArch64PREFETCH, [SDNPHasChain, SDNPSideEffect]>; def AArch64sitof: SDNode<"AArch64ISD::SITOF", SDT_AArch64ITOF>; def AArch64uitof: SDNode<"AArch64ISD::UITOF", SDT_AArch64ITOF>; def AArch64tlsdesc_callseq : SDNode<"AArch64ISD::TLSDESC_CALLSEQ", SDT_AArch64TLSDescCallSeq, [SDNPInGlue, SDNPOutGlue, SDNPHasChain, SDNPVariadic]>; def AArch64WrapperLarge : SDNode<"AArch64ISD::WrapperLarge", SDT_AArch64WrapperLarge>; def AArch64NvCast : SDNode<"AArch64ISD::NVCAST", SDTUnaryOp>; def SDT_AArch64mull : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>, SDTCisSameAs<1, 2>]>; def AArch64pmull : SDNode<"AArch64ISD::PMULL", SDT_AArch64mull, [SDNPCommutative]>; def AArch64smull : SDNode<"AArch64ISD::SMULL", SDT_AArch64mull, [SDNPCommutative]>; def AArch64umull : SDNode<"AArch64ISD::UMULL", SDT_AArch64mull, [SDNPCommutative]>; def AArch64frecpe : SDNode<"AArch64ISD::FRECPE", SDTFPUnaryOp>; def AArch64frecps : SDNode<"AArch64ISD::FRECPS", SDTFPBinOp>; def AArch64frsqrte : SDNode<"AArch64ISD::FRSQRTE", SDTFPUnaryOp>; def AArch64frsqrts : SDNode<"AArch64ISD::FRSQRTS", SDTFPBinOp>; def AArch64sdot : SDNode<"AArch64ISD::SDOT", SDT_AArch64Dot>; def AArch64udot : SDNode<"AArch64ISD::UDOT", SDT_AArch64Dot>; def AArch64saddv : SDNode<"AArch64ISD::SADDV", SDT_AArch64UnaryVec>; def AArch64uaddv : SDNode<"AArch64ISD::UADDV", SDT_AArch64UnaryVec>; def AArch64sminv : SDNode<"AArch64ISD::SMINV", SDT_AArch64UnaryVec>; def AArch64uminv : SDNode<"AArch64ISD::UMINV", SDT_AArch64UnaryVec>; def AArch64smaxv : SDNode<"AArch64ISD::SMAXV", SDT_AArch64UnaryVec>; def AArch64umaxv : SDNode<"AArch64ISD::UMAXV", SDT_AArch64UnaryVec>; def AArch64uaddlv : SDNode<"AArch64ISD::UADDLV", SDT_AArch64uaddlp>; def AArch64saddlv : SDNode<"AArch64ISD::SADDLV", SDT_AArch64uaddlp>; def AArch64uabd : PatFrags<(ops node:$lhs, 
node:$rhs), [(abdu node:$lhs, node:$rhs), (int_aarch64_neon_uabd node:$lhs, node:$rhs)]>; def AArch64sabd : PatFrags<(ops node:$lhs, node:$rhs), [(abds node:$lhs, node:$rhs), (int_aarch64_neon_sabd node:$lhs, node:$rhs)]>; def AArch64addp_n : SDNode<"AArch64ISD::ADDP", SDT_AArch64Zip>; def AArch64uaddlp_n : SDNode<"AArch64ISD::UADDLP", SDT_AArch64uaddlp>; def AArch64saddlp_n : SDNode<"AArch64ISD::SADDLP", SDT_AArch64uaddlp>; def AArch64addp : PatFrags<(ops node:$Rn, node:$Rm), [(AArch64addp_n node:$Rn, node:$Rm), (int_aarch64_neon_addp node:$Rn, node:$Rm)]>; def AArch64uaddlp : PatFrags<(ops node:$src), [(AArch64uaddlp_n node:$src), (int_aarch64_neon_uaddlp node:$src)]>; def AArch64saddlp : PatFrags<(ops node:$src), [(AArch64saddlp_n node:$src), (int_aarch64_neon_saddlp node:$src)]>; def AArch64faddp : PatFrags<(ops node:$Rn, node:$Rm), [(AArch64addp_n node:$Rn, node:$Rm), (int_aarch64_neon_faddp node:$Rn, node:$Rm)]>; def AArch64roundingvlshr : ComplexPattern; def AArch64rshrn : PatFrags<(ops node:$LHS, node:$RHS), [(trunc (AArch64roundingvlshr node:$LHS, node:$RHS)), (int_aarch64_neon_rshrn node:$LHS, node:$RHS)]>; def AArch64facge : PatFrags<(ops node:$Rn, node:$Rm), [(AArch64fcmge (fabs node:$Rn), (fabs node:$Rm)), (int_aarch64_neon_facge node:$Rn, node:$Rm)]>; def AArch64facgt : PatFrags<(ops node:$Rn, node:$Rm), [(AArch64fcmgt (fabs node:$Rn), (fabs node:$Rm)), (int_aarch64_neon_facgt node:$Rn, node:$Rm)]>; def AArch64fmaxnmv : PatFrags<(ops node:$Rn), [(vecreduce_fmax node:$Rn), (int_aarch64_neon_fmaxnmv node:$Rn)]>; def AArch64fminnmv : PatFrags<(ops node:$Rn), [(vecreduce_fmin node:$Rn), (int_aarch64_neon_fminnmv node:$Rn)]>; def AArch64fmaxv : PatFrags<(ops node:$Rn), [(vecreduce_fmaximum node:$Rn), (int_aarch64_neon_fmaxv node:$Rn)]>; def AArch64fminv : PatFrags<(ops node:$Rn), [(vecreduce_fminimum node:$Rn), (int_aarch64_neon_fminv node:$Rn)]>; def SDT_AArch64SETTAG : SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisPtrTy<1>]>; def AArch64stg : SDNode<"AArch64ISD::STG", SDT_AArch64SETTAG, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; def AArch64stzg : SDNode<"AArch64ISD::STZG", SDT_AArch64SETTAG, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; def AArch64st2g : SDNode<"AArch64ISD::ST2G", SDT_AArch64SETTAG, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; def AArch64stz2g : SDNode<"AArch64ISD::STZ2G", SDT_AArch64SETTAG, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; def SDT_AArch64unpk : SDTypeProfile<1, 1, [ SDTCisInt<0>, SDTCisInt<1>, SDTCisOpSmallerThanOp<1, 0> ]>; def AArch64sunpkhi : SDNode<"AArch64ISD::SUNPKHI", SDT_AArch64unpk>; def AArch64sunpklo : SDNode<"AArch64ISD::SUNPKLO", SDT_AArch64unpk>; def AArch64uunpkhi : SDNode<"AArch64ISD::UUNPKHI", SDT_AArch64unpk>; def AArch64uunpklo : SDNode<"AArch64ISD::UUNPKLO", SDT_AArch64unpk>; def AArch64ldp : SDNode<"AArch64ISD::LDP", SDT_AArch64ldp, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; def AArch64ldiapp : SDNode<"AArch64ISD::LDIAPP", SDT_AArch64ldiapp, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; def AArch64ldnp : SDNode<"AArch64ISD::LDNP", SDT_AArch64ldnp, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; def AArch64stp : SDNode<"AArch64ISD::STP", SDT_AArch64stp, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; def AArch64stilp : SDNode<"AArch64ISD::STILP", SDT_AArch64stilp, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; def AArch64stnp : SDNode<"AArch64ISD::STNP", SDT_AArch64stnp, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; def AArch64tbl : SDNode<"AArch64ISD::TBL", SDT_AArch64TBL>; def AArch64probedalloca : 
SDNode<"AArch64ISD::PROBED_ALLOCA", SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>, [SDNPHasChain, SDNPMayStore]>; def AArch64mrs : SDNode<"AArch64ISD::MRS", SDTypeProfile<1, 1, [SDTCisVT<0, i64>, SDTCisVT<1, i32>]>, [SDNPHasChain, SDNPOutGlue]>; def SD_AArch64rshrnb : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, SDTCisInt<2>]>; def AArch64rshrnb : SDNode<"AArch64ISD::RSHRNB_I", SD_AArch64rshrnb>; def AArch64rshrnb_pf : PatFrags<(ops node:$rs, node:$i), [(AArch64rshrnb node:$rs, node:$i), (int_aarch64_sve_rshrnb node:$rs, node:$i)]>; def AArch64CttzElts : SDNode<"AArch64ISD::CTTZ_ELTS", SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisVec<1>]>, []>; // Match add node and also treat an 'or' node is as an 'add' if the or'ed operands // have no common bits. def add_and_or_is_add : PatFrags<(ops node:$lhs, node:$rhs), [(add node:$lhs, node:$rhs), (or node:$lhs, node:$rhs)],[{ if (N->getOpcode() == ISD::ADD) return true; return CurDAG->isADDLike(SDValue(N,0)); }]> { let GISelPredicateCode = [{ // Only handle G_ADD for now. FIXME. build capability to compute whether // operands of G_OR have common bits set or not. return MI.getOpcode() == TargetOpcode::G_ADD; }]; } // Match mul with enough sign-bits. Can be reduced to a smaller mul operand. def smullwithsignbits : PatFrag<(ops node:$l, node:$r), (mul node:$l, node:$r), [{ return CurDAG->ComputeNumSignBits(N->getOperand(0)) > 32 && CurDAG->ComputeNumSignBits(N->getOperand(1)) > 32; }]>; //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // AArch64 Instruction Predicate Definitions. // We could compute these on a per-module basis but doing so requires accessing // the Function object through the Subtarget and objections were raised // to that (see post-commit review comments for r301750). let RecomputePerFunction = 1 in { def ForCodeSize : Predicate<"shouldOptForSize(MF)">; def NotForCodeSize : Predicate<"!shouldOptForSize(MF)">; // Avoid generating STRQro if it is slow, unless we're optimizing for code size. def UseSTRQro : Predicate<"!Subtarget->isSTRQroSlow() || shouldOptForSize(MF)">; // Register restrictions for indirect tail-calls: // - If branch target enforcement is enabled, indirect calls must use x16 or // x17, because these are the only registers which can target the BTI C // instruction. // - If PAuthLR is enabled, x16 is used in the epilogue to hold the address // of the signing instruction. This can't be changed because it is used by a // HINT instruction which only accepts x16. We can't load anything from the // stack after this because the authentication instruction checks that SP is // the same as it was at function entry, so we can't have anything on the // stack. 
// BTI on, PAuthLR off: x16 or x17 def TailCallX16X17 : Predicate<[{ MF->getInfo()->branchTargetEnforcement() && !MF->getInfo()->branchProtectionPAuthLR() }]>; // BTI on, PAuthLR on: x17 only def TailCallX17 : Predicate<[{ MF->getInfo()->branchTargetEnforcement() && MF->getInfo()->branchProtectionPAuthLR() }]>; // BTI off, PAuthLR on: Any non-callee-saved register except x16 def TailCallNotX16 : Predicate<[{ !MF->getInfo()->branchTargetEnforcement() && MF->getInfo()->branchProtectionPAuthLR() }]>; // BTI off, PAuthLR off: Any non-callee-saved register def TailCallAny : Predicate<[{ !MF->getInfo()->branchTargetEnforcement() && !MF->getInfo()->branchProtectionPAuthLR() }]>; def SLSBLRMitigation : Predicate<[{ MF->getSubtarget().hardenSlsBlr() }]>; def NoSLSBLRMitigation : Predicate<[{ !MF->getSubtarget().hardenSlsBlr() }]>; // Toggles patterns which aren't beneficial in GlobalISel when we aren't // optimizing. This allows us to selectively use patterns without impacting // SelectionDAG's behaviour. // FIXME: One day there will probably be a nicer way to check for this, but // today is not that day. def OptimizedGISelOrOtherSelector : Predicate<"!MF->getFunction().hasOptNone() || MF->getProperties().hasProperty(MachineFunctionProperties::Property::FailedISel) || !MF->getProperties().hasProperty(MachineFunctionProperties::Property::Legalized)">; } include "AArch64InstrFormats.td" include "SVEInstrFormats.td" include "SMEInstrFormats.td" //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // Miscellaneous instructions. //===----------------------------------------------------------------------===// let hasSideEffects = 1, isCodeGenOnly = 1 in { let Defs = [SP], Uses = [SP] in { // We set Sched to empty list because we expect these instructions to simply get // removed in most cases. def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2), [(AArch64callseq_start timm:$amt1, timm:$amt2)]>, Sched<[]>; def ADJCALLSTACKUP : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2), [(AArch64callseq_end timm:$amt1, timm:$amt2)]>, Sched<[]>; } let Defs = [SP, NZCV], Uses = [SP] in { // Probed stack allocation of a constant size, used in function prologues when // stack-clash protection is enabled. def PROBED_STACKALLOC : Pseudo<(outs GPR64:$scratch), (ins i64imm:$stacksize, i64imm:$fixed_offset, i64imm:$scalable_offset), []>, Sched<[]>; // Probed stack allocation of a variable size, used in function prologues when // stack-clash protection is enabled. def PROBED_STACKALLOC_VAR : Pseudo<(outs), (ins GPR64sp:$target), []>, Sched<[]>; // Probed stack allocations of a variable size, used for allocas of unknown size // when stack-clash protection is enabled. let usesCustomInserter = 1 in def PROBED_STACKALLOC_DYN : Pseudo<(outs), (ins GPR64common:$target), [(AArch64probedalloca GPR64common:$target)]>, Sched<[]>; } // Defs = [SP, NZCV], Uses = [SP] in } // hasSideEffects = 1, isCodeGenOnly = 1 let isReMaterializable = 1, isCodeGenOnly = 1 in { // FIXME: The following pseudo instructions are only needed because remat // cannot handle multiple instructions. When that changes, they can be // removed, along with the AArch64Wrapper node. 
let AddedComplexity = 10 in def LOADgot : Pseudo<(outs GPR64common:$dst), (ins i64imm:$addr), [(set GPR64common:$dst, (AArch64LOADgot tglobaladdr:$addr))]>, Sched<[WriteLDAdr]>; // The MOVaddr instruction should match only when the add is not folded // into a load or store address. def MOVaddr : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low), [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tglobaladdr:$hi), tglobaladdr:$low))]>, Sched<[WriteAdrAdr]>; def MOVaddrJT : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low), [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tjumptable:$hi), tjumptable:$low))]>, Sched<[WriteAdrAdr]>; def MOVaddrCP : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low), [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tconstpool:$hi), tconstpool:$low))]>, Sched<[WriteAdrAdr]>; def MOVaddrBA : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low), [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tblockaddress:$hi), tblockaddress:$low))]>, Sched<[WriteAdrAdr]>; def MOVaddrTLS : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low), [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tglobaltlsaddr:$hi), tglobaltlsaddr:$low))]>, Sched<[WriteAdrAdr]>; def MOVaddrEXT : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low), [(set GPR64common:$dst, (AArch64addlow (AArch64adrp texternalsym:$hi), texternalsym:$low))]>, Sched<[WriteAdrAdr]>; // Normally AArch64addlow either gets folded into a following ldr/str, // or together with an adrp into MOVaddr above. For cases with TLS, it // might appear without either of them, so allow lowering it into a plain // add. def ADDlowTLS : Pseudo<(outs GPR64sp:$dst), (ins GPR64sp:$src, i64imm:$low), [(set GPR64sp:$dst, (AArch64addlow GPR64sp:$src, tglobaltlsaddr:$low))]>, Sched<[WriteAdr]>; } // isReMaterializable, isCodeGenOnly def : Pat<(AArch64LOADgot tglobaltlsaddr:$addr), (LOADgot tglobaltlsaddr:$addr)>; def : Pat<(AArch64LOADgot texternalsym:$addr), (LOADgot texternalsym:$addr)>; def : Pat<(AArch64LOADgot tconstpool:$addr), (LOADgot tconstpool:$addr)>; // In general these get lowered into a sequence of three 4-byte instructions. // 32-bit jump table destination is actually only 2 instructions since we can // use the table itself as a PC-relative base. But optimization occurs after // branch relaxation so be pessimistic. let Size = 12, Constraints = "@earlyclobber $dst,@earlyclobber $scratch", isNotDuplicable = 1 in { def JumpTableDest32 : Pseudo<(outs GPR64:$dst, GPR64sp:$scratch), (ins GPR64:$table, GPR64:$entry, i32imm:$jti), []>, Sched<[]>; def JumpTableDest16 : Pseudo<(outs GPR64:$dst, GPR64sp:$scratch), (ins GPR64:$table, GPR64:$entry, i32imm:$jti), []>, Sched<[]>; def JumpTableDest8 : Pseudo<(outs GPR64:$dst, GPR64sp:$scratch), (ins GPR64:$table, GPR64:$entry, i32imm:$jti), []>, Sched<[]>; } // A hardened but more expensive version of jump-table dispatch. // This combines the target address computation (otherwise done using the // JumpTableDest pseudos above) with the branch itself (otherwise done using // a plain BR) in a single non-attackable sequence. // // We take the final entry index as an operand to allow isel freedom. This does // mean that the index can be attacker-controlled. To address that, we also do // limited checking of the offset, mainly ensuring it still points within the // jump-table array. When it doesn't, this branches to the first entry. // We might want to trap instead. 
// // This is intended for use in conjunction with ptrauth for other code pointers, // to avoid signing jump-table entries and turning them into pointers. // // Entry index is passed in x16. Clobbers x16/x17/nzcv. let isNotDuplicable = 1 in def BR_JumpTable : Pseudo<(outs), (ins i32imm:$jti), []>, Sched<[]> { let isBranch = 1; let isTerminator = 1; let isIndirectBranch = 1; let isBarrier = 1; let isNotDuplicable = 1; let Defs = [X16,X17,NZCV]; let Uses = [X16]; let Size = 44; // 28 fixed + 16 variable, for table size materialization } // Space-consuming pseudo to aid testing of placement and reachability // algorithms. Immediate operand is the number of bytes this "instruction" // occupies; register operands can be used to enforce dependency and constrain // the scheduler. let hasSideEffects = 1, mayLoad = 1, mayStore = 1 in def SPACE : Pseudo<(outs GPR64:$Rd), (ins i32imm:$size, GPR64:$Rn), [(set GPR64:$Rd, (int_aarch64_space imm:$size, GPR64:$Rn))]>, Sched<[]>; let hasSideEffects = 1, isCodeGenOnly = 1 in { def SpeculationSafeValueX : Pseudo<(outs GPR64:$dst), (ins GPR64:$src), []>, Sched<[]>; def SpeculationSafeValueW : Pseudo<(outs GPR32:$dst), (ins GPR32:$src), []>, Sched<[]>; } // SpeculationBarrierEndBB must only be used after an unconditional control // flow, i.e. after a terminator for which isBarrier is True. let hasSideEffects = 1, isCodeGenOnly = 1, isTerminator = 1, isBarrier = 1 in { // This gets lowered to a pair of 4-byte instructions. let Size = 8 in def SpeculationBarrierISBDSBEndBB : Pseudo<(outs), (ins), []>, Sched<[]>; // This gets lowered to a 4-byte instruction. let Size = 4 in def SpeculationBarrierSBEndBB : Pseudo<(outs), (ins), []>, Sched<[]>; } //===----------------------------------------------------------------------===// // System instructions. //===----------------------------------------------------------------------===// def HINT : HintI<"hint">; def : InstAlias<"nop", (HINT 0b000)>; def : InstAlias<"yield",(HINT 0b001)>; def : InstAlias<"wfe", (HINT 0b010)>; def : InstAlias<"wfi", (HINT 0b011)>; def : InstAlias<"sev", (HINT 0b100)>; def : InstAlias<"sevl", (HINT 0b101)>; def : InstAlias<"dgh", (HINT 0b110)>; def : InstAlias<"esb", (HINT 0b10000)>, Requires<[HasRAS]>; def : InstAlias<"csdb", (HINT 20)>; // In order to be able to write readable assembly, LLVM should accept assembly // inputs that use Branch Target Indentification mnemonics, even with BTI disabled. // However, in order to be compatible with other assemblers (e.g. GAS), LLVM // should not emit these mnemonics unless BTI is enabled. def : InstAlias<"bti", (HINT 32), 0>; def : InstAlias<"bti $op", (HINT btihint_op:$op), 0>; def : InstAlias<"bti", (HINT 32)>, Requires<[HasBTI]>; def : InstAlias<"bti $op", (HINT btihint_op:$op)>, Requires<[HasBTI]>; // v8.2a Statistical Profiling extension def : InstAlias<"psb $op", (HINT psbhint_op:$op)>, Requires<[HasSPE]>; // As far as LLVM is concerned this writes to the system's exclusive monitors. let mayLoad = 1, mayStore = 1 in def CLREX : CRmSystemI; // NOTE: ideally, this would have mayStore = 0, mayLoad = 0, but we cannot // model patterns with sufficiently fine granularity. let mayLoad = ?, mayStore = ? 
in { def DMB : CRmSystemI; def DSB : CRmSystemI; def ISB : CRmSystemI; def TSB : CRmSystemI { let CRm = 0b0010; let Inst{12} = 0; let Predicates = [HasTRACEV8_4]; } def DSBnXS : CRmSystemI { let CRm{1-0} = 0b11; let Inst{9-8} = 0b10; let Predicates = [HasXS]; } let Predicates = [HasWFxT] in { def WFET : RegInputSystemI<0b0000, 0b000, "wfet">; def WFIT : RegInputSystemI<0b0000, 0b001, "wfit">; } // Branch Record Buffer two-word mnemonic instructions class BRBEI op2, string keyword> : SimpleSystemI<0, (ins), "brb", keyword>, Sched<[WriteSys]> { let Inst{31-8} = 0b110101010000100101110010; let Inst{7-5} = op2; let Predicates = [HasBRBE]; } def BRB_IALL: BRBEI<0b100, "\tiall">; def BRB_INJ: BRBEI<0b101, "\tinj">; } // Allow uppercase and lowercase keyword arguments for BRB IALL and BRB INJ def : TokenAlias<"INJ", "inj">; def : TokenAlias<"IALL", "iall">; // ARMv9.4-A Guarded Control Stack class GCSNoOp op2, string mnemonic> : SimpleSystemI<0, (ins), mnemonic, "">, Sched<[]> { let Inst{20-8} = 0b0100001110111; let Inst{7-5} = op2; let Predicates = [HasGCS]; } def GCSPUSHX : GCSNoOp<0b100, "gcspushx">; def GCSPOPCX : GCSNoOp<0b101, "gcspopcx">; def GCSPOPX : GCSNoOp<0b110, "gcspopx">; class GCSRtIn op1, bits<3> op2, string mnemonic, list pattern = []> : RtSystemI<0, (outs), (ins GPR64:$Rt), mnemonic, "\t$Rt", pattern> { let Inst{20-19} = 0b01; let Inst{18-16} = op1; let Inst{15-8} = 0b01110111; let Inst{7-5} = op2; let Predicates = [HasGCS]; let hasSideEffects = 1; } let mayStore = 1, mayLoad = 1 in def GCSSS1 : GCSRtIn<0b011, 0b010, "gcsss1">; let mayStore = 1 in def GCSPUSHM : GCSRtIn<0b011, 0b000, "gcspushm">; class GCSRtOut op1, bits<3> op2, string mnemonic, list pattern = []> : RtSystemI<1, (outs GPR64:$Rt), (ins GPR64:$src), mnemonic, "\t$Rt", pattern> { let Inst{20-19} = 0b01; let Inst{18-16} = op1; let Inst{15-8} = 0b01110111; let Inst{7-5} = op2; let Predicates = [HasGCS]; let hasSideEffects = 1; // The input register is unchanged when GCS is disabled, so we need it as // both an input and output operand. 
let Constraints = "$src = $Rt"; } let mayStore = 1, mayLoad = 1 in def GCSSS2 : GCSRtOut<0b011, 0b011, "gcsss2">; // FIXME: mayStore = 1 only needed to match the intrinsic definition let mayStore = 1, mayLoad = 1 in def GCSPOPM : GCSRtOut<0b011, 0b001, "gcspopm", [(set GPR64:$Rt, (int_aarch64_gcspopm GPR64:$src))]>; def GCSPOPM_NoOp : InstAlias<"gcspopm", (GCSPOPM XZR)>, Requires<[HasGCS]>; // Rt defaults to XZR if absent def GCSB_DSYNC_disable : InstAlias<"gcsb\tdsync", (HINT 19), 0>; def GCSB_DSYNC : InstAlias<"gcsb\tdsync", (HINT 19), 1>, Requires<[HasGCS]>; def : TokenAlias<"DSYNC", "dsync">; let Uses = [X16], Defs = [X16], CRm = 0b0101 in { def CHKFEAT : SystemNoOperands<0b000, "hint\t#40", [(set X16, (int_aarch64_chkfeat X16))]>; } def : InstAlias<"chkfeat\tx16", (CHKFEAT), 0>; def : InstAlias<"chkfeat\tx16", (CHKFEAT), 1>, Requires<[HasCHK]>; class GCSSt op> : I<(outs), (ins GPR64:$Rt, GPR64sp:$Rn), mnemonic, "\t$Rt, [$Rn]", "", []>, Sched<[]> { bits<5> Rt; bits<5> Rn; let Inst{31-15} = 0b11011001000111110; let Inst{14-12} = op; let Inst{11-10} = 0b11; let Inst{9-5} = Rn; let Inst{4-0} = Rt; let Predicates = [HasGCS]; } def GCSSTR : GCSSt<"gcsstr", 0b000>; def GCSSTTR : GCSSt<"gcssttr", 0b001>; // ARMv8.2-A Dot Product let Predicates = [HasDotProd] in { defm SDOT : SIMDThreeSameVectorDot<0, 0, "sdot", AArch64sdot>; defm UDOT : SIMDThreeSameVectorDot<1, 0, "udot", AArch64udot>; defm SDOTlane : SIMDThreeSameVectorDotIndex<0, 0, 0b10, "sdot", AArch64sdot>; defm UDOTlane : SIMDThreeSameVectorDotIndex<1, 0, 0b10, "udot", AArch64udot>; } // ARMv8.6-A BFloat let Predicates = [HasNEON, HasBF16] in { defm BFDOT : SIMDThreeSameVectorBFDot<1, "bfdot">; defm BF16DOTlane : SIMDThreeSameVectorBF16DotI<0, "bfdot">; def BFMMLA : SIMDThreeSameVectorBF16MatrixMul<"bfmmla">; def BFMLALB : SIMDBF16MLAL<0, "bfmlalb", int_aarch64_neon_bfmlalb>; def BFMLALT : SIMDBF16MLAL<1, "bfmlalt", int_aarch64_neon_bfmlalt>; def BFMLALBIdx : SIMDBF16MLALIndex<0, "bfmlalb", int_aarch64_neon_bfmlalb>; def BFMLALTIdx : SIMDBF16MLALIndex<1, "bfmlalt", int_aarch64_neon_bfmlalt>; def BFCVTN : SIMD_BFCVTN; def BFCVTN2 : SIMD_BFCVTN2; def : Pat<(v4bf16 (any_fpround (v4f32 V128:$Rn))), (EXTRACT_SUBREG (BFCVTN V128:$Rn), dsub)>; // Vector-scalar BFDOT: // The second source operand of the 64-bit variant of BF16DOTlane is a 128-bit // register (the instruction uses a single 32-bit lane from it), so the pattern // is a bit tricky. def : Pat<(v2f32 (int_aarch64_neon_bfdot (v2f32 V64:$Rd), (v4bf16 V64:$Rn), (v4bf16 (bitconvert (v2i32 (AArch64duplane32 (v4i32 (bitconvert (v8bf16 (insert_subvector undef, (v4bf16 V64:$Rm), (i64 0))))), VectorIndexS:$idx)))))), (BF16DOTlanev4bf16 (v2f32 V64:$Rd), (v4bf16 V64:$Rn), (SUBREG_TO_REG (i32 0), V64:$Rm, dsub), VectorIndexS:$idx)>; } let Predicates = [HasNEONandIsStreamingSafe, HasBF16] in { def BFCVT : BF16ToSinglePrecision<"bfcvt">; // Round FP32 to BF16. def : Pat<(bf16 (any_fpround (f32 FPR32:$Rn))), (BFCVT $Rn)>; } // ARMv8.6A AArch64 matrix multiplication let Predicates = [HasMatMulInt8] in { def SMMLA : SIMDThreeSameVectorMatMul<0, 0, "smmla", int_aarch64_neon_smmla>; def UMMLA : SIMDThreeSameVectorMatMul<0, 1, "ummla", int_aarch64_neon_ummla>; def USMMLA : SIMDThreeSameVectorMatMul<1, 0, "usmmla", int_aarch64_neon_usmmla>; defm USDOT : SIMDThreeSameVectorDot<0, 1, "usdot", int_aarch64_neon_usdot>; defm USDOTlane : SIMDThreeSameVectorDotIndex<0, 1, 0b10, "usdot", int_aarch64_neon_usdot>; // sudot lane has a pattern where usdot is expected (there is no sudot). 
// The second operand is used in the dup operation to repeat the indexed // element. class BaseSIMDSUDOTIndex : BaseSIMDThreeSameVectorIndexS { let Pattern = [(set (AccumType RegType:$dst), (AccumType (int_aarch64_neon_usdot (AccumType RegType:$Rd), (InputType (bitconvert (AccumType (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx)))), (InputType RegType:$Rn))))]; } multiclass SIMDSUDOTIndex { def v8i8 : BaseSIMDSUDOTIndex<0, ".2s", ".8b", ".4b", V64, v2i32, v8i8>; def v16i8 : BaseSIMDSUDOTIndex<1, ".4s", ".16b", ".4b", V128, v4i32, v16i8>; } defm SUDOTlane : SIMDSUDOTIndex; } // ARMv8.2-A FP16 Fused Multiply-Add Long let Predicates = [HasNEON, HasFP16FML] in { defm FMLAL : SIMDThreeSameVectorFML<0, 1, 0b001, "fmlal", int_aarch64_neon_fmlal>; defm FMLSL : SIMDThreeSameVectorFML<0, 1, 0b101, "fmlsl", int_aarch64_neon_fmlsl>; defm FMLAL2 : SIMDThreeSameVectorFML<1, 0, 0b001, "fmlal2", int_aarch64_neon_fmlal2>; defm FMLSL2 : SIMDThreeSameVectorFML<1, 0, 0b101, "fmlsl2", int_aarch64_neon_fmlsl2>; defm FMLALlane : SIMDThreeSameVectorFMLIndex<0, 0b0000, "fmlal", int_aarch64_neon_fmlal>; defm FMLSLlane : SIMDThreeSameVectorFMLIndex<0, 0b0100, "fmlsl", int_aarch64_neon_fmlsl>; defm FMLAL2lane : SIMDThreeSameVectorFMLIndex<1, 0b1000, "fmlal2", int_aarch64_neon_fmlal2>; defm FMLSL2lane : SIMDThreeSameVectorFMLIndex<1, 0b1100, "fmlsl2", int_aarch64_neon_fmlsl2>; } // Armv8.2-A Crypto extensions let Predicates = [HasSHA3] in { def SHA512H : CryptoRRRTied<0b0, 0b00, "sha512h">; def SHA512H2 : CryptoRRRTied<0b0, 0b01, "sha512h2">; def SHA512SU0 : CryptoRRTied_2D<0b0, 0b00, "sha512su0">; def SHA512SU1 : CryptoRRRTied_2D<0b0, 0b10, "sha512su1">; def RAX1 : CryptoRRR_2D<0b0,0b11, "rax1">; def EOR3 : CryptoRRRR_16B<0b00, "eor3">; def BCAX : CryptoRRRR_16B<0b01, "bcax">; def XAR : CryptoRRRi6<"xar">; class SHA3_pattern : Pat<(VecTy (OpNode (VecTy V128:$Vd), (VecTy V128:$Vn), (VecTy V128:$Vm))), (INST (VecTy V128:$Vd), (VecTy V128:$Vn), (VecTy V128:$Vm))>; def : Pat<(v2i64 (int_aarch64_crypto_sha512su0 (v2i64 V128:$Vn), (v2i64 V128:$Vm))), (SHA512SU0 (v2i64 V128:$Vn), (v2i64 V128:$Vm))>; def : SHA3_pattern; def : SHA3_pattern; def : SHA3_pattern; def : SHA3_pattern; def : SHA3_pattern; def : SHA3_pattern; def : SHA3_pattern; class EOR3_pattern : Pat<(xor (xor (VecTy V128:$Vn), (VecTy V128:$Vm)), (VecTy V128:$Va)), (EOR3 (VecTy V128:$Vn), (VecTy V128:$Vm), (VecTy V128:$Va))>; def : EOR3_pattern; def : EOR3_pattern; def : EOR3_pattern; def : EOR3_pattern; class BCAX_pattern : Pat<(xor (VecTy V128:$Vn), (and (VecTy V128:$Vm), (vnot (VecTy V128:$Va)))), (BCAX (VecTy V128:$Vn), (VecTy V128:$Vm), (VecTy V128:$Va))>; def : BCAX_pattern; def : BCAX_pattern; def : BCAX_pattern; def : BCAX_pattern; def : SHA3_pattern; def : SHA3_pattern; def : SHA3_pattern; def : SHA3_pattern; def : SHA3_pattern; def : SHA3_pattern; def : SHA3_pattern; def : SHA3_pattern; def : SHA3_pattern; def : SHA3_pattern; def : SHA3_pattern; def : SHA3_pattern; def : Pat<(v2i64 (int_aarch64_crypto_rax1 (v2i64 V128:$Vn), (v2i64 V128:$Vm))), (RAX1 (v2i64 V128:$Vn), (v2i64 V128:$Vm))>; def : Pat<(v2i64 (int_aarch64_crypto_xar (v2i64 V128:$Vn), (v2i64 V128:$Vm), (i64 timm0_63:$imm))), (XAR (v2i64 V128:$Vn), (v2i64 V128:$Vm), (timm0_63:$imm))>; def : Pat<(xor (v2i64 V128:$Vn), (or (AArch64vlshr (v2i64 V128:$Vm), (i32 63)), (AArch64vshl (v2i64 V128:$Vm), (i32 1)))), (RAX1 (v2i64 V128:$Vn), (v2i64 V128:$Vm))>; } // HasSHA3 let Predicates = [HasSM4] in { def SM3TT1A : CryptoRRRi2Tied<0b0, 0b00, "sm3tt1a">; def SM3TT1B : CryptoRRRi2Tied<0b0, 0b01, 
"sm3tt1b">; def SM3TT2A : CryptoRRRi2Tied<0b0, 0b10, "sm3tt2a">; def SM3TT2B : CryptoRRRi2Tied<0b0, 0b11, "sm3tt2b">; def SM3SS1 : CryptoRRRR_4S<0b10, "sm3ss1">; def SM3PARTW1 : CryptoRRRTied_4S<0b1, 0b00, "sm3partw1">; def SM3PARTW2 : CryptoRRRTied_4S<0b1, 0b01, "sm3partw2">; def SM4ENCKEY : CryptoRRR_4S<0b1, 0b10, "sm4ekey">; def SM4E : CryptoRRTied_4S<0b0, 0b01, "sm4e">; def : Pat<(v4i32 (int_aarch64_crypto_sm3ss1 (v4i32 V128:$Vn), (v4i32 V128:$Vm), (v4i32 V128:$Va))), (SM3SS1 (v4i32 V128:$Vn), (v4i32 V128:$Vm), (v4i32 V128:$Va))>; class SM3PARTW_pattern : Pat<(v4i32 (OpNode (v4i32 V128:$Vd), (v4i32 V128:$Vn), (v4i32 V128:$Vm))), (INST (v4i32 V128:$Vd), (v4i32 V128:$Vn), (v4i32 V128:$Vm))>; class SM3TT_pattern : Pat<(v4i32 (OpNode (v4i32 V128:$Vd), (v4i32 V128:$Vn), (v4i32 V128:$Vm), (i64 VectorIndexS_timm:$imm) )), (INST (v4i32 V128:$Vd), (v4i32 V128:$Vn), (v4i32 V128:$Vm), (VectorIndexS_timm:$imm))>; class SM4_pattern : Pat<(v4i32 (OpNode (v4i32 V128:$Vn), (v4i32 V128:$Vm))), (INST (v4i32 V128:$Vn), (v4i32 V128:$Vm))>; def : SM3PARTW_pattern; def : SM3PARTW_pattern; def : SM3TT_pattern; def : SM3TT_pattern; def : SM3TT_pattern; def : SM3TT_pattern; def : SM4_pattern; def : SM4_pattern; } // HasSM4 let Predicates = [HasRCPC] in { // v8.3 Release Consistent Processor Consistent support, optional in v8.2. def LDAPRB : RCPCLoad<0b00, "ldaprb", GPR32>; def LDAPRH : RCPCLoad<0b01, "ldaprh", GPR32>; def LDAPRW : RCPCLoad<0b10, "ldapr", GPR32>; def LDAPRX : RCPCLoad<0b11, "ldapr", GPR64>; } // v8.3a complex add and multiply-accumulate. No predicate here, that is done // inside the multiclass as the FP16 versions need different predicates. defm FCMLA : SIMDThreeSameVectorTiedComplexHSD<1, 0b110, complexrotateop, "fcmla", null_frag>; defm FCADD : SIMDThreeSameVectorComplexHSD<1, 0b111, complexrotateopodd, "fcadd", null_frag>; defm FCMLA : SIMDIndexedTiedComplexHSD<0, 1, complexrotateop, "fcmla">; let Predicates = [HasComplxNum, HasNEON, HasFullFP16] in { def : Pat<(v4f16 (int_aarch64_neon_vcadd_rot90 (v4f16 V64:$Rn), (v4f16 V64:$Rm))), (FCADDv4f16 (v4f16 V64:$Rn), (v4f16 V64:$Rm), (i32 0))>; def : Pat<(v4f16 (int_aarch64_neon_vcadd_rot270 (v4f16 V64:$Rn), (v4f16 V64:$Rm))), (FCADDv4f16 (v4f16 V64:$Rn), (v4f16 V64:$Rm), (i32 1))>; def : Pat<(v8f16 (int_aarch64_neon_vcadd_rot90 (v8f16 V128:$Rn), (v8f16 V128:$Rm))), (FCADDv8f16 (v8f16 V128:$Rn), (v8f16 V128:$Rm), (i32 0))>; def : Pat<(v8f16 (int_aarch64_neon_vcadd_rot270 (v8f16 V128:$Rn), (v8f16 V128:$Rm))), (FCADDv8f16 (v8f16 V128:$Rn), (v8f16 V128:$Rm), (i32 1))>; } let Predicates = [HasComplxNum, HasNEON] in { def : Pat<(v2f32 (int_aarch64_neon_vcadd_rot90 (v2f32 V64:$Rn), (v2f32 V64:$Rm))), (FCADDv2f32 (v2f32 V64:$Rn), (v2f32 V64:$Rm), (i32 0))>; def : Pat<(v2f32 (int_aarch64_neon_vcadd_rot270 (v2f32 V64:$Rn), (v2f32 V64:$Rm))), (FCADDv2f32 (v2f32 V64:$Rn), (v2f32 V64:$Rm), (i32 1))>; foreach Ty = [v4f32, v2f64] in { def : Pat<(Ty (int_aarch64_neon_vcadd_rot90 (Ty V128:$Rn), (Ty V128:$Rm))), (!cast("FCADD"#Ty) (Ty V128:$Rn), (Ty V128:$Rm), (i32 0))>; def : Pat<(Ty (int_aarch64_neon_vcadd_rot270 (Ty V128:$Rn), (Ty V128:$Rm))), (!cast("FCADD"#Ty) (Ty V128:$Rn), (Ty V128:$Rm), (i32 1))>; } } multiclass FCMLA_PATS { def : Pat<(ty (int_aarch64_neon_vcmla_rot0 (ty Reg:$Rd), (ty Reg:$Rn), (ty Reg:$Rm))), (!cast("FCMLA" # ty) $Rd, $Rn, $Rm, 0)>; def : Pat<(ty (int_aarch64_neon_vcmla_rot90 (ty Reg:$Rd), (ty Reg:$Rn), (ty Reg:$Rm))), (!cast("FCMLA" # ty) $Rd, $Rn, $Rm, 1)>; def : Pat<(ty (int_aarch64_neon_vcmla_rot180 (ty Reg:$Rd), (ty Reg:$Rn), (ty 
Reg:$Rm))), (!cast("FCMLA" # ty) $Rd, $Rn, $Rm, 2)>; def : Pat<(ty (int_aarch64_neon_vcmla_rot270 (ty Reg:$Rd), (ty Reg:$Rn), (ty Reg:$Rm))), (!cast("FCMLA" # ty) $Rd, $Rn, $Rm, 3)>; } multiclass FCMLA_LANE_PATS { def : Pat<(ty (int_aarch64_neon_vcmla_rot0 (ty Reg:$Rd), (ty Reg:$Rn), RHSDup)), (!cast("FCMLA" # ty # "_indexed") $Rd, $Rn, $Rm, VectorIndexS:$idx, 0)>; def : Pat<(ty (int_aarch64_neon_vcmla_rot90 (ty Reg:$Rd), (ty Reg:$Rn), RHSDup)), (!cast("FCMLA" # ty # "_indexed") $Rd, $Rn, $Rm, VectorIndexS:$idx, 1)>; def : Pat<(ty (int_aarch64_neon_vcmla_rot180 (ty Reg:$Rd), (ty Reg:$Rn), RHSDup)), (!cast("FCMLA" # ty # "_indexed") $Rd, $Rn, $Rm, VectorIndexS:$idx, 2)>; def : Pat<(ty (int_aarch64_neon_vcmla_rot270 (ty Reg:$Rd), (ty Reg:$Rn), RHSDup)), (!cast("FCMLA" # ty # "_indexed") $Rd, $Rn, $Rm, VectorIndexS:$idx, 3)>; } let Predicates = [HasComplxNum, HasNEON, HasFullFP16] in { defm : FCMLA_PATS; defm : FCMLA_PATS; defm : FCMLA_LANE_PATS; defm : FCMLA_LANE_PATS; } let Predicates = [HasComplxNum, HasNEON] in { defm : FCMLA_PATS; defm : FCMLA_PATS; defm : FCMLA_PATS; defm : FCMLA_LANE_PATS; } // v8.3a Pointer Authentication // These instructions inhabit part of the hint space and so can be used for // armv8 targets. Keeping the old HINT mnemonic when compiling without PA is // important for compatibility with other assemblers (e.g. GAS) when building // software compatible with both CPUs that do or don't implement PA. let Uses = [LR], Defs = [LR] in { def PACIAZ : SystemNoOperands<0b000, "hint\t#24">; def PACIBZ : SystemNoOperands<0b010, "hint\t#26">; let isAuthenticated = 1 in { def AUTIAZ : SystemNoOperands<0b100, "hint\t#28">; def AUTIBZ : SystemNoOperands<0b110, "hint\t#30">; } } let Uses = [LR, SP], Defs = [LR] in { def PACIASP : SystemNoOperands<0b001, "hint\t#25">; def PACIBSP : SystemNoOperands<0b011, "hint\t#27">; let isAuthenticated = 1 in { def AUTIASP : SystemNoOperands<0b101, "hint\t#29">; def AUTIBSP : SystemNoOperands<0b111, "hint\t#31">; } } let Uses = [X16, X17], Defs = [X17], CRm = 0b0001 in { def PACIA1716 : SystemNoOperands<0b000, "hint\t#8">; def PACIB1716 : SystemNoOperands<0b010, "hint\t#10">; let isAuthenticated = 1 in { def AUTIA1716 : SystemNoOperands<0b100, "hint\t#12">; def AUTIB1716 : SystemNoOperands<0b110, "hint\t#14">; } } let Uses = [LR], Defs = [LR], CRm = 0b0000 in { def XPACLRI : SystemNoOperands<0b111, "hint\t#7">; } // In order to be able to write readable assembly, LLVM should accept assembly // inputs that use pointer authentication mnemonics, even with PA disabled. // However, in order to be compatible with other assemblers (e.g. GAS), LLVM // should not emit these mnemonics unless PA is enabled. def : InstAlias<"paciaz", (PACIAZ), 0>; def : InstAlias<"pacibz", (PACIBZ), 0>; def : InstAlias<"autiaz", (AUTIAZ), 0>; def : InstAlias<"autibz", (AUTIBZ), 0>; def : InstAlias<"paciasp", (PACIASP), 0>; def : InstAlias<"pacibsp", (PACIBSP), 0>; def : InstAlias<"autiasp", (AUTIASP), 0>; def : InstAlias<"autibsp", (AUTIBSP), 0>; def : InstAlias<"pacia1716", (PACIA1716), 0>; def : InstAlias<"pacib1716", (PACIB1716), 0>; def : InstAlias<"autia1716", (AUTIA1716), 0>; def : InstAlias<"autib1716", (AUTIB1716), 0>; def : InstAlias<"xpaclri", (XPACLRI), 0>; // Pseudos let Uses = [LR, SP], Defs = [LR] in { // Insertion point of LR signing code. def PAUTH_PROLOGUE : Pseudo<(outs), (ins), []>, Sched<[]>; // Insertion point of LR authentication code. 
// The RET terminator of the containing machine basic block may be replaced // with a combined RETA(A|B) instruction when rewriting this Pseudo. def PAUTH_EPILOGUE : Pseudo<(outs), (ins), []>, Sched<[]>; } def PAUTH_BLEND : Pseudo<(outs GPR64:$disc), (ins GPR64:$addr_disc, i32imm:$int_disc), []>, Sched<[]>; // These pointer authentication instructions require armv8.3a let Predicates = [HasPAuth] in { // When PA is enabled, a better mnemonic should be emitted. def : InstAlias<"paciaz", (PACIAZ), 1>; def : InstAlias<"pacibz", (PACIBZ), 1>; def : InstAlias<"autiaz", (AUTIAZ), 1>; def : InstAlias<"autibz", (AUTIBZ), 1>; def : InstAlias<"paciasp", (PACIASP), 1>; def : InstAlias<"pacibsp", (PACIBSP), 1>; def : InstAlias<"autiasp", (AUTIASP), 1>; def : InstAlias<"autibsp", (AUTIBSP), 1>; def : InstAlias<"pacia1716", (PACIA1716), 1>; def : InstAlias<"pacib1716", (PACIB1716), 1>; def : InstAlias<"autia1716", (AUTIA1716), 1>; def : InstAlias<"autib1716", (AUTIB1716), 1>; def : InstAlias<"xpaclri", (XPACLRI), 1>; multiclass SignAuth prefix, bits<3> prefix_z, string asm, SDPatternOperator op> { def IA : SignAuthOneData; def IB : SignAuthOneData; def DA : SignAuthOneData; def DB : SignAuthOneData; def IZA : SignAuthZero; def DZA : SignAuthZero; def IZB : SignAuthZero; def DZB : SignAuthZero; } defm PAC : SignAuth<0b000, 0b010, "pac", int_ptrauth_sign>; defm AUT : SignAuth<0b001, 0b011, "aut", null_frag>; def XPACI : ClearAuth<0, "xpaci">; def : Pat<(int_ptrauth_strip GPR64:$Rd, 0), (XPACI GPR64:$Rd)>; def : Pat<(int_ptrauth_strip GPR64:$Rd, 1), (XPACI GPR64:$Rd)>; def XPACD : ClearAuth<1, "xpacd">; def : Pat<(int_ptrauth_strip GPR64:$Rd, 2), (XPACD GPR64:$Rd)>; def : Pat<(int_ptrauth_strip GPR64:$Rd, 3), (XPACD GPR64:$Rd)>; def PACGA : SignAuthTwoOperand<0b1100, "pacga", int_ptrauth_sign_generic>; // Combined Instructions let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { def BRAA : AuthBranchTwoOperands<0, 0, "braa">; def BRAB : AuthBranchTwoOperands<0, 1, "brab">; } let isCall = 1, Defs = [LR], Uses = [SP] in { def BLRAA : AuthBranchTwoOperands<1, 0, "blraa">; def BLRAB : AuthBranchTwoOperands<1, 1, "blrab">; } let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { def BRAAZ : AuthOneOperand<0b000, 0, "braaz">; def BRABZ : AuthOneOperand<0b000, 1, "brabz">; } let isCall = 1, Defs = [LR], Uses = [SP] in { def BLRAAZ : AuthOneOperand<0b001, 0, "blraaz">; def BLRABZ : AuthOneOperand<0b001, 1, "blrabz">; } // BLRA pseudo, a generalized version of BLRAA/BLRAB/Z. // This directly manipulates x16/x17 to materialize the discriminator. // x16/x17 are generally used as the safe registers for sensitive ptrauth // operations (such as raw address manipulation or discriminator // materialization here), in part because they're handled in a safer way by // the kernel, notably on Darwin. def BLRA : Pseudo<(outs), (ins GPR64noip:$Rn, i32imm:$Key, i64imm:$Disc, GPR64noip:$AddrDisc), [(AArch64authcall GPR64noip:$Rn, timm:$Key, timm:$Disc, GPR64noip:$AddrDisc)]>, Sched<[]> { let isCodeGenOnly = 1; let hasSideEffects = 1; let mayStore = 0; let mayLoad = 0; let isCall = 1; let Size = 12; // 4 fixed + 8 variable, to compute discriminator. 
let Defs = [X17,LR]; let Uses = [SP]; } def BLRA_RVMARKER : Pseudo< (outs), (ins i64imm:$rvfunc, GPR64noip:$Rn, i32imm:$Key, i64imm:$Disc, GPR64noip:$AddrDisc), [(AArch64authcall_rvmarker tglobaladdr:$rvfunc, GPR64noip:$Rn, timm:$Key, timm:$Disc, GPR64noip:$AddrDisc)]>, Sched<[]> { let isCodeGenOnly = 1; let isCall = 1; let Defs = [X17,LR]; let Uses = [SP]; } // BRA pseudo, generalized version of BRAA/BRAB/Z. // This directly manipulates x16/x17, which are the only registers the OS // guarantees are safe to use for sensitive operations. def BRA : Pseudo<(outs), (ins GPR64noip:$Rn, i32imm:$Key, i64imm:$Disc, GPR64noip:$AddrDisc), []>, Sched<[]> { let isCodeGenOnly = 1; let hasNoSchedulingInfo = 1; let hasSideEffects = 1; let mayStore = 0; let mayLoad = 0; let isBranch = 1; let isTerminator = 1; let isBarrier = 1; let isIndirectBranch = 1; let Size = 12; // 4 fixed + 8 variable, to compute discriminator. let Defs = [X17]; } let isReturn = 1, isTerminator = 1, isBarrier = 1 in { def RETAA : AuthReturn<0b010, 0, "retaa">; def RETAB : AuthReturn<0b010, 1, "retab">; def ERETAA : AuthReturn<0b100, 0, "eretaa">; def ERETAB : AuthReturn<0b100, 1, "eretab">; } defm LDRAA : AuthLoad<0, "ldraa", simm10Scaled>; defm LDRAB : AuthLoad<1, "ldrab", simm10Scaled>; // AUT pseudo. // This directly manipulates x16/x17, which are the only registers the OS // guarantees are safe to use for sensitive operations. def AUT : Pseudo<(outs), (ins i32imm:$Key, i64imm:$Disc, GPR64noip:$AddrDisc), []>, Sched<[WriteI, ReadI]> { let isCodeGenOnly = 1; let hasSideEffects = 1; let mayStore = 0; let mayLoad = 0; let Size = 32; let Defs = [X16,X17,NZCV]; let Uses = [X16]; } // AUT and re-PAC a value, using different keys/data. // This directly manipulates x16/x17, which are the only registers the OS // guarantees are safe to use for sensitive operations. def AUTPAC : Pseudo<(outs), (ins i32imm:$AUTKey, i64imm:$AUTDisc, GPR64noip:$AUTAddrDisc, i32imm:$PACKey, i64imm:$PACDisc, GPR64noip:$PACAddrDisc), []>, Sched<[WriteI, ReadI]> { let isCodeGenOnly = 1; let hasSideEffects = 1; let mayStore = 0; let mayLoad = 0; let Size = 48; let Defs = [X16,X17,NZCV]; let Uses = [X16]; } // Materialize a signed global address, with adrp+add and PAC. def MOVaddrPAC : Pseudo<(outs), (ins i64imm:$Addr, i32imm:$Key, GPR64noip:$AddrDisc, i64imm:$Disc), []>, Sched<[WriteI, ReadI]> { let isReMaterializable = 1; let isCodeGenOnly = 1; let Size = 40; // 12 fixed + 28 variable, for pointer offset, and discriminator let Defs = [X16,X17]; } // Materialize a signed global address, using a GOT load and PAC. def LOADgotPAC : Pseudo<(outs), (ins i64imm:$Addr, i32imm:$Key, GPR64noip:$AddrDisc, i64imm:$Disc), []>, Sched<[WriteI, ReadI]> { let isReMaterializable = 1; let isCodeGenOnly = 1; let Size = 40; // 12 fixed + 28 variable, for pointer offset, and discriminator let Defs = [X16,X17]; } // Load a signed global address from a special $auth_ptr$ stub slot. def LOADauthptrstatic : Pseudo<(outs GPR64:$dst), (ins i64imm:$Addr, i32imm:$Key, i64imm:$Disc), []>, Sched<[WriteI, ReadI]> { let isReMaterializable = 1; let isCodeGenOnly = 1; let Size = 8; } // Size 16: 4 fixed + 8 variable, to compute discriminator. 
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Size = 16, Uses = [SP] in { def AUTH_TCRETURN : Pseudo<(outs), (ins tcGPR64:$dst, i32imm:$FPDiff, i32imm:$Key, i64imm:$Disc, tcGPR64:$AddrDisc), []>, Sched<[WriteBrReg]>; def AUTH_TCRETURN_BTI : Pseudo<(outs), (ins tcGPRx16x17:$dst, i32imm:$FPDiff, i32imm:$Key, i64imm:$Disc, tcGPR64:$AddrDisc), []>, Sched<[WriteBrReg]>; } let Predicates = [TailCallAny] in def : Pat<(AArch64authtcret tcGPR64:$dst, (i32 timm:$FPDiff), (i32 timm:$Key), (i64 timm:$Disc), tcGPR64:$AddrDisc), (AUTH_TCRETURN tcGPR64:$dst, imm:$FPDiff, imm:$Key, imm:$Disc, tcGPR64:$AddrDisc)>; let Predicates = [TailCallX16X17] in def : Pat<(AArch64authtcret tcGPRx16x17:$dst, (i32 timm:$FPDiff), (i32 timm:$Key), (i64 timm:$Disc), tcGPR64:$AddrDisc), (AUTH_TCRETURN_BTI tcGPRx16x17:$dst, imm:$FPDiff, imm:$Key, imm:$Disc, tcGPR64:$AddrDisc)>; } // v9.5-A pointer authentication extensions // Always accept "pacm" as an alias for "hint #39", but don't emit it when // disassembling if we don't have the pauth-lr feature. let CRm = 0b0100 in { def PACM : SystemNoOperands<0b111, "hint\t#39">; } def : InstAlias<"pacm", (PACM), 0>; let Predicates = [HasPAuthLR] in { let Defs = [LR], Uses = [LR, SP] in { // opcode2, opcode, asm def PACIASPPC : SignAuthFixedRegs<0b00001, 0b101000, "paciasppc">; def PACIBSPPC : SignAuthFixedRegs<0b00001, 0b101001, "pacibsppc">; def PACNBIASPPC : SignAuthFixedRegs<0b00001, 0b100000, "pacnbiasppc">; def PACNBIBSPPC : SignAuthFixedRegs<0b00001, 0b100001, "pacnbibsppc">; // opc, asm def AUTIASPPCi : SignAuthPCRel<0b00, "autiasppc">; def AUTIBSPPCi : SignAuthPCRel<0b01, "autibsppc">; // opcode2, opcode, asm def AUTIASPPCr : SignAuthOneReg<0b00001, 0b100100, "autiasppcr">; def AUTIBSPPCr : SignAuthOneReg<0b00001, 0b100101, "autibsppcr">; // opcode2, opcode, asm def PACIA171615 : SignAuthFixedRegs<0b00001, 0b100010, "pacia171615">; def PACIB171615 : SignAuthFixedRegs<0b00001, 0b100011, "pacib171615">; def AUTIA171615 : SignAuthFixedRegs<0b00001, 0b101110, "autia171615">; def AUTIB171615 : SignAuthFixedRegs<0b00001, 0b101111, "autib171615">; } let Uses = [LR, SP], isReturn = 1, isTerminator = 1, isBarrier = 1 in { // opc, op2, asm def RETAASPPCi : SignAuthReturnPCRel<0b000, 0b11111, "retaasppc">; def RETABSPPCi : SignAuthReturnPCRel<0b001, 0b11111, "retabsppc">; // op3, asm def RETAASPPCr : SignAuthReturnReg<0b000010, "retaasppcr">; def RETABSPPCr : SignAuthReturnReg<0b000011, "retabsppcr">; } def : InstAlias<"pacm", (PACM), 1>; } // v8.3a floating point conversion for javascript let Predicates = [HasJS, HasFPARMv8], Defs = [NZCV] in def FJCVTZS : BaseFPToIntegerUnscaled<0b01, 0b11, 0b110, FPR64, GPR32, "fjcvtzs", [(set GPR32:$Rd, (int_aarch64_fjcvtzs FPR64:$Rn))]> { let Inst{31} = 0; } // HasJS, HasFPARMv8 // v8.4 Flag manipulation instructions let Predicates = [HasFlagM], Defs = [NZCV], Uses = [NZCV] in { def CFINV : SimpleSystemI<0, (ins), "cfinv", "">, Sched<[WriteSys]> { let Inst{20-5} = 0b0000001000000000; } def SETF8 : BaseFlagManipulation<0, 0, (ins GPR32:$Rn), "setf8", "{\t$Rn}">; def SETF16 : BaseFlagManipulation<0, 1, (ins GPR32:$Rn), "setf16", "{\t$Rn}">; def RMIF : FlagRotate<(ins GPR64:$Rn, uimm6:$imm, imm0_15:$mask), "rmif", "{\t$Rn, $imm, $mask}">; } // HasFlagM // v8.5 flag manipulation instructions let Predicates = [HasAltNZCV], Uses = [NZCV], Defs = [NZCV] in { def XAFLAG : PstateWriteSimple<(ins), "xaflag", "">, Sched<[WriteSys]> { let Inst{18-16} = 0b000; let Inst{11-8} = 0b0000; let Unpredictable{11-8} = 0b1111; let Inst{7-5} = 0b001; } 
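// Illustrative note (not part of the original source): CFINV and the
// XAFLAG/AXFLAG pair above/below exist largely so software emulating another
// ISA's flag conventions can adjust PSTATE.NZCV cheaply. One concrete
// difference they help bridge, sketched as a hypothetical C++ helper
// (assuming AArch64 CMP/SUBS semantics):
//
//   // AArch64: C == 1 means "no borrow" after a subtract/compare.
//   // x86:     CF == 1 means "a borrow occurred".
//   bool x86StyleCarryFromAArch64C(bool aarch64_c) { return !aarch64_c; }
//
// CFINV performs exactly this inversion of PSTATE.C in a single instruction.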
def AXFLAG : PstateWriteSimple<(ins), "axflag", "">, Sched<[WriteSys]> { let Inst{18-16} = 0b000; let Inst{11-8} = 0b0000; let Unpredictable{11-8} = 0b1111; let Inst{7-5} = 0b010; } } // HasAltNZCV // Armv8.5-A speculation barrier def SB : SimpleSystemI<0, (ins), "sb", "">, Sched<[]> { let Inst{20-5} = 0b0001100110000111; let Unpredictable{11-8} = 0b1111; let Predicates = [HasSB]; let hasSideEffects = 1; } def : InstAlias<"clrex", (CLREX 0xf)>; def : InstAlias<"isb", (ISB 0xf)>; def : InstAlias<"ssbb", (DSB 0)>; def : InstAlias<"pssbb", (DSB 4)>; def : InstAlias<"dfb", (DSB 0b1100)>, Requires<[HasV8_0r]>; def MRS : MRSI; def MSR : MSRI; def MSRpstateImm1 : MSRpstateImm0_1; def MSRpstateImm4 : MSRpstateImm0_15; def : Pat<(AArch64mrs imm:$id), (MRS imm:$id)>; // The thread pointer (on Linux, at least, where this has been implemented) is // TPIDR_EL0. def MOVbaseTLS : Pseudo<(outs GPR64:$dst), (ins), [(set GPR64:$dst, AArch64threadpointer)]>, Sched<[WriteSys]>; // This gets lowered into a 24-byte instruction sequence let Defs = [ X9, X16, X17, NZCV ], Size = 24 in { def KCFI_CHECK : Pseudo< (outs), (ins GPR64:$ptr, i32imm:$type), []>, Sched<[]>; } let Uses = [ X9 ], Defs = [ X16, X17, LR, NZCV ] in { def HWASAN_CHECK_MEMACCESS : Pseudo< (outs), (ins GPR64noip:$ptr, i32imm:$accessinfo), [(int_hwasan_check_memaccess X9, GPR64noip:$ptr, (i32 timm:$accessinfo))]>, Sched<[]>; } let Uses = [ X20 ], Defs = [ X16, X17, LR, NZCV ] in { def HWASAN_CHECK_MEMACCESS_SHORTGRANULES : Pseudo< (outs), (ins GPR64noip:$ptr, i32imm:$accessinfo), [(int_hwasan_check_memaccess_shortgranules X20, GPR64noip:$ptr, (i32 timm:$accessinfo))]>, Sched<[]>; } let Defs = [ X16, X17, LR, NZCV ] in { def HWASAN_CHECK_MEMACCESS_FIXEDSHADOW : Pseudo< (outs), (ins GPR64noip:$ptr, i32imm:$accessinfo, i64imm:$fixed_shadow), [(int_hwasan_check_memaccess_fixedshadow GPR64noip:$ptr, (i32 timm:$accessinfo), (i64 timm:$fixed_shadow))]>, Sched<[]>; } let Defs = [ X16, X17, LR, NZCV ] in { def HWASAN_CHECK_MEMACCESS_SHORTGRANULES_FIXEDSHADOW : Pseudo< (outs), (ins GPR64noip:$ptr, i32imm:$accessinfo, i64imm:$fixed_shadow), [(int_hwasan_check_memaccess_shortgranules_fixedshadow GPR64noip:$ptr, (i32 timm:$accessinfo), (i64 timm:$fixed_shadow))]>, Sched<[]>; } // The virtual cycle counter register is CNTVCT_EL0. def : Pat<(readcyclecounter), (MRS 0xdf02)>; // FPCR and FPSR registers. 
let Uses = [FPCR] in def MRS_FPCR : Pseudo<(outs GPR64:$dst), (ins), [(set GPR64:$dst, (int_aarch64_get_fpcr))]>, PseudoInstExpansion<(MRS GPR64:$dst, 0xda20)>, Sched<[WriteSys]>; let Defs = [FPCR] in def MSR_FPCR : Pseudo<(outs), (ins GPR64:$val), [(int_aarch64_set_fpcr i64:$val)]>, PseudoInstExpansion<(MSR 0xda20, GPR64:$val)>, Sched<[WriteSys]>; let Uses = [FPSR] in def MRS_FPSR : Pseudo<(outs GPR64:$dst), (ins), [(set GPR64:$dst, (int_aarch64_get_fpsr))]>, PseudoInstExpansion<(MRS GPR64:$dst, 0xda21)>, Sched<[WriteSys]>; let Defs = [FPSR] in def MSR_FPSR : Pseudo<(outs), (ins GPR64:$val), [(int_aarch64_set_fpsr i64:$val)]>, PseudoInstExpansion<(MSR 0xda21, GPR64:$val)>, Sched<[WriteSys]>; // Generic system instructions def SYSxt : SystemXtI<0, "sys">; def SYSLxt : SystemLXtI<1, "sysl">; def : InstAlias<"sys $op1, $Cn, $Cm, $op2", (SYSxt imm0_7:$op1, sys_cr_op:$Cn, sys_cr_op:$Cm, imm0_7:$op2, XZR)>; let Predicates = [HasTME] in { def TSTART : TMSystemI<0b0000, "tstart", [(set GPR64:$Rt, (int_aarch64_tstart))]>; def TCOMMIT : TMSystemINoOperand<0b0000, "tcommit", [(int_aarch64_tcommit)]>; def TCANCEL : TMSystemException<0b011, "tcancel", [(int_aarch64_tcancel timm64_0_65535:$imm)]>; def TTEST : TMSystemI<0b0001, "ttest", [(set GPR64:$Rt, (int_aarch64_ttest))]> { let mayLoad = 0; let mayStore = 0; } } // HasTME //===----------------------------------------------------------------------===// // Move immediate instructions. //===----------------------------------------------------------------------===// defm MOVK : InsertImmediate<0b11, "movk">; defm MOVN : MoveImmediate<0b00, "movn">; let PostEncoderMethod = "fixMOVZ" in defm MOVZ : MoveImmediate<0b10, "movz">; // First group of aliases covers an implicit "lsl #0". def : InstAlias<"movk $dst, $imm", (MOVKWi GPR32:$dst, timm32_0_65535:$imm, 0), 0>; def : InstAlias<"movk $dst, $imm", (MOVKXi GPR64:$dst, timm32_0_65535:$imm, 0), 0>; def : InstAlias<"movn $dst, $imm", (MOVNWi GPR32:$dst, timm32_0_65535:$imm, 0)>; def : InstAlias<"movn $dst, $imm", (MOVNXi GPR64:$dst, timm32_0_65535:$imm, 0)>; def : InstAlias<"movz $dst, $imm", (MOVZWi GPR32:$dst, timm32_0_65535:$imm, 0)>; def : InstAlias<"movz $dst, $imm", (MOVZXi GPR64:$dst, timm32_0_65535:$imm, 0)>; // Next, we have various ELF relocations with the ":XYZ_g0:sym" syntax. 
def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movw_symbol_g3:$sym, 48)>;
def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movw_symbol_g2:$sym, 32)>;
def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movw_symbol_g1:$sym, 16)>;
def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movw_symbol_g0:$sym, 0)>;

def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movw_symbol_g3:$sym, 48)>;
def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movw_symbol_g2:$sym, 32)>;
def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movw_symbol_g1:$sym, 16)>;
def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movw_symbol_g0:$sym, 0)>;

def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movw_symbol_g3:$sym, 48), 0>;
def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movw_symbol_g2:$sym, 32), 0>;
def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movw_symbol_g1:$sym, 16), 0>;
def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movw_symbol_g0:$sym, 0), 0>;

def : InstAlias<"movz $Rd, $sym", (MOVZWi GPR32:$Rd, movw_symbol_g1:$sym, 16)>;
def : InstAlias<"movz $Rd, $sym", (MOVZWi GPR32:$Rd, movw_symbol_g0:$sym, 0)>;

def : InstAlias<"movn $Rd, $sym", (MOVNWi GPR32:$Rd, movw_symbol_g1:$sym, 16)>;
def : InstAlias<"movn $Rd, $sym", (MOVNWi GPR32:$Rd, movw_symbol_g0:$sym, 0)>;

def : InstAlias<"movk $Rd, $sym", (MOVKWi GPR32:$Rd, movw_symbol_g1:$sym, 16), 0>;
def : InstAlias<"movk $Rd, $sym", (MOVKWi GPR32:$Rd, movw_symbol_g0:$sym, 0), 0>;

// Final group of aliases covers true "mov $Rd, $imm" cases.
multiclass movw_mov_alias<string basename, Instruction INST, RegisterClass GPR,
                          int width, int shift> {
  def _asmoperand : AsmOperandClass {
    let Name = basename # width # "_lsl" # shift # "MovAlias";
    let PredicateMethod = "is" # basename # "MovAlias<" # width # ", "
                               # shift # ">";
    let RenderMethod = "add" # basename # "MovAliasOperands<" # shift # ">";
  }

  def _movimm : Operand<i32> {
    let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_asmoperand");
  }

  def : InstAlias<"mov $Rd, $imm",
                  (INST GPR:$Rd, !cast<Operand>(NAME # "_movimm"):$imm, shift)>;
}

defm : movw_mov_alias<"MOVZ", MOVZWi, GPR32, 32, 0>;
defm : movw_mov_alias<"MOVZ", MOVZWi, GPR32, 32, 16>;

defm : movw_mov_alias<"MOVZ", MOVZXi, GPR64, 64, 0>;
defm : movw_mov_alias<"MOVZ", MOVZXi, GPR64, 64, 16>;
defm : movw_mov_alias<"MOVZ", MOVZXi, GPR64, 64, 32>;
defm : movw_mov_alias<"MOVZ", MOVZXi, GPR64, 64, 48>;

defm : movw_mov_alias<"MOVN", MOVNWi, GPR32, 32, 0>;
defm : movw_mov_alias<"MOVN", MOVNWi, GPR32, 32, 16>;

defm : movw_mov_alias<"MOVN", MOVNXi, GPR64, 64, 0>;
defm : movw_mov_alias<"MOVN", MOVNXi, GPR64, 64, 16>;
defm : movw_mov_alias<"MOVN", MOVNXi, GPR64, 64, 32>;
defm : movw_mov_alias<"MOVN", MOVNXi, GPR64, 64, 48>;

let isReMaterializable = 1, isCodeGenOnly = 1, isMoveImm = 1,
    isAsCheapAsAMove = 1 in {
// FIXME: The following pseudo instructions are only needed because remat
// cannot handle multiple instructions. When that changes, we can select
// directly to the real instructions and get rid of these pseudos.

def MOVi32imm : Pseudo<(outs GPR32:$dst), (ins i32imm:$src),
                       [(set GPR32:$dst, imm:$src)]>,
                Sched<[WriteImm]>;
def MOVi64imm : Pseudo<(outs GPR64:$dst), (ins i64imm:$src),
                       [(set GPR64:$dst, imm:$src)]>,
                Sched<[WriteImm]>;
} // isReMaterializable, isCodeGenOnly

// If possible, we want to use MOVi32imm even for 64-bit moves. This gives the
// eventual expansion code fewer bits to worry about getting right. Marshalling
// the types is a little tricky though:
def i64imm_32bit : ImmLeaf<i64, [{
  return (Imm & 0xffffffffULL) == static_cast<uint64_t>(Imm);
}]>;

def s64imm_32bit : ImmLeaf<i64, [{
  int64_t Imm64 = static_cast<int64_t>(Imm);
  return Imm64 >= std::numeric_limits<int32_t>::min() &&
         Imm64 <= std::numeric_limits<int32_t>::max();
}]>;

def trunc_imm : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(N->getZExtValue(), SDLoc(N), MVT::i32);
}]>;

def gi_trunc_imm : GICustomOperandRenderer<"renderTruncImm">,
  GISDNodeXFormEquiv<trunc_imm>;

let Predicates = [OptimizedGISelOrOtherSelector] in {
// The SUBREG_TO_REG isn't eliminated at -O0, which can result in pointless
// copies.
def : Pat<(i64 i64imm_32bit:$src),
          (SUBREG_TO_REG (i64 0), (MOVi32imm (trunc_imm imm:$src)), sub_32)>;
}

// Materialize FP constants via MOVi32imm/MOVi64imm (MachO large code model).
def bitcast_fpimm_to_i32 : SDNodeXForm<fpimm, [{
return CurDAG->getTargetConstant(
  N->getValueAPF().bitcastToAPInt().getZExtValue(), SDLoc(N), MVT::i32);
}]>;

def bitcast_fpimm_to_i64 : SDNodeXForm<fpimm, [{
return CurDAG->getTargetConstant(
  N->getValueAPF().bitcastToAPInt().getZExtValue(), SDLoc(N), MVT::i64);
}]>;

def : Pat<(f32 fpimm:$in),
  (COPY_TO_REGCLASS (MOVi32imm (bitcast_fpimm_to_i32 f32:$in)), FPR32)>;
def : Pat<(f64 fpimm:$in),
  (COPY_TO_REGCLASS (MOVi64imm (bitcast_fpimm_to_i64 f64:$in)), FPR64)>;

// Deal with the various forms of (ELF) large addressing with MOVZ/MOVK
// sequences.
def : Pat<(AArch64WrapperLarge tglobaladdr:$g3, tglobaladdr:$g2,
                               tglobaladdr:$g1, tglobaladdr:$g0),
          (MOVKXi (MOVKXi (MOVKXi (MOVZXi tglobaladdr:$g0, 0),
                                  tglobaladdr:$g1, 16),
                          tglobaladdr:$g2, 32),
                  tglobaladdr:$g3, 48)>;

def : Pat<(AArch64WrapperLarge tblockaddress:$g3, tblockaddress:$g2,
                               tblockaddress:$g1, tblockaddress:$g0),
          (MOVKXi (MOVKXi (MOVKXi (MOVZXi tblockaddress:$g0, 0),
                                  tblockaddress:$g1, 16),
                          tblockaddress:$g2, 32),
                  tblockaddress:$g3, 48)>;

def : Pat<(AArch64WrapperLarge tconstpool:$g3, tconstpool:$g2,
                               tconstpool:$g1, tconstpool:$g0),
          (MOVKXi (MOVKXi (MOVKXi (MOVZXi tconstpool:$g0, 0),
                                  tconstpool:$g1, 16),
                          tconstpool:$g2, 32),
                  tconstpool:$g3, 48)>;

def : Pat<(AArch64WrapperLarge tjumptable:$g3, tjumptable:$g2,
                               tjumptable:$g1, tjumptable:$g0),
          (MOVKXi (MOVKXi (MOVKXi (MOVZXi tjumptable:$g0, 0),
                                  tjumptable:$g1, 16),
                          tjumptable:$g2, 32),
                  tjumptable:$g3, 48)>;

//===----------------------------------------------------------------------===//
// Arithmetic instructions.
//===----------------------------------------------------------------------===//

// Add/subtract with carry.
defm ADC : AddSubCarry<0, "adc", "adcs", AArch64adc, AArch64adc_flag>; defm SBC : AddSubCarry<1, "sbc", "sbcs", AArch64sbc, AArch64sbc_flag>; def : InstAlias<"ngc $dst, $src", (SBCWr GPR32:$dst, WZR, GPR32:$src)>; def : InstAlias<"ngc $dst, $src", (SBCXr GPR64:$dst, XZR, GPR64:$src)>; def : InstAlias<"ngcs $dst, $src", (SBCSWr GPR32:$dst, WZR, GPR32:$src)>; def : InstAlias<"ngcs $dst, $src", (SBCSXr GPR64:$dst, XZR, GPR64:$src)>; // Add/subtract defm ADD : AddSub<0, "add", "sub", add>; defm SUB : AddSub<1, "sub", "add">; def : InstAlias<"mov $dst, $src", (ADDWri GPR32sponly:$dst, GPR32sp:$src, 0, 0)>; def : InstAlias<"mov $dst, $src", (ADDWri GPR32sp:$dst, GPR32sponly:$src, 0, 0)>; def : InstAlias<"mov $dst, $src", (ADDXri GPR64sponly:$dst, GPR64sp:$src, 0, 0)>; def : InstAlias<"mov $dst, $src", (ADDXri GPR64sp:$dst, GPR64sponly:$src, 0, 0)>; defm ADDS : AddSubS<0, "adds", AArch64add_flag, "cmn", "subs", "cmp">; defm SUBS : AddSubS<1, "subs", AArch64sub_flag, "cmp", "adds", "cmn">; def copyFromSP: PatLeaf<(i64 GPR64:$src), [{ return N->getOpcode() == ISD::CopyFromReg && cast(N->getOperand(1))->getReg() == AArch64::SP; }]>; // Use SUBS instead of SUB to enable CSE between SUBS and SUB. def : Pat<(sub GPR32sp:$Rn, addsub_shifted_imm32:$imm), (SUBSWri GPR32sp:$Rn, addsub_shifted_imm32:$imm)>; def : Pat<(sub GPR64sp:$Rn, addsub_shifted_imm64:$imm), (SUBSXri GPR64sp:$Rn, addsub_shifted_imm64:$imm)>; def : Pat<(sub GPR32:$Rn, GPR32:$Rm), (SUBSWrr GPR32:$Rn, GPR32:$Rm)>; def : Pat<(sub GPR64:$Rn, GPR64:$Rm), (SUBSXrr GPR64:$Rn, GPR64:$Rm)>; def : Pat<(sub GPR32:$Rn, arith_shifted_reg32:$Rm), (SUBSWrs GPR32:$Rn, arith_shifted_reg32:$Rm)>; def : Pat<(sub GPR64:$Rn, arith_shifted_reg64:$Rm), (SUBSXrs GPR64:$Rn, arith_shifted_reg64:$Rm)>; let AddedComplexity = 1 in { def : Pat<(sub GPR32sp:$R2, arith_extended_reg32_i32:$R3), (SUBSWrx GPR32sp:$R2, arith_extended_reg32_i32:$R3)>; def : Pat<(sub GPR64sp:$R2, arith_extended_reg32to64_i64:$R3), (SUBSXrx GPR64sp:$R2, arith_extended_reg32to64_i64:$R3)>; def : Pat<(sub copyFromSP:$R2, (arith_uxtx GPR64:$R3, arith_extendlsl64:$imm)), (SUBXrx64 GPR64sp:$R2, GPR64:$R3, arith_extendlsl64:$imm)>; } // Because of the immediate format for add/sub-imm instructions, the // expression (add x, -1) must be transformed to (SUB{W,X}ri x, 1). // These patterns capture that transformation. let AddedComplexity = 1 in { def : Pat<(add GPR32:$Rn, neg_addsub_shifted_imm32:$imm), (SUBSWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>; def : Pat<(add GPR64:$Rn, neg_addsub_shifted_imm64:$imm), (SUBSXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>; def : Pat<(sub GPR32:$Rn, neg_addsub_shifted_imm32:$imm), (ADDWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>; def : Pat<(sub GPR64:$Rn, neg_addsub_shifted_imm64:$imm), (ADDXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>; } // Because of the immediate format for add/sub-imm instructions, the // expression (add x, -1) must be transformed to (SUB{W,X}ri x, 1). // These patterns capture that transformation. 
let AddedComplexity = 1 in { def : Pat<(AArch64add_flag GPR32:$Rn, neg_addsub_shifted_imm32:$imm), (SUBSWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>; def : Pat<(AArch64add_flag GPR64:$Rn, neg_addsub_shifted_imm64:$imm), (SUBSXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>; def : Pat<(AArch64sub_flag GPR32:$Rn, neg_addsub_shifted_imm32:$imm), (ADDSWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>; def : Pat<(AArch64sub_flag GPR64:$Rn, neg_addsub_shifted_imm64:$imm), (ADDSXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>; } def : InstAlias<"neg $dst, $src", (SUBWrs GPR32:$dst, WZR, GPR32:$src, 0), 3>; def : InstAlias<"neg $dst, $src", (SUBXrs GPR64:$dst, XZR, GPR64:$src, 0), 3>; def : InstAlias<"neg $dst, $src$shift", (SUBWrs GPR32:$dst, WZR, GPR32:$src, arith_shift32:$shift), 2>; def : InstAlias<"neg $dst, $src$shift", (SUBXrs GPR64:$dst, XZR, GPR64:$src, arith_shift64:$shift), 2>; def : InstAlias<"negs $dst, $src", (SUBSWrs GPR32:$dst, WZR, GPR32:$src, 0), 3>; def : InstAlias<"negs $dst, $src", (SUBSXrs GPR64:$dst, XZR, GPR64:$src, 0), 3>; def : InstAlias<"negs $dst, $src$shift", (SUBSWrs GPR32:$dst, WZR, GPR32:$src, arith_shift32:$shift), 2>; def : InstAlias<"negs $dst, $src$shift", (SUBSXrs GPR64:$dst, XZR, GPR64:$src, arith_shift64:$shift), 2>; // Unsigned/Signed divide defm UDIV : Div<0, "udiv", udiv>; defm SDIV : Div<1, "sdiv", sdiv>; def : Pat<(int_aarch64_udiv GPR32:$Rn, GPR32:$Rm), (UDIVWr GPR32:$Rn, GPR32:$Rm)>; def : Pat<(int_aarch64_udiv GPR64:$Rn, GPR64:$Rm), (UDIVXr GPR64:$Rn, GPR64:$Rm)>; def : Pat<(int_aarch64_sdiv GPR32:$Rn, GPR32:$Rm), (SDIVWr GPR32:$Rn, GPR32:$Rm)>; def : Pat<(int_aarch64_sdiv GPR64:$Rn, GPR64:$Rm), (SDIVXr GPR64:$Rn, GPR64:$Rm)>; // Variable shift defm ASRV : Shift<0b10, "asr", sra>; defm LSLV : Shift<0b00, "lsl", shl>; defm LSRV : Shift<0b01, "lsr", srl>; defm RORV : Shift<0b11, "ror", rotr>; def : ShiftAlias<"asrv", ASRVWr, GPR32>; def : ShiftAlias<"asrv", ASRVXr, GPR64>; def : ShiftAlias<"lslv", LSLVWr, GPR32>; def : ShiftAlias<"lslv", LSLVXr, GPR64>; def : ShiftAlias<"lsrv", LSRVWr, GPR32>; def : ShiftAlias<"lsrv", LSRVXr, GPR64>; def : ShiftAlias<"rorv", RORVWr, GPR32>; def : ShiftAlias<"rorv", RORVXr, GPR64>; // Multiply-add let AddedComplexity = 5 in { defm MADD : MulAccum<0, "madd">; defm MSUB : MulAccum<1, "msub">; def : Pat<(i32 (mul GPR32:$Rn, GPR32:$Rm)), (MADDWrrr GPR32:$Rn, GPR32:$Rm, WZR)>; def : Pat<(i64 (mul GPR64:$Rn, GPR64:$Rm)), (MADDXrrr GPR64:$Rn, GPR64:$Rm, XZR)>; def : Pat<(i32 (ineg (mul GPR32:$Rn, GPR32:$Rm))), (MSUBWrrr GPR32:$Rn, GPR32:$Rm, WZR)>; def : Pat<(i64 (ineg (mul GPR64:$Rn, GPR64:$Rm))), (MSUBXrrr GPR64:$Rn, GPR64:$Rm, XZR)>; def : Pat<(i32 (mul (ineg GPR32:$Rn), GPR32:$Rm)), (MSUBWrrr GPR32:$Rn, GPR32:$Rm, WZR)>; def : Pat<(i64 (mul (ineg GPR64:$Rn), GPR64:$Rm)), (MSUBXrrr GPR64:$Rn, GPR64:$Rm, XZR)>; } // AddedComplexity = 5 let AddedComplexity = 5 in { def SMADDLrrr : WideMulAccum<0, 0b001, "smaddl", add, sext>; def SMSUBLrrr : WideMulAccum<1, 0b001, "smsubl", sub, sext>; def UMADDLrrr : WideMulAccum<0, 0b101, "umaddl", add, zext>; def UMSUBLrrr : WideMulAccum<1, 0b101, "umsubl", sub, zext>; def : Pat<(i64 (mul (sext_inreg GPR64:$Rn, i32), (sext_inreg GPR64:$Rm, i32))), (SMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), XZR)>; def : Pat<(i64 (mul (sext_inreg GPR64:$Rn, i32), (sext GPR32:$Rm))), (SMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, XZR)>; def : Pat<(i64 (mul (sext GPR32:$Rn), (sext GPR32:$Rm))), (SMADDLrrr GPR32:$Rn, GPR32:$Rm, XZR)>; def : Pat<(i64 (mul (and GPR64:$Rn, 0xFFFFFFFF), (and 
GPR64:$Rm, 0xFFFFFFFF))), (UMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), XZR)>; def : Pat<(i64 (mul (and GPR64:$Rn, 0xFFFFFFFF), (zext GPR32:$Rm))), (UMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, XZR)>; def : Pat<(i64 (mul (zext GPR32:$Rn), (zext GPR32:$Rm))), (UMADDLrrr GPR32:$Rn, GPR32:$Rm, XZR)>; def : Pat<(i64 (ineg (mul (sext GPR32:$Rn), (sext GPR32:$Rm)))), (SMSUBLrrr GPR32:$Rn, GPR32:$Rm, XZR)>; def : Pat<(i64 (ineg (mul (zext GPR32:$Rn), (zext GPR32:$Rm)))), (UMSUBLrrr GPR32:$Rn, GPR32:$Rm, XZR)>; def : Pat<(i64 (mul (sext GPR32:$Rn), (s64imm_32bit:$C))), (SMADDLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), XZR)>; def : Pat<(i64 (mul (zext GPR32:$Rn), (i64imm_32bit:$C))), (UMADDLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), XZR)>; def : Pat<(i64 (mul (sext_inreg GPR64:$Rn, i32), (s64imm_32bit:$C))), (SMADDLrrr (i32 (EXTRACT_SUBREG GPR64:$Rn, sub_32)), (MOVi32imm (trunc_imm imm:$C)), XZR)>; def : Pat<(i64 (ineg (mul (sext GPR32:$Rn), (s64imm_32bit:$C)))), (SMSUBLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), XZR)>; def : Pat<(i64 (ineg (mul (zext GPR32:$Rn), (i64imm_32bit:$C)))), (UMSUBLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), XZR)>; def : Pat<(i64 (ineg (mul (sext_inreg GPR64:$Rn, i32), (s64imm_32bit:$C)))), (SMSUBLrrr (i32 (EXTRACT_SUBREG GPR64:$Rn, sub_32)), (MOVi32imm (trunc_imm imm:$C)), XZR)>; def : Pat<(i64 (add (mul (sext GPR32:$Rn), (s64imm_32bit:$C)), GPR64:$Ra)), (SMADDLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>; def : Pat<(i64 (add (mul (zext GPR32:$Rn), (i64imm_32bit:$C)), GPR64:$Ra)), (UMADDLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>; def : Pat<(i64 (add (mul (sext_inreg GPR64:$Rn, i32), (s64imm_32bit:$C)), GPR64:$Ra)), (SMADDLrrr (i32 (EXTRACT_SUBREG GPR64:$Rn, sub_32)), (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>; def : Pat<(i64 (sub GPR64:$Ra, (mul (sext GPR32:$Rn), (s64imm_32bit:$C)))), (SMSUBLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>; def : Pat<(i64 (sub GPR64:$Ra, (mul (zext GPR32:$Rn), (i64imm_32bit:$C)))), (UMSUBLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>; def : Pat<(i64 (sub GPR64:$Ra, (mul (sext_inreg GPR64:$Rn, i32), (s64imm_32bit:$C)))), (SMSUBLrrr (i32 (EXTRACT_SUBREG GPR64:$Rn, sub_32)), (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>; def : Pat<(i64 (smullwithsignbits GPR64:$Rn, GPR64:$Rm)), (SMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), XZR)>; def : Pat<(i64 (smullwithsignbits GPR64:$Rn, (sext GPR32:$Rm))), (SMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, XZR)>; def : Pat<(i64 (add (smullwithsignbits GPR64:$Rn, GPR64:$Rm), GPR64:$Ra)), (SMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), GPR64:$Ra)>; def : Pat<(i64 (add (smullwithsignbits GPR64:$Rn, (sext GPR32:$Rm)), GPR64:$Ra)), (SMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, GPR64:$Ra)>; def : Pat<(i64 (ineg (smullwithsignbits GPR64:$Rn, GPR64:$Rm))), (SMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), XZR)>; def : Pat<(i64 (ineg (smullwithsignbits GPR64:$Rn, (sext GPR32:$Rm)))), (SMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, XZR)>; def : Pat<(i64 (sub GPR64:$Ra, (smullwithsignbits GPR64:$Rn, GPR64:$Rm))), (SMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), GPR64:$Ra)>; def : Pat<(i64 (sub GPR64:$Ra, (smullwithsignbits GPR64:$Rn, (sext GPR32:$Rm)))), (SMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, GPR64:$Ra)>; def : Pat<(i64 (mul top32Zero:$Rn, top32Zero:$Rm)), (UMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), XZR)>; 
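// Illustrative note (not part of the original source): the widening
// multiply-accumulate patterns above and below select SMADDL/UMADDL (and
// SMSUBL/UMSUBL) whenever both 64-bit multiplicands are provably sign- or
// zero-extended from 32 bits. A minimal C++ sketch of source that is expected
// to match these patterns (register assignment shown only for illustration,
// assuming standard AAPCS64 argument passing):
//
//   long long smaddl(int a, int b, long long acc) {
//     return acc + (long long)a * b;              // -> smaddl x0, w0, w1, x2
//   }
//   unsigned long long umaddl(unsigned a, unsigned b, unsigned long long acc) {
//     return acc + (unsigned long long)a * b;     // -> umaddl x0, w0, w1, x2
//   }
//
// Only the instruction choice is the point here, not the exact registers.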
def : Pat<(i64 (mul top32Zero:$Rn, (zext GPR32:$Rm))), (UMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, XZR)>; def : Pat<(i64 (add (mul top32Zero:$Rn, top32Zero:$Rm), GPR64:$Ra)), (UMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), GPR64:$Ra)>; def : Pat<(i64 (add (mul top32Zero:$Rn, (zext GPR32:$Rm)), GPR64:$Ra)), (UMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, GPR64:$Ra)>; def : Pat<(i64 (ineg (mul top32Zero:$Rn, top32Zero:$Rm))), (UMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), XZR)>; def : Pat<(i64 (ineg (mul top32Zero:$Rn, (zext GPR32:$Rm)))), (UMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, XZR)>; def : Pat<(i64 (sub GPR64:$Ra, (mul top32Zero:$Rn, top32Zero:$Rm))), (UMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), GPR64:$Ra)>; def : Pat<(i64 (sub GPR64:$Ra, (mul top32Zero:$Rn, (zext GPR32:$Rm)))), (UMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, GPR64:$Ra)>; } // AddedComplexity = 5 def : MulAccumWAlias<"mul", MADDWrrr>; def : MulAccumXAlias<"mul", MADDXrrr>; def : MulAccumWAlias<"mneg", MSUBWrrr>; def : MulAccumXAlias<"mneg", MSUBXrrr>; def : WideMulAccumAlias<"smull", SMADDLrrr>; def : WideMulAccumAlias<"smnegl", SMSUBLrrr>; def : WideMulAccumAlias<"umull", UMADDLrrr>; def : WideMulAccumAlias<"umnegl", UMSUBLrrr>; // Multiply-high def SMULHrr : MulHi<0b010, "smulh", mulhs>; def UMULHrr : MulHi<0b110, "umulh", mulhu>; // CRC32 def CRC32Brr : BaseCRC32<0, 0b00, 0, GPR32, int_aarch64_crc32b, "crc32b">; def CRC32Hrr : BaseCRC32<0, 0b01, 0, GPR32, int_aarch64_crc32h, "crc32h">; def CRC32Wrr : BaseCRC32<0, 0b10, 0, GPR32, int_aarch64_crc32w, "crc32w">; def CRC32Xrr : BaseCRC32<1, 0b11, 0, GPR64, int_aarch64_crc32x, "crc32x">; def CRC32CBrr : BaseCRC32<0, 0b00, 1, GPR32, int_aarch64_crc32cb, "crc32cb">; def CRC32CHrr : BaseCRC32<0, 0b01, 1, GPR32, int_aarch64_crc32ch, "crc32ch">; def CRC32CWrr : BaseCRC32<0, 0b10, 1, GPR32, int_aarch64_crc32cw, "crc32cw">; def CRC32CXrr : BaseCRC32<1, 0b11, 1, GPR64, int_aarch64_crc32cx, "crc32cx">; // v8.1 atomic CAS defm CAS : CompareAndSwap<0, 0, "">; defm CASA : CompareAndSwap<1, 0, "a">; defm CASL : CompareAndSwap<0, 1, "l">; defm CASAL : CompareAndSwap<1, 1, "al">; // v8.1 atomic CASP defm CASP : CompareAndSwapPair<0, 0, "">; defm CASPA : CompareAndSwapPair<1, 0, "a">; defm CASPL : CompareAndSwapPair<0, 1, "l">; defm CASPAL : CompareAndSwapPair<1, 1, "al">; // v8.1 atomic SWP defm SWP : Swap<0, 0, "">; defm SWPA : Swap<1, 0, "a">; defm SWPL : Swap<0, 1, "l">; defm SWPAL : Swap<1, 1, "al">; // v8.1 atomic LD(register). 
Performs load and then ST(register) defm LDADD : LDOPregister<0b000, "add", 0, 0, "">; defm LDADDA : LDOPregister<0b000, "add", 1, 0, "a">; defm LDADDL : LDOPregister<0b000, "add", 0, 1, "l">; defm LDADDAL : LDOPregister<0b000, "add", 1, 1, "al">; defm LDCLR : LDOPregister<0b001, "clr", 0, 0, "">; defm LDCLRA : LDOPregister<0b001, "clr", 1, 0, "a">; defm LDCLRL : LDOPregister<0b001, "clr", 0, 1, "l">; defm LDCLRAL : LDOPregister<0b001, "clr", 1, 1, "al">; defm LDEOR : LDOPregister<0b010, "eor", 0, 0, "">; defm LDEORA : LDOPregister<0b010, "eor", 1, 0, "a">; defm LDEORL : LDOPregister<0b010, "eor", 0, 1, "l">; defm LDEORAL : LDOPregister<0b010, "eor", 1, 1, "al">; defm LDSET : LDOPregister<0b011, "set", 0, 0, "">; defm LDSETA : LDOPregister<0b011, "set", 1, 0, "a">; defm LDSETL : LDOPregister<0b011, "set", 0, 1, "l">; defm LDSETAL : LDOPregister<0b011, "set", 1, 1, "al">; defm LDSMAX : LDOPregister<0b100, "smax", 0, 0, "">; defm LDSMAXA : LDOPregister<0b100, "smax", 1, 0, "a">; defm LDSMAXL : LDOPregister<0b100, "smax", 0, 1, "l">; defm LDSMAXAL : LDOPregister<0b100, "smax", 1, 1, "al">; defm LDSMIN : LDOPregister<0b101, "smin", 0, 0, "">; defm LDSMINA : LDOPregister<0b101, "smin", 1, 0, "a">; defm LDSMINL : LDOPregister<0b101, "smin", 0, 1, "l">; defm LDSMINAL : LDOPregister<0b101, "smin", 1, 1, "al">; defm LDUMAX : LDOPregister<0b110, "umax", 0, 0, "">; defm LDUMAXA : LDOPregister<0b110, "umax", 1, 0, "a">; defm LDUMAXL : LDOPregister<0b110, "umax", 0, 1, "l">; defm LDUMAXAL : LDOPregister<0b110, "umax", 1, 1, "al">; defm LDUMIN : LDOPregister<0b111, "umin", 0, 0, "">; defm LDUMINA : LDOPregister<0b111, "umin", 1, 0, "a">; defm LDUMINL : LDOPregister<0b111, "umin", 0, 1, "l">; defm LDUMINAL : LDOPregister<0b111, "umin", 1, 1, "al">; // v8.1 atomic ST(register) as aliases to "LD(register) when Rt=xZR" defm : STOPregister<"stadd","LDADD">; // STADDx defm : STOPregister<"stclr","LDCLR">; // STCLRx defm : STOPregister<"steor","LDEOR">; // STEORx defm : STOPregister<"stset","LDSET">; // STSETx defm : STOPregister<"stsmax","LDSMAX">;// STSMAXx defm : STOPregister<"stsmin","LDSMIN">;// STSMINx defm : STOPregister<"stumax","LDUMAX">;// STUMAXx defm : STOPregister<"stumin","LDUMIN">;// STUMINx // v8.5 Memory Tagging Extension let Predicates = [HasMTE] in { def IRG : BaseTwoOperandRegReg<0b1, 0b0, 0b000100, GPR64sp, "irg", int_aarch64_irg, GPR64sp, GPR64>, Sched<[]>; def GMI : BaseTwoOperandRegReg<0b1, 0b0, 0b000101, GPR64, "gmi", int_aarch64_gmi, GPR64sp>, Sched<[]> { let isNotDuplicable = 1; } def ADDG : AddSubG<0, "addg", null_frag>; def SUBG : AddSubG<1, "subg", null_frag>; def : InstAlias<"irg $dst, $src", (IRG GPR64sp:$dst, GPR64sp:$src, XZR), 1>; def SUBP : SUBP<0, "subp", int_aarch64_subp>, Sched<[]>; def SUBPS : SUBP<1, "subps", null_frag>, Sched<[]>{ let Defs = [NZCV]; } def : InstAlias<"cmpp $lhs, $rhs", (SUBPS XZR, GPR64sp:$lhs, GPR64sp:$rhs), 0>; def LDG : MemTagLoad<"ldg", "\t$Rt, [$Rn, $offset]">; def : Pat<(int_aarch64_addg (am_indexedu6s128 GPR64sp:$Rn, uimm6s16:$imm6), imm0_15:$imm4), (ADDG GPR64sp:$Rn, imm0_63:$imm6, imm0_15:$imm4)>; def : Pat<(int_aarch64_ldg GPR64:$Rt, (am_indexeds9s128 GPR64sp:$Rn, simm9s16:$offset)), (LDG GPR64:$Rt, GPR64sp:$Rn, simm9s16:$offset)>; def : InstAlias<"ldg $Rt, [$Rn]", (LDG GPR64:$Rt, GPR64sp:$Rn, 0), 1>; def LDGM : MemTagVector<1, "ldgm", "\t$Rt, [$Rn]", (outs GPR64:$Rt), (ins GPR64sp:$Rn)>; def STGM : MemTagVector<0, "stgm", "\t$Rt, [$Rn]", (outs), (ins GPR64:$Rt, GPR64sp:$Rn)>; def STZGM : MemTagVector<0, "stzgm", "\t$Rt, [$Rn]", (outs), (ins 
GPR64:$Rt, GPR64sp:$Rn)> { let Inst{23} = 0; } defm STG : MemTagStore<0b00, "stg">; defm STZG : MemTagStore<0b01, "stzg">; defm ST2G : MemTagStore<0b10, "st2g">; defm STZ2G : MemTagStore<0b11, "stz2g">; def : Pat<(AArch64stg GPR64sp:$Rn, (am_indexeds9s128 GPR64sp:$Rm, simm9s16:$imm)), (STGi $Rn, $Rm, $imm)>; def : Pat<(AArch64stzg GPR64sp:$Rn, (am_indexeds9s128 GPR64sp:$Rm, simm9s16:$imm)), (STZGi $Rn, $Rm, $imm)>; def : Pat<(AArch64st2g GPR64sp:$Rn, (am_indexeds9s128 GPR64sp:$Rm, simm9s16:$imm)), (ST2Gi $Rn, $Rm, $imm)>; def : Pat<(AArch64stz2g GPR64sp:$Rn, (am_indexeds9s128 GPR64sp:$Rm, simm9s16:$imm)), (STZ2Gi $Rn, $Rm, $imm)>; defm STGP : StorePairOffset <0b01, 0, GPR64z, simm7s16, "stgp">; def STGPpre : StorePairPreIdx <0b01, 0, GPR64z, simm7s16, "stgp">; def STGPpost : StorePairPostIdx<0b01, 0, GPR64z, simm7s16, "stgp">; def : Pat<(int_aarch64_stg GPR64:$Rt, (am_indexeds9s128 GPR64sp:$Rn, simm9s16:$offset)), (STGi GPR64:$Rt, GPR64sp:$Rn, simm9s16:$offset)>; def : Pat<(int_aarch64_stgp (am_indexed7s128 GPR64sp:$Rn, simm7s16:$imm), GPR64:$Rt, GPR64:$Rt2), (STGPi $Rt, $Rt2, $Rn, $imm)>; def IRGstack : Pseudo<(outs GPR64sp:$Rd), (ins GPR64sp:$Rsp, GPR64:$Rm), []>, Sched<[]>; def TAGPstack : Pseudo<(outs GPR64sp:$Rd), (ins GPR64sp:$Rn, uimm6s16:$imm6, GPR64sp:$Rm, imm0_15:$imm4), []>, Sched<[]>; // Explicit SP in the first operand prevents ShrinkWrap optimization // from leaving this instruction out of the stack frame. When IRGstack // is transformed into IRG, this operand is replaced with the actual // register / expression for the tagged base pointer of the current function. def : Pat<(int_aarch64_irg_sp i64:$Rm), (IRGstack SP, i64:$Rm)>; // Large STG to be expanded into a loop. $sz is the size, $Rn is start address. // $Rn_wback is one past the end of the range. $Rm is the loop counter. let isCodeGenOnly=1, mayStore=1, Defs=[NZCV] in { def STGloop_wback : Pseudo<(outs GPR64common:$Rm, GPR64sp:$Rn_wback), (ins i64imm:$sz, GPR64sp:$Rn), [], "$Rn = $Rn_wback,@earlyclobber $Rn_wback,@earlyclobber $Rm" >, Sched<[WriteAdr, WriteST]>; def STZGloop_wback : Pseudo<(outs GPR64common:$Rm, GPR64sp:$Rn_wback), (ins i64imm:$sz, GPR64sp:$Rn), [], "$Rn = $Rn_wback,@earlyclobber $Rn_wback,@earlyclobber $Rm" >, Sched<[WriteAdr, WriteST]>; // A variant of the above where $Rn2 is an independent register not tied to the input register $Rn. // Their purpose is to use a FrameIndex operand as $Rn (which of course can not be written back). def STGloop : Pseudo<(outs GPR64common:$Rm, GPR64sp:$Rn2), (ins i64imm:$sz, GPR64sp:$Rn), [], "@earlyclobber $Rn2,@earlyclobber $Rm" >, Sched<[WriteAdr, WriteST]>; def STZGloop : Pseudo<(outs GPR64common:$Rm, GPR64sp:$Rn2), (ins i64imm:$sz, GPR64sp:$Rn), [], "@earlyclobber $Rn2,@earlyclobber $Rm" >, Sched<[WriteAdr, WriteST]>; } } // Predicates = [HasMTE] //===----------------------------------------------------------------------===// // Logical instructions. //===----------------------------------------------------------------------===// // (immediate) defm ANDS : LogicalImmS<0b11, "ands", AArch64and_flag, "bics">; defm AND : LogicalImm<0b00, "and", and, "bic">; defm EOR : LogicalImm<0b10, "eor", xor, "eon">; defm ORR : LogicalImm<0b01, "orr", or, "orn">; // FIXME: these aliases *are* canonical sometimes (when movz can't be // used). Actually, it seems to be working right now, but putting logical_immXX // here is a bit dodgy on the AsmParser side too. 
def : InstAlias<"mov $dst, $imm", (ORRWri GPR32sp:$dst, WZR, logical_imm32:$imm), 0>; def : InstAlias<"mov $dst, $imm", (ORRXri GPR64sp:$dst, XZR, logical_imm64:$imm), 0>; // (register) defm ANDS : LogicalRegS<0b11, 0, "ands", AArch64and_flag>; defm BICS : LogicalRegS<0b11, 1, "bics", BinOpFrag<(AArch64and_flag node:$LHS, (not node:$RHS))>>; defm AND : LogicalReg<0b00, 0, "and", and>; defm BIC : LogicalReg<0b00, 1, "bic", BinOpFrag<(and node:$LHS, (not node:$RHS))>, 3>; defm EON : LogicalReg<0b10, 1, "eon", BinOpFrag<(not (xor node:$LHS, node:$RHS))>>; defm EOR : LogicalReg<0b10, 0, "eor", xor>; defm ORN : LogicalReg<0b01, 1, "orn", BinOpFrag<(or node:$LHS, (not node:$RHS))>>; defm ORR : LogicalReg<0b01, 0, "orr", or>; def : InstAlias<"mov $dst, $src", (ORRWrs GPR32:$dst, WZR, GPR32:$src, 0), 2>; def : InstAlias<"mov $dst, $src", (ORRXrs GPR64:$dst, XZR, GPR64:$src, 0), 2>; def : InstAlias<"mvn $Wd, $Wm", (ORNWrs GPR32:$Wd, WZR, GPR32:$Wm, 0), 3>; def : InstAlias<"mvn $Xd, $Xm", (ORNXrs GPR64:$Xd, XZR, GPR64:$Xm, 0), 3>; def : InstAlias<"mvn $Wd, $Wm$sh", (ORNWrs GPR32:$Wd, WZR, GPR32:$Wm, logical_shift32:$sh), 2>; def : InstAlias<"mvn $Xd, $Xm$sh", (ORNXrs GPR64:$Xd, XZR, GPR64:$Xm, logical_shift64:$sh), 2>; def : InstAlias<"tst $src1, $src2", (ANDSWri WZR, GPR32:$src1, logical_imm32:$src2), 2>; def : InstAlias<"tst $src1, $src2", (ANDSXri XZR, GPR64:$src1, logical_imm64:$src2), 2>; def : InstAlias<"tst $src1, $src2", (ANDSWrs WZR, GPR32:$src1, GPR32:$src2, 0), 3>; def : InstAlias<"tst $src1, $src2", (ANDSXrs XZR, GPR64:$src1, GPR64:$src2, 0), 3>; def : InstAlias<"tst $src1, $src2$sh", (ANDSWrs WZR, GPR32:$src1, GPR32:$src2, logical_shift32:$sh), 2>; def : InstAlias<"tst $src1, $src2$sh", (ANDSXrs XZR, GPR64:$src1, GPR64:$src2, logical_shift64:$sh), 2>; def : Pat<(not GPR32:$Wm), (ORNWrr WZR, GPR32:$Wm)>; def : Pat<(not GPR64:$Xm), (ORNXrr XZR, GPR64:$Xm)>; // Emit (and 0xFFFFFFFF) as a ORRWrr move which may be eliminated. let AddedComplexity = 6 in def : Pat<(i64 (and GPR64:$Rn, 0xffffffff)), (SUBREG_TO_REG (i64 0), (ORRWrr WZR, (EXTRACT_SUBREG GPR64:$Rn, sub_32)), sub_32)>; //===----------------------------------------------------------------------===// // One operand data processing instructions. //===----------------------------------------------------------------------===// defm CLS : OneOperandData<0b000101, "cls">; defm CLZ : OneOperandData<0b000100, "clz", ctlz>; defm RBIT : OneOperandData<0b000000, "rbit", bitreverse>; def REV16Wr : OneWRegData<0b000001, "rev16", UnOpFrag<(rotr (bswap node:$LHS), (i64 16))>>; def REV16Xr : OneXRegData<0b000001, "rev16", null_frag>; def : Pat<(cttz GPR32:$Rn), (CLZWr (RBITWr GPR32:$Rn))>; def : Pat<(cttz GPR64:$Rn), (CLZXr (RBITXr GPR64:$Rn))>; def : Pat<(ctlz (or (shl (xor (sra GPR32:$Rn, (i64 31)), GPR32:$Rn), (i64 1)), (i32 1))), (CLSWr GPR32:$Rn)>; def : Pat<(ctlz (or (shl (xor (sra GPR64:$Rn, (i64 63)), GPR64:$Rn), (i64 1)), (i64 1))), (CLSXr GPR64:$Rn)>; def : Pat<(int_aarch64_cls GPR32:$Rn), (CLSWr GPR32:$Rn)>; def : Pat<(int_aarch64_cls64 GPR64:$Rm), (EXTRACT_SUBREG (CLSXr GPR64:$Rm), sub_32)>; // Unlike the other one operand instructions, the instructions with the "rev" // mnemonic do *not* just different in the size bit, but actually use different // opcode bits for the different sizes. 
def REVWr   : OneWRegData<0b000010, "rev", bswap>;
def REVXr   : OneXRegData<0b000011, "rev", bswap>;
def REV32Xr : OneXRegData<0b000010, "rev32",
                          UnOpFrag<(rotr (bswap node:$LHS), (i64 32))>>;

def : InstAlias<"rev64 $Rd, $Rn", (REVXr GPR64:$Rd, GPR64:$Rn), 0>;

// The bswap commutes with the rotr so we want a pattern for both possible
// orders.
def : Pat<(bswap (rotr GPR32:$Rn, (i64 16))), (REV16Wr GPR32:$Rn)>;
def : Pat<(bswap (rotr GPR64:$Rn, (i64 32))), (REV32Xr GPR64:$Rn)>;

// Match (srl (bswap x), C) -> revC if the upper bswap bits are known zero.
def : Pat<(srl (bswap top16Zero:$Rn), (i64 16)), (REV16Wr GPR32:$Rn)>;
def : Pat<(srl (bswap top32Zero:$Rn), (i64 32)), (REV32Xr GPR64:$Rn)>;

def : Pat<(or (and (srl GPR64:$Rn, (i64 8)), (i64 0x00ff00ff00ff00ff)),
              (and (shl GPR64:$Rn, (i64 8)), (i64 0xff00ff00ff00ff00))),
          (REV16Xr GPR64:$Rn)>;

//===----------------------------------------------------------------------===//
// Bitfield immediate extraction instruction.
//===----------------------------------------------------------------------===//

let hasSideEffects = 0 in
defm EXTR : ExtractImm<"extr">;
def : InstAlias<"ror $dst, $src, $shift",
                (EXTRWrri GPR32:$dst, GPR32:$src, GPR32:$src, imm0_31:$shift)>;
def : InstAlias<"ror $dst, $src, $shift",
                (EXTRXrri GPR64:$dst, GPR64:$src, GPR64:$src, imm0_63:$shift)>;

def : Pat<(rotr GPR32:$Rn, (i64 imm0_31:$imm)),
          (EXTRWrri GPR32:$Rn, GPR32:$Rn, imm0_31:$imm)>;
def : Pat<(rotr GPR64:$Rn, (i64 imm0_63:$imm)),
          (EXTRXrri GPR64:$Rn, GPR64:$Rn, imm0_63:$imm)>;

//===----------------------------------------------------------------------===//
// Other bitfield immediate instructions.
//===----------------------------------------------------------------------===//

let hasSideEffects = 0 in {
defm BFM  : BitfieldImmWith2RegArgs<0b01, "bfm">;
defm SBFM : BitfieldImm<0b00, "sbfm">;
defm UBFM : BitfieldImm<0b10, "ubfm">;
}

def i32shift_a : Operand<i64>, SDNodeXForm<imm, [{
  uint64_t enc = (32 - N->getZExtValue()) & 0x1f;
  return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
}]>;

def i32shift_b : Operand<i64>, SDNodeXForm<imm, [{
  uint64_t enc = 31 - N->getZExtValue();
  return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
}]>;

// min(7, 31 - shift_amt)
def i32shift_sext_i8 : Operand<i64>, SDNodeXForm<imm, [{
  uint64_t enc = 31 - N->getZExtValue();
  enc = enc > 7 ? 7 : enc;
  return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
}]>;

// min(15, 31 - shift_amt)
def i32shift_sext_i16 : Operand<i64>, SDNodeXForm<imm, [{
  uint64_t enc = 31 - N->getZExtValue();
  enc = enc > 15 ? 15 : enc;
  return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
}]>;

def i64shift_a : Operand<i64>, SDNodeXForm<imm, [{
  uint64_t enc = (64 - N->getZExtValue()) & 0x3f;
  return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
}]>;

def i64shift_b : Operand<i64>, SDNodeXForm<imm, [{
  uint64_t enc = 63 - N->getZExtValue();
  return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
}]>;

// min(7, 63 - shift_amt)
def i64shift_sext_i8 : Operand<i64>, SDNodeXForm<imm, [{
  uint64_t enc = 63 - N->getZExtValue();
  enc = enc > 7 ? 7 : enc;
  return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
}]>;

// min(15, 63 - shift_amt)
def i64shift_sext_i16 : Operand<i64>, SDNodeXForm<imm, [{
  uint64_t enc = 63 - N->getZExtValue();
  enc = enc > 15 ? 15 : enc;
  return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
}]>;

// min(31, 63 - shift_amt)
def i64shift_sext_i32 : Operand<i64>, SDNodeXForm<imm, [{
  uint64_t enc = 63 - N->getZExtValue();
  enc = enc > 31 ?
31 : enc; return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64); }]>; def : Pat<(shl GPR32:$Rn, (i64 imm0_31:$imm)), (UBFMWri GPR32:$Rn, (i64 (i32shift_a imm0_31:$imm)), (i64 (i32shift_b imm0_31:$imm)))>; def : Pat<(shl GPR64:$Rn, (i64 imm0_63:$imm)), (UBFMXri GPR64:$Rn, (i64 (i64shift_a imm0_63:$imm)), (i64 (i64shift_b imm0_63:$imm)))>; let AddedComplexity = 10 in { def : Pat<(sra GPR32:$Rn, (i64 imm0_31:$imm)), (SBFMWri GPR32:$Rn, imm0_31:$imm, 31)>; def : Pat<(sra GPR64:$Rn, (i64 imm0_63:$imm)), (SBFMXri GPR64:$Rn, imm0_63:$imm, 63)>; } def : InstAlias<"asr $dst, $src, $shift", (SBFMWri GPR32:$dst, GPR32:$src, imm0_31:$shift, 31)>; def : InstAlias<"asr $dst, $src, $shift", (SBFMXri GPR64:$dst, GPR64:$src, imm0_63:$shift, 63)>; def : InstAlias<"sxtb $dst, $src", (SBFMWri GPR32:$dst, GPR32:$src, 0, 7)>; def : InstAlias<"sxtb $dst, $src", (SBFMXri GPR64:$dst, GPR64:$src, 0, 7)>; def : InstAlias<"sxth $dst, $src", (SBFMWri GPR32:$dst, GPR32:$src, 0, 15)>; def : InstAlias<"sxth $dst, $src", (SBFMXri GPR64:$dst, GPR64:$src, 0, 15)>; def : InstAlias<"sxtw $dst, $src", (SBFMXri GPR64:$dst, GPR64:$src, 0, 31)>; def : Pat<(srl GPR32:$Rn, (i64 imm0_31:$imm)), (UBFMWri GPR32:$Rn, imm0_31:$imm, 31)>; def : Pat<(srl GPR64:$Rn, (i64 imm0_63:$imm)), (UBFMXri GPR64:$Rn, imm0_63:$imm, 63)>; def : InstAlias<"lsr $dst, $src, $shift", (UBFMWri GPR32:$dst, GPR32:$src, imm0_31:$shift, 31)>; def : InstAlias<"lsr $dst, $src, $shift", (UBFMXri GPR64:$dst, GPR64:$src, imm0_63:$shift, 63)>; def : InstAlias<"uxtb $dst, $src", (UBFMWri GPR32:$dst, GPR32:$src, 0, 7)>; def : InstAlias<"uxtb $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 7)>; def : InstAlias<"uxth $dst, $src", (UBFMWri GPR32:$dst, GPR32:$src, 0, 15)>; def : InstAlias<"uxth $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 15)>; def : InstAlias<"uxtw $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 31)>; //===----------------------------------------------------------------------===// // Conditional comparison instructions. //===----------------------------------------------------------------------===// defm CCMN : CondComparison<0, "ccmn", AArch64ccmn>; defm CCMP : CondComparison<1, "ccmp", AArch64ccmp>; //===----------------------------------------------------------------------===// // Conditional select instructions. 
//===----------------------------------------------------------------------===// defm CSEL : CondSelect<0, 0b00, "csel">; def inc : PatFrag<(ops node:$in), (add_and_or_is_add node:$in, 1)>; defm CSINC : CondSelectOp<0, 0b01, "csinc", inc>; defm CSINV : CondSelectOp<1, 0b00, "csinv", not>; defm CSNEG : CondSelectOp<1, 0b01, "csneg", ineg>; def : Pat<(AArch64csinv GPR32:$tval, GPR32:$fval, (i32 imm:$cc), NZCV), (CSINVWr GPR32:$tval, GPR32:$fval, (i32 imm:$cc))>; def : Pat<(AArch64csinv GPR64:$tval, GPR64:$fval, (i32 imm:$cc), NZCV), (CSINVXr GPR64:$tval, GPR64:$fval, (i32 imm:$cc))>; def : Pat<(AArch64csneg GPR32:$tval, GPR32:$fval, (i32 imm:$cc), NZCV), (CSNEGWr GPR32:$tval, GPR32:$fval, (i32 imm:$cc))>; def : Pat<(AArch64csneg GPR64:$tval, GPR64:$fval, (i32 imm:$cc), NZCV), (CSNEGXr GPR64:$tval, GPR64:$fval, (i32 imm:$cc))>; def : Pat<(AArch64csinc GPR32:$tval, GPR32:$fval, (i32 imm:$cc), NZCV), (CSINCWr GPR32:$tval, GPR32:$fval, (i32 imm:$cc))>; def : Pat<(AArch64csinc GPR64:$tval, GPR64:$fval, (i32 imm:$cc), NZCV), (CSINCXr GPR64:$tval, GPR64:$fval, (i32 imm:$cc))>; def : Pat<(AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV), (CSINCWr WZR, WZR, (i32 imm:$cc))>; def : Pat<(AArch64csel (i64 0), (i64 1), (i32 imm:$cc), NZCV), (CSINCXr XZR, XZR, (i32 imm:$cc))>; def : Pat<(AArch64csel GPR32:$tval, (i32 1), (i32 imm:$cc), NZCV), (CSINCWr GPR32:$tval, WZR, (i32 imm:$cc))>; def : Pat<(AArch64csel GPR64:$tval, (i64 1), (i32 imm:$cc), NZCV), (CSINCXr GPR64:$tval, XZR, (i32 imm:$cc))>; def : Pat<(AArch64csel (i32 1), GPR32:$fval, (i32 imm:$cc), NZCV), (CSINCWr GPR32:$fval, WZR, (i32 (inv_cond_XFORM imm:$cc)))>; def : Pat<(AArch64csel (i64 1), GPR64:$fval, (i32 imm:$cc), NZCV), (CSINCXr GPR64:$fval, XZR, (i32 (inv_cond_XFORM imm:$cc)))>; def : Pat<(AArch64csel (i32 0), (i32 -1), (i32 imm:$cc), NZCV), (CSINVWr WZR, WZR, (i32 imm:$cc))>; def : Pat<(AArch64csel (i64 0), (i64 -1), (i32 imm:$cc), NZCV), (CSINVXr XZR, XZR, (i32 imm:$cc))>; def : Pat<(AArch64csel GPR32:$tval, (i32 -1), (i32 imm:$cc), NZCV), (CSINVWr GPR32:$tval, WZR, (i32 imm:$cc))>; def : Pat<(AArch64csel GPR64:$tval, (i64 -1), (i32 imm:$cc), NZCV), (CSINVXr GPR64:$tval, XZR, (i32 imm:$cc))>; def : Pat<(AArch64csel (i32 -1), GPR32:$fval, (i32 imm:$cc), NZCV), (CSINVWr GPR32:$fval, WZR, (i32 (inv_cond_XFORM imm:$cc)))>; def : Pat<(AArch64csel (i64 -1), GPR64:$fval, (i32 imm:$cc), NZCV), (CSINVXr GPR64:$fval, XZR, (i32 (inv_cond_XFORM imm:$cc)))>; def : Pat<(add_and_or_is_add GPR32:$val, (AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV)), (CSINCWr GPR32:$val, GPR32:$val, (i32 imm:$cc))>; def : Pat<(add_and_or_is_add GPR64:$val, (zext (AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV))), (CSINCXr GPR64:$val, GPR64:$val, (i32 imm:$cc))>; def : Pat<(or (topbitsallzero32:$val), (AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV)), (CSINCWr GPR32:$val, WZR, imm:$cc)>; def : Pat<(or (topbitsallzero64:$val), (AArch64csel (i64 0), (i64 1), (i32 imm:$cc), NZCV)), (CSINCXr GPR64:$val, XZR, imm:$cc)>; def : Pat<(or (topbitsallzero64:$val), (zext (AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV))), (CSINCXr GPR64:$val, XZR, imm:$cc)>; def : Pat<(and (topbitsallzero32:$val), (AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV)), (CSELWr WZR, GPR32:$val, imm:$cc)>; def : Pat<(and (topbitsallzero64:$val), (AArch64csel (i64 0), (i64 1), (i32 imm:$cc), NZCV)), (CSELXr XZR, GPR64:$val, imm:$cc)>; def : Pat<(and (topbitsallzero64:$val), (zext (AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV))), (CSELXr XZR, GPR64:$val, imm:$cc)>; // The inverse of 
the condition code from the alias instruction is what is used // in the aliased instruction. The parser all ready inverts the condition code // for these aliases. def : InstAlias<"cset $dst, $cc", (CSINCWr GPR32:$dst, WZR, WZR, inv_ccode:$cc)>; def : InstAlias<"cset $dst, $cc", (CSINCXr GPR64:$dst, XZR, XZR, inv_ccode:$cc)>; def : InstAlias<"csetm $dst, $cc", (CSINVWr GPR32:$dst, WZR, WZR, inv_ccode:$cc)>; def : InstAlias<"csetm $dst, $cc", (CSINVXr GPR64:$dst, XZR, XZR, inv_ccode:$cc)>; def : InstAlias<"cinc $dst, $src, $cc", (CSINCWr GPR32:$dst, GPR32:$src, GPR32:$src, inv_ccode:$cc)>; def : InstAlias<"cinc $dst, $src, $cc", (CSINCXr GPR64:$dst, GPR64:$src, GPR64:$src, inv_ccode:$cc)>; def : InstAlias<"cinv $dst, $src, $cc", (CSINVWr GPR32:$dst, GPR32:$src, GPR32:$src, inv_ccode:$cc)>; def : InstAlias<"cinv $dst, $src, $cc", (CSINVXr GPR64:$dst, GPR64:$src, GPR64:$src, inv_ccode:$cc)>; def : InstAlias<"cneg $dst, $src, $cc", (CSNEGWr GPR32:$dst, GPR32:$src, GPR32:$src, inv_ccode:$cc)>; def : InstAlias<"cneg $dst, $src, $cc", (CSNEGXr GPR64:$dst, GPR64:$src, GPR64:$src, inv_ccode:$cc)>; //===----------------------------------------------------------------------===// // PC-relative instructions. //===----------------------------------------------------------------------===// let isReMaterializable = 1 in { let hasSideEffects = 0, mayStore = 0, mayLoad = 0 in { def ADR : ADRI<0, "adr", adrlabel, [(set GPR64:$Xd, (AArch64adr tglobaladdr:$label))]>; } // hasSideEffects = 0 def ADRP : ADRI<1, "adrp", adrplabel, [(set GPR64:$Xd, (AArch64adrp tglobaladdr:$label))]>; } // isReMaterializable = 1 // page address of a constant pool entry, block address def : Pat<(AArch64adr tconstpool:$cp), (ADR tconstpool:$cp)>; def : Pat<(AArch64adr tblockaddress:$cp), (ADR tblockaddress:$cp)>; def : Pat<(AArch64adr texternalsym:$sym), (ADR texternalsym:$sym)>; def : Pat<(AArch64adr tjumptable:$sym), (ADR tjumptable:$sym)>; def : Pat<(AArch64adrp tconstpool:$cp), (ADRP tconstpool:$cp)>; def : Pat<(AArch64adrp tblockaddress:$cp), (ADRP tblockaddress:$cp)>; def : Pat<(AArch64adrp texternalsym:$sym), (ADRP texternalsym:$sym)>; //===----------------------------------------------------------------------===// // Unconditional branch (register) instructions. //===----------------------------------------------------------------------===// let isReturn = 1, isTerminator = 1, isBarrier = 1 in { def RET : BranchReg<0b0010, "ret", []>; def DRPS : SpecialReturn<0b0101, "drps">; def ERET : SpecialReturn<0b0100, "eret">; } // isReturn = 1, isTerminator = 1, isBarrier = 1 // Default to the LR register. 
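// i.e. a bare "ret" is equivalent to "ret x30".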
def : InstAlias<"ret", (RET LR)>; let isCall = 1, Defs = [LR], Uses = [SP] in { def BLR : BranchReg<0b0001, "blr", []>; def BLRNoIP : Pseudo<(outs), (ins GPR64noip:$Rn), []>, Sched<[WriteBrReg]>, PseudoInstExpansion<(BLR GPR64:$Rn)>; def BLR_RVMARKER : Pseudo<(outs), (ins variable_ops), []>, Sched<[WriteBrReg]>; def BLR_BTI : Pseudo<(outs), (ins variable_ops), []>, Sched<[WriteBrReg]>; let Uses = [X16, SP] in def BLR_X16 : Pseudo<(outs), (ins), [(AArch64call_arm64ec_to_x64 X16)]>, Sched<[WriteBrReg]>, PseudoInstExpansion<(BLR X16)>; } // isCall def : Pat<(AArch64call GPR64:$Rn), (BLR GPR64:$Rn)>, Requires<[NoSLSBLRMitigation]>; def : Pat<(AArch64call GPR64noip:$Rn), (BLRNoIP GPR64noip:$Rn)>, Requires<[SLSBLRMitigation]>; def : Pat<(AArch64call_rvmarker (i64 tglobaladdr:$rvfunc), GPR64:$Rn), (BLR_RVMARKER tglobaladdr:$rvfunc, GPR64:$Rn)>, Requires<[NoSLSBLRMitigation]>; def : Pat<(AArch64call_bti GPR64:$Rn), (BLR_BTI GPR64:$Rn)>, Requires<[NoSLSBLRMitigation]>; def : Pat<(AArch64call_bti GPR64noip:$Rn), (BLR_BTI GPR64noip:$Rn)>, Requires<[SLSBLRMitigation]>; let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { def BR : BranchReg<0b0000, "br", [(brind GPR64:$Rn)]>; } // isBranch, isTerminator, isBarrier, isIndirectBranch // Create a separate pseudo-instruction for codegen to use so that we don't // flag lr as used in every function. It'll be restored before the RET by the // epilogue if it's legitimately used. def RET_ReallyLR : Pseudo<(outs), (ins), [(AArch64retglue)]>, Sched<[WriteBrReg]> { let isTerminator = 1; let isBarrier = 1; let isReturn = 1; } // This is a directive-like pseudo-instruction. The purpose is to insert an // R_AARCH64_TLSDESC_CALL relocation at the offset of the following instruction // (which in the usual case is a BLR). let hasSideEffects = 1 in def TLSDESCCALL : Pseudo<(outs), (ins i64imm:$sym), []>, Sched<[]> { let AsmString = ".tlsdesccall $sym"; } // Pseudo instruction to tell the streamer to emit a 'B' character into the // augmentation string. def EMITBKEY : Pseudo<(outs), (ins), []>, Sched<[]> {} // Pseudo instruction to tell the streamer to emit a 'G' character into the // augmentation string. def EMITMTETAGGED : Pseudo<(outs), (ins), []>, Sched<[]> {} // FIXME: maybe the scratch register used shouldn't be fixed to X1? // FIXME: can "hasSideEffects be dropped? // This gets lowered to an instruction sequence which takes 16 bytes let isCall = 1, Defs = [NZCV, LR, X0, X1], hasSideEffects = 1, Size = 16, isCodeGenOnly = 1 in def TLSDESC_CALLSEQ : Pseudo<(outs), (ins i64imm:$sym), [(AArch64tlsdesc_callseq tglobaltlsaddr:$sym)]>, Sched<[WriteI, WriteLD, WriteI, WriteBrReg]>; def : Pat<(AArch64tlsdesc_callseq texternalsym:$sym), (TLSDESC_CALLSEQ texternalsym:$sym)>; //===----------------------------------------------------------------------===// // Conditional branch (immediate) instruction. //===----------------------------------------------------------------------===// def Bcc : BranchCond<0, "b">; // Armv8.8-A variant form which hints to the branch predictor that // this branch is very likely to go the same way nearly all the time // (even though it is not known at compile time _which_ way that is). def BCcc : BranchCond<1, "bc">, Requires<[HasHBC]>; //===----------------------------------------------------------------------===// // Compare-and-branch instructions. 
//===----------------------------------------------------------------------===// defm CBZ : CmpBranch<0, "cbz", AArch64cbz>; defm CBNZ : CmpBranch<1, "cbnz", AArch64cbnz>; //===----------------------------------------------------------------------===// // Test-bit-and-branch instructions. //===----------------------------------------------------------------------===// defm TBZ : TestBranch<0, "tbz", AArch64tbz>; defm TBNZ : TestBranch<1, "tbnz", AArch64tbnz>; //===----------------------------------------------------------------------===// // Unconditional branch (immediate) instructions. //===----------------------------------------------------------------------===// let isBranch = 1, isTerminator = 1, isBarrier = 1 in { def B : BranchImm<0, "b", [(br bb:$addr)]>; } // isBranch, isTerminator, isBarrier let isCall = 1, Defs = [LR], Uses = [SP] in { def BL : CallImm<1, "bl", [(AArch64call tglobaladdr:$addr)]>; } // isCall def : Pat<(AArch64call texternalsym:$func), (BL texternalsym:$func)>; //===----------------------------------------------------------------------===// // Exception generation instructions. //===----------------------------------------------------------------------===// let isTrap = 1 in { def BRK : ExceptionGeneration<0b001, 0b00, "brk", [(int_aarch64_break timm32_0_65535:$imm)]>; } def DCPS1 : ExceptionGeneration<0b101, 0b01, "dcps1">; def DCPS2 : ExceptionGeneration<0b101, 0b10, "dcps2">; def DCPS3 : ExceptionGeneration<0b101, 0b11, "dcps3">, Requires<[HasEL3]>; def HLT : ExceptionGeneration<0b010, 0b00, "hlt", [(int_aarch64_hlt timm32_0_65535:$imm)]>; def HVC : ExceptionGeneration<0b000, 0b10, "hvc">; def SMC : ExceptionGeneration<0b000, 0b11, "smc">, Requires<[HasEL3]>; def SVC : ExceptionGeneration<0b000, 0b01, "svc">; // DCPSn defaults to an immediate operand of zero if unspecified. def : InstAlias<"dcps1", (DCPS1 0)>; def : InstAlias<"dcps2", (DCPS2 0)>; def : InstAlias<"dcps3", (DCPS3 0)>, Requires<[HasEL3]>; def UDF : UDFType<0, "udf">; //===----------------------------------------------------------------------===// // Load instructions. 
//===----------------------------------------------------------------------===// // Pair (indexed, offset) defm LDPW : LoadPairOffset<0b00, 0, GPR32z, simm7s4, "ldp">; defm LDPX : LoadPairOffset<0b10, 0, GPR64z, simm7s8, "ldp">; let Predicates = [HasFPARMv8] in { defm LDPS : LoadPairOffset<0b00, 1, FPR32Op, simm7s4, "ldp">; defm LDPD : LoadPairOffset<0b01, 1, FPR64Op, simm7s8, "ldp">; defm LDPQ : LoadPairOffset<0b10, 1, FPR128Op, simm7s16, "ldp">; } defm LDPSW : LoadPairOffset<0b01, 0, GPR64z, simm7s4, "ldpsw">; // Pair (pre-indexed) def LDPWpre : LoadPairPreIdx<0b00, 0, GPR32z, simm7s4, "ldp">; def LDPXpre : LoadPairPreIdx<0b10, 0, GPR64z, simm7s8, "ldp">; let Predicates = [HasFPARMv8] in { def LDPSpre : LoadPairPreIdx<0b00, 1, FPR32Op, simm7s4, "ldp">; def LDPDpre : LoadPairPreIdx<0b01, 1, FPR64Op, simm7s8, "ldp">; def LDPQpre : LoadPairPreIdx<0b10, 1, FPR128Op, simm7s16, "ldp">; } def LDPSWpre : LoadPairPreIdx<0b01, 0, GPR64z, simm7s4, "ldpsw">; // Pair (post-indexed) def LDPWpost : LoadPairPostIdx<0b00, 0, GPR32z, simm7s4, "ldp">; def LDPXpost : LoadPairPostIdx<0b10, 0, GPR64z, simm7s8, "ldp">; let Predicates = [HasFPARMv8] in { def LDPSpost : LoadPairPostIdx<0b00, 1, FPR32Op, simm7s4, "ldp">; def LDPDpost : LoadPairPostIdx<0b01, 1, FPR64Op, simm7s8, "ldp">; def LDPQpost : LoadPairPostIdx<0b10, 1, FPR128Op, simm7s16, "ldp">; } def LDPSWpost : LoadPairPostIdx<0b01, 0, GPR64z, simm7s4, "ldpsw">; // Pair (no allocate) defm LDNPW : LoadPairNoAlloc<0b00, 0, GPR32z, simm7s4, "ldnp">; defm LDNPX : LoadPairNoAlloc<0b10, 0, GPR64z, simm7s8, "ldnp">; let Predicates = [HasFPARMv8] in { defm LDNPS : LoadPairNoAlloc<0b00, 1, FPR32Op, simm7s4, "ldnp">; defm LDNPD : LoadPairNoAlloc<0b01, 1, FPR64Op, simm7s8, "ldnp">; defm LDNPQ : LoadPairNoAlloc<0b10, 1, FPR128Op, simm7s16, "ldnp">; } def : Pat<(AArch64ldp (am_indexed7s64 GPR64sp:$Rn, simm7s8:$offset)), (LDPXi GPR64sp:$Rn, simm7s8:$offset)>; def : Pat<(AArch64ldnp (am_indexed7s128 GPR64sp:$Rn, simm7s16:$offset)), (LDNPQi GPR64sp:$Rn, simm7s16:$offset)>; //--- // (register offset) //--- // Integer defm LDRBB : Load8RO<0b00, 0, 0b01, GPR32, "ldrb", i32, zextloadi8>; defm LDRHH : Load16RO<0b01, 0, 0b01, GPR32, "ldrh", i32, zextloadi16>; defm LDRW : Load32RO<0b10, 0, 0b01, GPR32, "ldr", i32, load>; defm LDRX : Load64RO<0b11, 0, 0b01, GPR64, "ldr", i64, load>; // Floating-point let Predicates = [HasFPARMv8] in { defm LDRB : Load8RO<0b00, 1, 0b01, FPR8Op, "ldr", i8, load>; defm LDRH : Load16RO<0b01, 1, 0b01, FPR16Op, "ldr", f16, load>; defm LDRS : Load32RO<0b10, 1, 0b01, FPR32Op, "ldr", f32, load>; defm LDRD : Load64RO<0b11, 1, 0b01, FPR64Op, "ldr", f64, load>; defm LDRQ : Load128RO<0b00, 1, 0b11, FPR128Op, "ldr", f128, load>; } // Load sign-extended half-word defm LDRSHW : Load16RO<0b01, 0, 0b11, GPR32, "ldrsh", i32, sextloadi16>; defm LDRSHX : Load16RO<0b01, 0, 0b10, GPR64, "ldrsh", i64, sextloadi16>; // Load sign-extended byte defm LDRSBW : Load8RO<0b00, 0, 0b11, GPR32, "ldrsb", i32, sextloadi8>; defm LDRSBX : Load8RO<0b00, 0, 0b10, GPR64, "ldrsb", i64, sextloadi8>; // Load sign-extended word defm LDRSW : Load32RO<0b10, 0, 0b10, GPR64, "ldrsw", i64, sextloadi32>; // Pre-fetch. defm PRFM : PrefetchRO<0b11, 0, 0b10, "prfm">; // For regular load, we do not have any alignment requirement. // Thus, it is safe to directly map the vector loads with interesting // addressing modes. // FIXME: We could do the same for bitconvert to floating point vectors. 
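// For example, a byte extload feeding scalar_to_vector can be selected
// directly as an FP/SIMD LDRB into the b-subregister of the destination
// vector, avoiding a separate GPR load and transfer.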
multiclass ScalToVecROLoadPat { def : Pat<(VecTy (scalar_to_vector (ScalTy (loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$offset))))), (INSERT_SUBREG (VecTy (IMPLICIT_DEF)), (LOADW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$offset), sub)>; def : Pat<(VecTy (scalar_to_vector (ScalTy (loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$offset))))), (INSERT_SUBREG (VecTy (IMPLICIT_DEF)), (LOADX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$offset), sub)>; } let AddedComplexity = 10 in { defm : ScalToVecROLoadPat; defm : ScalToVecROLoadPat; defm : ScalToVecROLoadPat; defm : ScalToVecROLoadPat; defm : ScalToVecROLoadPat; defm : ScalToVecROLoadPat; defm : ScalToVecROLoadPat; defm : ScalToVecROLoadPat; defm : ScalToVecROLoadPat; defm : ScalToVecROLoadPat; defm : ScalToVecROLoadPat; defm : ScalToVecROLoadPat; def : Pat <(v1i64 (scalar_to_vector (i64 (load (ro_Windexed64 GPR64sp:$Rn, GPR32:$Rm, ro_Wextend64:$extend))))), (LDRDroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend64:$extend)>; def : Pat <(v1i64 (scalar_to_vector (i64 (load (ro_Xindexed64 GPR64sp:$Rn, GPR64:$Rm, ro_Xextend64:$extend))))), (LDRDroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend64:$extend)>; } // Match all load 64 bits width whose type is compatible with FPR64 multiclass VecROLoadPat { def : Pat<(VecTy (load (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend))), (LOADW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>; def : Pat<(VecTy (load (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend))), (LOADX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>; } let AddedComplexity = 10 in { let Predicates = [IsLE] in { // We must do vector loads with LD1 in big-endian. defm : VecROLoadPat; defm : VecROLoadPat; defm : VecROLoadPat; defm : VecROLoadPat; defm : VecROLoadPat; defm : VecROLoadPat; } defm : VecROLoadPat; defm : VecROLoadPat; // Match all load 128 bits width whose type is compatible with FPR128 let Predicates = [IsLE] in { // We must do vector loads with LD1 in big-endian. 
defm : VecROLoadPat; defm : VecROLoadPat; defm : VecROLoadPat; defm : VecROLoadPat; defm : VecROLoadPat; defm : VecROLoadPat; defm : VecROLoadPat; defm : VecROLoadPat; } } // AddedComplexity = 10 // zextload -> i64 multiclass ExtLoadTo64ROPat { def : Pat<(i64 (loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend))), (SUBREG_TO_REG (i64 0), (INSTW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend), sub_32)>; def : Pat<(i64 (loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend))), (SUBREG_TO_REG (i64 0), (INSTX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend), sub_32)>; } let AddedComplexity = 10 in { defm : ExtLoadTo64ROPat; defm : ExtLoadTo64ROPat; defm : ExtLoadTo64ROPat; // zextloadi1 -> zextloadi8 defm : ExtLoadTo64ROPat; // extload -> zextload defm : ExtLoadTo64ROPat; defm : ExtLoadTo64ROPat; defm : ExtLoadTo64ROPat; // extloadi1 -> zextloadi8 defm : ExtLoadTo64ROPat; } // zextload -> i64 multiclass ExtLoadTo32ROPat { def : Pat<(i32 (loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend))), (INSTW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>; def : Pat<(i32 (loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend))), (INSTX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>; } let AddedComplexity = 10 in { // extload -> zextload defm : ExtLoadTo32ROPat; defm : ExtLoadTo32ROPat; defm : ExtLoadTo32ROPat; // zextloadi1 -> zextloadi8 defm : ExtLoadTo32ROPat; } //--- // (unsigned immediate) //--- defm LDRX : LoadUI<0b11, 0, 0b01, GPR64z, uimm12s8, "ldr", [(set GPR64z:$Rt, (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)))]>; defm LDRW : LoadUI<0b10, 0, 0b01, GPR32z, uimm12s4, "ldr", [(set GPR32z:$Rt, (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset)))]>; let Predicates = [HasFPARMv8] in { defm LDRB : LoadUI<0b00, 1, 0b01, FPR8Op, uimm12s1, "ldr", [(set FPR8Op:$Rt, (load (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset)))]>; defm LDRH : LoadUI<0b01, 1, 0b01, FPR16Op, uimm12s2, "ldr", [(set (f16 FPR16Op:$Rt), (load (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)))]>; defm LDRS : LoadUI<0b10, 1, 0b01, FPR32Op, uimm12s4, "ldr", [(set (f32 FPR32Op:$Rt), (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset)))]>; defm LDRD : LoadUI<0b11, 1, 0b01, FPR64Op, uimm12s8, "ldr", [(set (f64 FPR64Op:$Rt), (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)))]>; defm LDRQ : LoadUI<0b00, 1, 0b11, FPR128Op, uimm12s16, "ldr", [(set (f128 FPR128Op:$Rt), (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)))]>; } // bf16 load pattern def : Pat <(bf16 (load (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))), (LDRHui GPR64sp:$Rn, uimm12s2:$offset)>; // For regular load, we do not have any alignment requirement. // Thus, it is safe to directly map the vector loads with interesting // addressing modes. // FIXME: We could do the same for bitconvert to floating point vectors. 
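// As above, the scalar is loaded straight into the low subregister of an
// otherwise undefined vector (INSERT_SUBREG of an IMPLICIT_DEF), so no
// GPR-to-FPR copy is needed.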
def : Pat <(v8i8 (scalar_to_vector (i32 (extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))), (INSERT_SUBREG (v8i8 (IMPLICIT_DEF)), (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub)>; def : Pat <(v16i8 (scalar_to_vector (i32 (extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))), (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub)>; def : Pat <(v4i16 (scalar_to_vector (i32 (extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))), (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)), (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub)>; def : Pat <(v8i16 (scalar_to_vector (i32 (extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))), (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub)>; def : Pat <(v2i32 (scalar_to_vector (i32 (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))))), (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)), (LDRSui GPR64sp:$Rn, uimm12s4:$offset), ssub)>; def : Pat <(v4i32 (scalar_to_vector (i32 (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))))), (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), (LDRSui GPR64sp:$Rn, uimm12s4:$offset), ssub)>; def : Pat <(v1i64 (scalar_to_vector (i64 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))))), (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>; def : Pat <(v2i64 (scalar_to_vector (i64 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))))), (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), (LDRDui GPR64sp:$Rn, uimm12s8:$offset), dsub)>; // Match all load 64 bits width whose type is compatible with FPR64 let Predicates = [IsLE] in { // We must use LD1 to perform vector loads in big-endian. def : Pat<(v2f32 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))), (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>; def : Pat<(v8i8 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))), (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>; def : Pat<(v4i16 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))), (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>; def : Pat<(v2i32 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))), (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>; def : Pat<(v4f16 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))), (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>; def : Pat<(v4bf16 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))), (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>; } def : Pat<(v1f64 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))), (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>; def : Pat<(v1i64 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))), (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>; // Match all load 128 bits width whose type is compatible with FPR128 let Predicates = [IsLE] in { // We must use LD1 to perform vector loads in big-endian. 
def : Pat<(v4f32 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))), (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>; def : Pat<(v2f64 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))), (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>; def : Pat<(v16i8 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))), (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>; def : Pat<(v8i16 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))), (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>; def : Pat<(v4i32 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))), (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>; def : Pat<(v2i64 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))), (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>; def : Pat<(v8f16 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))), (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>; def : Pat<(v8bf16 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))), (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>; } def : Pat<(f128 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))), (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>; defm LDRHH : LoadUI<0b01, 0, 0b01, GPR32, uimm12s2, "ldrh", [(set GPR32:$Rt, (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)))]>; defm LDRBB : LoadUI<0b00, 0, 0b01, GPR32, uimm12s1, "ldrb", [(set GPR32:$Rt, (zextloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset)))]>; // zextload -> i64 def : Pat<(i64 (zextloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))), (SUBREG_TO_REG (i64 0), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset), sub_32)>; def : Pat<(i64 (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))), (SUBREG_TO_REG (i64 0), (LDRHHui GPR64sp:$Rn, uimm12s2:$offset), sub_32)>; // zextloadi1 -> zextloadi8 def : Pat<(i32 (zextloadi1 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset)>; def : Pat<(i64 (zextloadi1 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))), (SUBREG_TO_REG (i64 0), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset), sub_32)>; // extload -> zextload def : Pat<(i32 (extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))), (LDRHHui GPR64sp:$Rn, uimm12s2:$offset)>; def : Pat<(i32 (extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset)>; def : Pat<(i32 (extloadi1 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset)>; def : Pat<(i64 (extloadi32 (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))), (SUBREG_TO_REG (i64 0), (LDRWui GPR64sp:$Rn, uimm12s4:$offset), sub_32)>; def : Pat<(i64 (extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))), (SUBREG_TO_REG (i64 0), (LDRHHui GPR64sp:$Rn, uimm12s2:$offset), sub_32)>; def : Pat<(i64 (extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))), (SUBREG_TO_REG (i64 0), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset), sub_32)>; def : Pat<(i64 (extloadi1 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))), (SUBREG_TO_REG (i64 0), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset), sub_32)>; // load sign-extended half-word defm LDRSHW : LoadUI<0b01, 0, 0b11, GPR32, uimm12s2, "ldrsh", [(set GPR32:$Rt, (sextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)))]>; defm LDRSHX : LoadUI<0b01, 0, 0b10, GPR64, uimm12s2, "ldrsh", [(set GPR64:$Rt, (sextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)))]>; // load sign-extended byte defm LDRSBW : LoadUI<0b00, 0, 0b11, GPR32, uimm12s1, "ldrsb", [(set GPR32:$Rt, (sextloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset)))]>; defm LDRSBX : LoadUI<0b00, 0, 0b10, GPR64, uimm12s1, "ldrsb", [(set GPR64:$Rt, (sextloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset)))]>; // load sign-extended word defm LDRSW : 
LoadUI<0b10, 0, 0b10, GPR64, uimm12s4, "ldrsw", [(set GPR64:$Rt, (sextloadi32 (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset)))]>; // load zero-extended word def : Pat<(i64 (zextloadi32 (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))), (SUBREG_TO_REG (i64 0), (LDRWui GPR64sp:$Rn, uimm12s4:$offset), sub_32)>; // Pre-fetch. def PRFMui : PrefetchUI<0b11, 0, 0b10, "prfm", [(AArch64Prefetch timm:$Rt, (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))]>; def : InstAlias<"prfm $Rt, [$Rn]", (PRFMui prfop:$Rt, GPR64sp:$Rn, 0)>; //--- // (literal) def alignedglobal : PatLeaf<(iPTR iPTR:$label), [{ if (auto *G = dyn_cast(N)) { const DataLayout &DL = MF->getDataLayout(); Align Align = G->getGlobal()->getPointerAlignment(DL); return Align >= 4 && G->getOffset() % 4 == 0; } if (auto *C = dyn_cast(N)) return C->getAlign() >= 4 && C->getOffset() % 4 == 0; return false; }]>; def LDRWl : LoadLiteral<0b00, 0, GPR32z, "ldr", [(set GPR32z:$Rt, (load (AArch64adr alignedglobal:$label)))]>; def LDRXl : LoadLiteral<0b01, 0, GPR64z, "ldr", [(set GPR64z:$Rt, (load (AArch64adr alignedglobal:$label)))]>; let Predicates = [HasFPARMv8] in { def LDRSl : LoadLiteral<0b00, 1, FPR32Op, "ldr", [(set (f32 FPR32Op:$Rt), (load (AArch64adr alignedglobal:$label)))]>; def LDRDl : LoadLiteral<0b01, 1, FPR64Op, "ldr", [(set (f64 FPR64Op:$Rt), (load (AArch64adr alignedglobal:$label)))]>; def LDRQl : LoadLiteral<0b10, 1, FPR128Op, "ldr", [(set (f128 FPR128Op:$Rt), (load (AArch64adr alignedglobal:$label)))]>; } // load sign-extended word def LDRSWl : LoadLiteral<0b10, 0, GPR64z, "ldrsw", [(set GPR64z:$Rt, (sextloadi32 (AArch64adr alignedglobal:$label)))]>; let AddedComplexity = 20 in { def : Pat<(i64 (zextloadi32 (AArch64adr alignedglobal:$label))), (SUBREG_TO_REG (i64 0), (LDRWl $label), sub_32)>; } // prefetch def PRFMl : PrefetchLiteral<0b11, 0, "prfm", []>; // [(AArch64Prefetch imm:$Rt, tglobaladdr:$label)]>; //--- // (unscaled immediate) defm LDURX : LoadUnscaled<0b11, 0, 0b01, GPR64z, "ldur", [(set GPR64z:$Rt, (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset)))]>; defm LDURW : LoadUnscaled<0b10, 0, 0b01, GPR32z, "ldur", [(set GPR32z:$Rt, (load (am_unscaled32 GPR64sp:$Rn, simm9:$offset)))]>; let Predicates = [HasFPARMv8] in { defm LDURB : LoadUnscaled<0b00, 1, 0b01, FPR8Op, "ldur", [(set FPR8Op:$Rt, (load (am_unscaled8 GPR64sp:$Rn, simm9:$offset)))]>; defm LDURH : LoadUnscaled<0b01, 1, 0b01, FPR16Op, "ldur", [(set (f16 FPR16Op:$Rt), (load (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>; defm LDURS : LoadUnscaled<0b10, 1, 0b01, FPR32Op, "ldur", [(set (f32 FPR32Op:$Rt), (load (am_unscaled32 GPR64sp:$Rn, simm9:$offset)))]>; defm LDURD : LoadUnscaled<0b11, 1, 0b01, FPR64Op, "ldur", [(set (f64 FPR64Op:$Rt), (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset)))]>; defm LDURQ : LoadUnscaled<0b00, 1, 0b11, FPR128Op, "ldur", [(set (f128 FPR128Op:$Rt), (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset)))]>; } defm LDURHH : LoadUnscaled<0b01, 0, 0b01, GPR32, "ldurh", [(set GPR32:$Rt, (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>; defm LDURBB : LoadUnscaled<0b00, 0, 0b01, GPR32, "ldurb", [(set GPR32:$Rt, (zextloadi8 (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>; // bf16 load pattern def : Pat <(bf16 (load (am_unscaled16 GPR64sp:$Rn, simm9:$offset))), (LDURHi GPR64sp:$Rn, simm9:$offset)>; // Match all load 64 bits width whose type is compatible with FPR64 let Predicates = [IsLE] in { def : Pat<(v2f32 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))), (LDURDi GPR64sp:$Rn, simm9:$offset)>; def : Pat<(v2i32 (load (am_unscaled64 GPR64sp:$Rn, 
simm9:$offset))), (LDURDi GPR64sp:$Rn, simm9:$offset)>; def : Pat<(v4i16 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))), (LDURDi GPR64sp:$Rn, simm9:$offset)>; def : Pat<(v8i8 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))), (LDURDi GPR64sp:$Rn, simm9:$offset)>; def : Pat<(v4f16 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))), (LDURDi GPR64sp:$Rn, simm9:$offset)>; } def : Pat<(v1f64 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))), (LDURDi GPR64sp:$Rn, simm9:$offset)>; def : Pat<(v1i64 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))), (LDURDi GPR64sp:$Rn, simm9:$offset)>; // Match all load 128 bits width whose type is compatible with FPR128 let Predicates = [IsLE] in { def : Pat<(v2f64 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))), (LDURQi GPR64sp:$Rn, simm9:$offset)>; def : Pat<(v2i64 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))), (LDURQi GPR64sp:$Rn, simm9:$offset)>; def : Pat<(v4f32 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))), (LDURQi GPR64sp:$Rn, simm9:$offset)>; def : Pat<(v4i32 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))), (LDURQi GPR64sp:$Rn, simm9:$offset)>; def : Pat<(v8i16 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))), (LDURQi GPR64sp:$Rn, simm9:$offset)>; def : Pat<(v16i8 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))), (LDURQi GPR64sp:$Rn, simm9:$offset)>; def : Pat<(v8f16 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))), (LDURQi GPR64sp:$Rn, simm9:$offset)>; } // anyext -> zext def : Pat<(i32 (extloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))), (LDURHHi GPR64sp:$Rn, simm9:$offset)>; def : Pat<(i32 (extloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))), (LDURBBi GPR64sp:$Rn, simm9:$offset)>; def : Pat<(i32 (extloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))), (LDURBBi GPR64sp:$Rn, simm9:$offset)>; def : Pat<(i64 (extloadi32 (am_unscaled32 GPR64sp:$Rn, simm9:$offset))), (SUBREG_TO_REG (i64 0), (LDURWi GPR64sp:$Rn, simm9:$offset), sub_32)>; def : Pat<(i64 (extloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))), (SUBREG_TO_REG (i64 0), (LDURHHi GPR64sp:$Rn, simm9:$offset), sub_32)>; def : Pat<(i64 (extloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))), (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>; def : Pat<(i64 (extloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))), (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>; // unscaled zext def : Pat<(i32 (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))), (LDURHHi GPR64sp:$Rn, simm9:$offset)>; def : Pat<(i32 (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))), (LDURBBi GPR64sp:$Rn, simm9:$offset)>; def : Pat<(i32 (zextloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))), (LDURBBi GPR64sp:$Rn, simm9:$offset)>; def : Pat<(i64 (zextloadi32 (am_unscaled32 GPR64sp:$Rn, simm9:$offset))), (SUBREG_TO_REG (i64 0), (LDURWi GPR64sp:$Rn, simm9:$offset), sub_32)>; def : Pat<(i64 (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))), (SUBREG_TO_REG (i64 0), (LDURHHi GPR64sp:$Rn, simm9:$offset), sub_32)>; def : Pat<(i64 (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))), (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>; def : Pat<(i64 (zextloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))), (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>; //--- // LDR mnemonics fall back to LDUR for negative or unaligned offsets. // Define new assembler match classes as we want to only match these when // the don't otherwise match the scaled addressing mode for LDR/STR. 
Don't // associate a DiagnosticType either, as we want the diagnostic for the // canonical form (the scaled operand) to take precedence.
class SImm9OffsetOperand<int Width> : AsmOperandClass {
  let Name = "SImm9OffsetFB" # Width;
  let PredicateMethod = "isSImm9OffsetFB<" # Width # ">";
  let RenderMethod = "addImmOperands";
}

def SImm9OffsetFB8Operand : SImm9OffsetOperand<8>;
def SImm9OffsetFB16Operand : SImm9OffsetOperand<16>;
def SImm9OffsetFB32Operand : SImm9OffsetOperand<32>;
def SImm9OffsetFB64Operand : SImm9OffsetOperand<64>;
def SImm9OffsetFB128Operand : SImm9OffsetOperand<128>;

def simm9_offset_fb8 : Operand<i64> {
  let ParserMatchClass = SImm9OffsetFB8Operand;
}
def simm9_offset_fb16 : Operand<i64> {
  let ParserMatchClass = SImm9OffsetFB16Operand;
}
def simm9_offset_fb32 : Operand<i64> {
  let ParserMatchClass = SImm9OffsetFB32Operand;
}
def simm9_offset_fb64 : Operand<i64> {
  let ParserMatchClass = SImm9OffsetFB64Operand;
}
def simm9_offset_fb128 : Operand<i64> {
  let ParserMatchClass = SImm9OffsetFB128Operand;
}

def : InstAlias<"ldr $Rt, [$Rn, $offset]",
                (LDURXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>;
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
                (LDURWi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>;
let Predicates = [HasFPARMv8] in {
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
                (LDURBi FPR8Op:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>;
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
                (LDURHi FPR16Op:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>;
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
                (LDURSi FPR32Op:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>;
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
                (LDURDi FPR64Op:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>;
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
                (LDURQi FPR128Op:$Rt, GPR64sp:$Rn, simm9_offset_fb128:$offset), 0>;
}

// zextload -> i64
def : Pat<(i64 (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
          (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>;
def : Pat<(i64 (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))),
          (SUBREG_TO_REG (i64 0), (LDURHHi GPR64sp:$Rn, simm9:$offset), sub_32)>;

// load sign-extended half-word
defm LDURSHW : LoadUnscaled<0b01, 0, 0b11, GPR32, "ldursh",
                            [(set GPR32:$Rt,
                                  (sextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURSHX : LoadUnscaled<0b01, 0, 0b10, GPR64, "ldursh",
                            [(set GPR64:$Rt,
                                  (sextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>;

// load sign-extended byte
defm LDURSBW : LoadUnscaled<0b00, 0, 0b11, GPR32, "ldursb",
                            [(set GPR32:$Rt,
                                  (sextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURSBX : LoadUnscaled<0b00, 0, 0b10, GPR64, "ldursb",
                            [(set GPR64:$Rt,
                                  (sextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset)))]>;

// load sign-extended word
defm LDURSW : LoadUnscaled<0b10, 0, 0b10, GPR64, "ldursw",
                           [(set GPR64:$Rt,
                                 (sextloadi32 (am_unscaled32 GPR64sp:$Rn, simm9:$offset)))]>;

// zero and sign extending aliases from generic LDR* mnemonics to LDUR*.
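// For example, "ldrb w0, [x1, #-1]" has no scaled unsigned-offset encoding,
// so it matches the LDURBB form via these aliases.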
def : InstAlias<"ldrb $Rt, [$Rn, $offset]", (LDURBBi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>; def : InstAlias<"ldrh $Rt, [$Rn, $offset]", (LDURHHi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>; def : InstAlias<"ldrsb $Rt, [$Rn, $offset]", (LDURSBWi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>; def : InstAlias<"ldrsb $Rt, [$Rn, $offset]", (LDURSBXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>; def : InstAlias<"ldrsh $Rt, [$Rn, $offset]", (LDURSHWi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>; def : InstAlias<"ldrsh $Rt, [$Rn, $offset]", (LDURSHXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>; def : InstAlias<"ldrsw $Rt, [$Rn, $offset]", (LDURSWi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>; // A LDR will implicitly zero the rest of the vector, so vector_insert(zeros, // load, 0) can use a single load. multiclass LoadInsertZeroPatterns { // Scaled def : Pat <(vector_insert (VT immAllZerosV), (ScalarVT (LoadOp (Addr GPR64sp:$Rn, AddrImm:$offset))), (i64 0)), (SUBREG_TO_REG (i64 0), (LoadInst GPR64sp:$Rn, AddrImm:$offset), SubReg)>; // Unscaled def : Pat <(vector_insert (VT immAllZerosV), (ScalarVT (LoadOp (UnscaledAddr GPR64sp:$Rn, simm9:$offset))), (i64 0)), (SUBREG_TO_REG (i64 0), (UnscaledLoadInst GPR64sp:$Rn, simm9:$offset), SubReg)>; // Half-vector patterns def : Pat <(vector_insert (HVT immAllZerosV), (ScalarVT (LoadOp (Addr GPR64sp:$Rn, AddrImm:$offset))), (i64 0)), (SUBREG_TO_REG (i64 0), (LoadInst GPR64sp:$Rn, AddrImm:$offset), SubReg)>; // Unscaled def : Pat <(vector_insert (HVT immAllZerosV), (ScalarVT (LoadOp (UnscaledAddr GPR64sp:$Rn, simm9:$offset))), (i64 0)), (SUBREG_TO_REG (i64 0), (UnscaledLoadInst GPR64sp:$Rn, simm9:$offset), SubReg)>; // SVE patterns def : Pat <(vector_insert (SVT immAllZerosV), (ScalarVT (LoadOp (Addr GPR64sp:$Rn, AddrImm:$offset))), (i64 0)), (SUBREG_TO_REG (i64 0), (LoadInst GPR64sp:$Rn, AddrImm:$offset), SubReg)>; // Unscaled def : Pat <(vector_insert (SVT immAllZerosV), (ScalarVT (LoadOp (UnscaledAddr GPR64sp:$Rn, simm9:$offset))), (i64 0)), (SUBREG_TO_REG (i64 0), (UnscaledLoadInst GPR64sp:$Rn, simm9:$offset), SubReg)>; } defm : LoadInsertZeroPatterns; defm : LoadInsertZeroPatterns; defm : LoadInsertZeroPatterns; defm : LoadInsertZeroPatterns; defm : LoadInsertZeroPatterns; defm : LoadInsertZeroPatterns; defm : LoadInsertZeroPatterns; defm : LoadInsertZeroPatterns; // Pre-fetch. 
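// prfum is the unscaled, signed 9-bit offset counterpart of prfm.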
defm PRFUM : PrefetchUnscaled<0b11, 0, 0b10, "prfum", [(AArch64Prefetch timm:$Rt, (am_unscaled64 GPR64sp:$Rn, simm9:$offset))]>; //--- // (unscaled immediate, unprivileged) defm LDTRX : LoadUnprivileged<0b11, 0, 0b01, GPR64, "ldtr">; defm LDTRW : LoadUnprivileged<0b10, 0, 0b01, GPR32, "ldtr">; defm LDTRH : LoadUnprivileged<0b01, 0, 0b01, GPR32, "ldtrh">; defm LDTRB : LoadUnprivileged<0b00, 0, 0b01, GPR32, "ldtrb">; // load sign-extended half-word defm LDTRSHW : LoadUnprivileged<0b01, 0, 0b11, GPR32, "ldtrsh">; defm LDTRSHX : LoadUnprivileged<0b01, 0, 0b10, GPR64, "ldtrsh">; // load sign-extended byte defm LDTRSBW : LoadUnprivileged<0b00, 0, 0b11, GPR32, "ldtrsb">; defm LDTRSBX : LoadUnprivileged<0b00, 0, 0b10, GPR64, "ldtrsb">; // load sign-extended word defm LDTRSW : LoadUnprivileged<0b10, 0, 0b10, GPR64, "ldtrsw">; //--- // (immediate pre-indexed) def LDRWpre : LoadPreIdx<0b10, 0, 0b01, GPR32z, "ldr">; def LDRXpre : LoadPreIdx<0b11, 0, 0b01, GPR64z, "ldr">; let Predicates = [HasFPARMv8] in { def LDRBpre : LoadPreIdx<0b00, 1, 0b01, FPR8Op, "ldr">; def LDRHpre : LoadPreIdx<0b01, 1, 0b01, FPR16Op, "ldr">; def LDRSpre : LoadPreIdx<0b10, 1, 0b01, FPR32Op, "ldr">; def LDRDpre : LoadPreIdx<0b11, 1, 0b01, FPR64Op, "ldr">; def LDRQpre : LoadPreIdx<0b00, 1, 0b11, FPR128Op, "ldr">; } // load sign-extended half-word def LDRSHWpre : LoadPreIdx<0b01, 0, 0b11, GPR32z, "ldrsh">; def LDRSHXpre : LoadPreIdx<0b01, 0, 0b10, GPR64z, "ldrsh">; // load sign-extended byte def LDRSBWpre : LoadPreIdx<0b00, 0, 0b11, GPR32z, "ldrsb">; def LDRSBXpre : LoadPreIdx<0b00, 0, 0b10, GPR64z, "ldrsb">; // load zero-extended byte def LDRBBpre : LoadPreIdx<0b00, 0, 0b01, GPR32z, "ldrb">; def LDRHHpre : LoadPreIdx<0b01, 0, 0b01, GPR32z, "ldrh">; // load sign-extended word def LDRSWpre : LoadPreIdx<0b10, 0, 0b10, GPR64z, "ldrsw">; //--- // (immediate post-indexed) def LDRWpost : LoadPostIdx<0b10, 0, 0b01, GPR32z, "ldr">; def LDRXpost : LoadPostIdx<0b11, 0, 0b01, GPR64z, "ldr">; let Predicates = [HasFPARMv8] in { def LDRBpost : LoadPostIdx<0b00, 1, 0b01, FPR8Op, "ldr">; def LDRHpost : LoadPostIdx<0b01, 1, 0b01, FPR16Op, "ldr">; def LDRSpost : LoadPostIdx<0b10, 1, 0b01, FPR32Op, "ldr">; def LDRDpost : LoadPostIdx<0b11, 1, 0b01, FPR64Op, "ldr">; def LDRQpost : LoadPostIdx<0b00, 1, 0b11, FPR128Op, "ldr">; } // load sign-extended half-word def LDRSHWpost : LoadPostIdx<0b01, 0, 0b11, GPR32z, "ldrsh">; def LDRSHXpost : LoadPostIdx<0b01, 0, 0b10, GPR64z, "ldrsh">; // load sign-extended byte def LDRSBWpost : LoadPostIdx<0b00, 0, 0b11, GPR32z, "ldrsb">; def LDRSBXpost : LoadPostIdx<0b00, 0, 0b10, GPR64z, "ldrsb">; // load zero-extended byte def LDRBBpost : LoadPostIdx<0b00, 0, 0b01, GPR32z, "ldrb">; def LDRHHpost : LoadPostIdx<0b01, 0, 0b01, GPR32z, "ldrh">; // load sign-extended word def LDRSWpost : LoadPostIdx<0b10, 0, 0b10, GPR64z, "ldrsw">; //===----------------------------------------------------------------------===// // Store instructions. //===----------------------------------------------------------------------===// // Pair (indexed, offset) // FIXME: Use dedicated range-checked addressing mode operand here. 
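// The signed 7-bit pair offset is scaled by the access size: simm7s4 for
// W/S registers, simm7s8 for X/D registers and simm7s16 for Q registers.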
defm STPW : StorePairOffset<0b00, 0, GPR32z, simm7s4, "stp">; defm STPX : StorePairOffset<0b10, 0, GPR64z, simm7s8, "stp">; let Predicates = [HasFPARMv8] in { defm STPS : StorePairOffset<0b00, 1, FPR32Op, simm7s4, "stp">; defm STPD : StorePairOffset<0b01, 1, FPR64Op, simm7s8, "stp">; defm STPQ : StorePairOffset<0b10, 1, FPR128Op, simm7s16, "stp">; } // Pair (pre-indexed) def STPWpre : StorePairPreIdx<0b00, 0, GPR32z, simm7s4, "stp">; def STPXpre : StorePairPreIdx<0b10, 0, GPR64z, simm7s8, "stp">; let Predicates = [HasFPARMv8] in { def STPSpre : StorePairPreIdx<0b00, 1, FPR32Op, simm7s4, "stp">; def STPDpre : StorePairPreIdx<0b01, 1, FPR64Op, simm7s8, "stp">; def STPQpre : StorePairPreIdx<0b10, 1, FPR128Op, simm7s16, "stp">; } // Pair (post-indexed) def STPWpost : StorePairPostIdx<0b00, 0, GPR32z, simm7s4, "stp">; def STPXpost : StorePairPostIdx<0b10, 0, GPR64z, simm7s8, "stp">; let Predicates = [HasFPARMv8] in { def STPSpost : StorePairPostIdx<0b00, 1, FPR32Op, simm7s4, "stp">; def STPDpost : StorePairPostIdx<0b01, 1, FPR64Op, simm7s8, "stp">; def STPQpost : StorePairPostIdx<0b10, 1, FPR128Op, simm7s16, "stp">; } // Pair (no allocate) defm STNPW : StorePairNoAlloc<0b00, 0, GPR32z, simm7s4, "stnp">; defm STNPX : StorePairNoAlloc<0b10, 0, GPR64z, simm7s8, "stnp">; let Predicates = [HasFPARMv8] in { defm STNPS : StorePairNoAlloc<0b00, 1, FPR32Op, simm7s4, "stnp">; defm STNPD : StorePairNoAlloc<0b01, 1, FPR64Op, simm7s8, "stnp">; defm STNPQ : StorePairNoAlloc<0b10, 1, FPR128Op, simm7s16, "stnp">; } def : Pat<(AArch64stp GPR64z:$Rt, GPR64z:$Rt2, (am_indexed7s64 GPR64sp:$Rn, simm7s8:$offset)), (STPXi GPR64z:$Rt, GPR64z:$Rt2, GPR64sp:$Rn, simm7s8:$offset)>; def : Pat<(AArch64stnp FPR128:$Rt, FPR128:$Rt2, (am_indexed7s128 GPR64sp:$Rn, simm7s16:$offset)), (STNPQi FPR128:$Rt, FPR128:$Rt2, GPR64sp:$Rn, simm7s16:$offset)>; //--- // (Register offset) // Integer defm STRBB : Store8RO< 0b00, 0, 0b00, GPR32, "strb", i32, truncstorei8>; defm STRHH : Store16RO<0b01, 0, 0b00, GPR32, "strh", i32, truncstorei16>; defm STRW : Store32RO<0b10, 0, 0b00, GPR32, "str", i32, store>; defm STRX : Store64RO<0b11, 0, 0b00, GPR64, "str", i64, store>; // Floating-point let Predicates = [HasFPARMv8] in { defm STRB : Store8RO< 0b00, 1, 0b00, FPR8Op, "str", i8, store>; defm STRH : Store16RO<0b01, 1, 0b00, FPR16Op, "str", f16, store>; defm STRS : Store32RO<0b10, 1, 0b00, FPR32Op, "str", f32, store>; defm STRD : Store64RO<0b11, 1, 0b00, FPR64Op, "str", f64, store>; defm STRQ : Store128RO<0b00, 1, 0b10, FPR128Op, "str">; } let Predicates = [UseSTRQro], AddedComplexity = 10 in { def : Pat<(store (f128 FPR128:$Rt), (ro_Windexed128 GPR64sp:$Rn, GPR32:$Rm, ro_Wextend128:$extend)), (STRQroW FPR128:$Rt, GPR64sp:$Rn, GPR32:$Rm, ro_Wextend128:$extend)>; def : Pat<(store (f128 FPR128:$Rt), (ro_Xindexed128 GPR64sp:$Rn, GPR64:$Rm, ro_Xextend128:$extend)), (STRQroX FPR128:$Rt, GPR64sp:$Rn, GPR64:$Rm, ro_Wextend128:$extend)>; } multiclass TruncStoreFrom64ROPat { def : Pat<(storeop GPR64:$Rt, (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)), (STRW (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>; def : Pat<(storeop GPR64:$Rt, (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)), (STRX (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>; } let AddedComplexity = 10 in { // truncstore i64 defm : TruncStoreFrom64ROPat; defm : TruncStoreFrom64ROPat; defm : TruncStoreFrom64ROPat; } multiclass VecROStorePat { def : Pat<(store (VecTy FPR:$Rt), (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)), 
(STRW FPR:$Rt, GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>; def : Pat<(store (VecTy FPR:$Rt), (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)), (STRX FPR:$Rt, GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>; } let AddedComplexity = 10 in { // Match all store 64 bits width whose type is compatible with FPR64 let Predicates = [IsLE] in { // We must use ST1 to store vectors in big-endian. defm : VecROStorePat; defm : VecROStorePat; defm : VecROStorePat; defm : VecROStorePat; defm : VecROStorePat; defm : VecROStorePat; } defm : VecROStorePat; defm : VecROStorePat; // Match all store 128 bits width whose type is compatible with FPR128 let Predicates = [IsLE, UseSTRQro] in { // We must use ST1 to store vectors in big-endian. defm : VecROStorePat; defm : VecROStorePat; defm : VecROStorePat; defm : VecROStorePat; defm : VecROStorePat; defm : VecROStorePat; defm : VecROStorePat; defm : VecROStorePat; } } // AddedComplexity = 10 // Match stores from lane 0 to the appropriate subreg's store. multiclass VecROStoreLane0Pat { def : Pat<(storeop (STy (vector_extract (VecTy VecListOne128:$Vt), (i64 0))), (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)), (STRW (SubRegTy (EXTRACT_SUBREG VecListOne128:$Vt, SubRegIdx)), GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>; def : Pat<(storeop (STy (vector_extract (VecTy VecListOne128:$Vt), (i64 0))), (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)), (STRX (SubRegTy (EXTRACT_SUBREG VecListOne128:$Vt, SubRegIdx)), GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>; } let AddedComplexity = 19 in { defm : VecROStoreLane0Pat; defm : VecROStoreLane0Pat; defm : VecROStoreLane0Pat; defm : VecROStoreLane0Pat; defm : VecROStoreLane0Pat; defm : VecROStoreLane0Pat; } //--- // (unsigned immediate) defm STRX : StoreUIz<0b11, 0, 0b00, GPR64z, uimm12s8, "str", [(store GPR64z:$Rt, (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))]>; defm STRW : StoreUIz<0b10, 0, 0b00, GPR32z, uimm12s4, "str", [(store GPR32z:$Rt, (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))]>; let Predicates = [HasFPARMv8] in { defm STRB : StoreUI<0b00, 1, 0b00, FPR8Op, uimm12s1, "str", [(store FPR8Op:$Rt, (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))]>; defm STRH : StoreUI<0b01, 1, 0b00, FPR16Op, uimm12s2, "str", [(store (f16 FPR16Op:$Rt), (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))]>; defm STRS : StoreUI<0b10, 1, 0b00, FPR32Op, uimm12s4, "str", [(store (f32 FPR32Op:$Rt), (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))]>; defm STRD : StoreUI<0b11, 1, 0b00, FPR64Op, uimm12s8, "str", [(store (f64 FPR64Op:$Rt), (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))]>; defm STRQ : StoreUI<0b00, 1, 0b10, FPR128Op, uimm12s16, "str", []>; } defm STRHH : StoreUIz<0b01, 0, 0b00, GPR32z, uimm12s2, "strh", [(truncstorei16 GPR32z:$Rt, (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))]>; defm STRBB : StoreUIz<0b00, 0, 0b00, GPR32z, uimm12s1, "strb", [(truncstorei8 GPR32z:$Rt, (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))]>; // bf16 store pattern def : Pat<(store (bf16 FPR16Op:$Rt), (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)), (STRHui FPR16:$Rt, GPR64sp:$Rn, uimm12s2:$offset)>; let AddedComplexity = 10 in { // Match all store 64 bits width whose type is compatible with FPR64 def : Pat<(store (v1i64 FPR64:$Rt), (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)), (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>; def : Pat<(store (v1f64 FPR64:$Rt), (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)), (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>; let Predicates = [IsLE] in { // We must use ST1 to store vectors in big-endian. 
def : Pat<(store (v2f32 FPR64:$Rt), (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)), (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>; def : Pat<(store (v8i8 FPR64:$Rt), (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)), (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>; def : Pat<(store (v4i16 FPR64:$Rt), (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)), (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>; def : Pat<(store (v2i32 FPR64:$Rt), (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)), (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>; def : Pat<(store (v4f16 FPR64:$Rt), (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)), (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>; def : Pat<(store (v4bf16 FPR64:$Rt), (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)), (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>; } // Match all store 128 bits width whose type is compatible with FPR128 def : Pat<(store (f128 FPR128:$Rt), (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)), (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>; let Predicates = [IsLE] in { // We must use ST1 to store vectors in big-endian. def : Pat<(store (v4f32 FPR128:$Rt), (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)), (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>; def : Pat<(store (v2f64 FPR128:$Rt), (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)), (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>; def : Pat<(store (v16i8 FPR128:$Rt), (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)), (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>; def : Pat<(store (v8i16 FPR128:$Rt), (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)), (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>; def : Pat<(store (v4i32 FPR128:$Rt), (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)), (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>; def : Pat<(store (v2i64 FPR128:$Rt), (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)), (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>; def : Pat<(store (v8f16 FPR128:$Rt), (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)), (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>; def : Pat<(store (v8bf16 FPR128:$Rt), (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)), (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>; } // truncstore i64 def : Pat<(truncstorei32 GPR64:$Rt, (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset)), (STRWui (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, uimm12s4:$offset)>; def : Pat<(truncstorei16 GPR64:$Rt, (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)), (STRHHui (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, uimm12s2:$offset)>; def : Pat<(truncstorei8 GPR64:$Rt, (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset)), (STRBBui (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, uimm12s1:$offset)>; } // AddedComplexity = 10 // Match stores from lane 0 to the appropriate subreg's store. 
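// For example, storing lane 0 of a v4i32 vector becomes a plain STR of the
// s-subregister, avoiding an explicit lane extract.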
multiclass VecStoreLane0Pat { def : Pat<(storeop (STy (vector_extract (VTy VecListOne128:$Vt), (i64 0))), (UIAddrMode GPR64sp:$Rn, IndexType:$offset)), (STR (SubRegTy (EXTRACT_SUBREG VecListOne128:$Vt, SubRegIdx)), GPR64sp:$Rn, IndexType:$offset)>; } let AddedComplexity = 19 in { defm : VecStoreLane0Pat; defm : VecStoreLane0Pat; defm : VecStoreLane0Pat; defm : VecStoreLane0Pat; defm : VecStoreLane0Pat; defm : VecStoreLane0Pat; } //--- // (unscaled immediate) defm STURX : StoreUnscaled<0b11, 0, 0b00, GPR64z, "stur", [(store GPR64z:$Rt, (am_unscaled64 GPR64sp:$Rn, simm9:$offset))]>; defm STURW : StoreUnscaled<0b10, 0, 0b00, GPR32z, "stur", [(store GPR32z:$Rt, (am_unscaled32 GPR64sp:$Rn, simm9:$offset))]>; let Predicates = [HasFPARMv8] in { defm STURB : StoreUnscaled<0b00, 1, 0b00, FPR8Op, "stur", [(store FPR8Op:$Rt, (am_unscaled8 GPR64sp:$Rn, simm9:$offset))]>; defm STURH : StoreUnscaled<0b01, 1, 0b00, FPR16Op, "stur", [(store (f16 FPR16Op:$Rt), (am_unscaled16 GPR64sp:$Rn, simm9:$offset))]>; defm STURS : StoreUnscaled<0b10, 1, 0b00, FPR32Op, "stur", [(store (f32 FPR32Op:$Rt), (am_unscaled32 GPR64sp:$Rn, simm9:$offset))]>; defm STURD : StoreUnscaled<0b11, 1, 0b00, FPR64Op, "stur", [(store (f64 FPR64Op:$Rt), (am_unscaled64 GPR64sp:$Rn, simm9:$offset))]>; defm STURQ : StoreUnscaled<0b00, 1, 0b10, FPR128Op, "stur", [(store (f128 FPR128Op:$Rt), (am_unscaled128 GPR64sp:$Rn, simm9:$offset))]>; } defm STURHH : StoreUnscaled<0b01, 0, 0b00, GPR32z, "sturh", [(truncstorei16 GPR32z:$Rt, (am_unscaled16 GPR64sp:$Rn, simm9:$offset))]>; defm STURBB : StoreUnscaled<0b00, 0, 0b00, GPR32z, "sturb", [(truncstorei8 GPR32z:$Rt, (am_unscaled8 GPR64sp:$Rn, simm9:$offset))]>; // bf16 store pattern def : Pat<(store (bf16 FPR16Op:$Rt), (am_unscaled16 GPR64sp:$Rn, simm9:$offset)), (STURHi FPR16:$Rt, GPR64sp:$Rn, simm9:$offset)>; // Armv8.4 Weaker Release Consistency enhancements // LDAPR & STLR with Immediate Offset instructions let Predicates = [HasRCPC_IMMO] in { defm STLURB : BaseStoreUnscaleV84<"stlurb", 0b00, 0b00, GPR32>; defm STLURH : BaseStoreUnscaleV84<"stlurh", 0b01, 0b00, GPR32>; defm STLURW : BaseStoreUnscaleV84<"stlur", 0b10, 0b00, GPR32>; defm STLURX : BaseStoreUnscaleV84<"stlur", 0b11, 0b00, GPR64>; defm LDAPURB : BaseLoadUnscaleV84<"ldapurb", 0b00, 0b01, GPR32>; defm LDAPURSBW : BaseLoadUnscaleV84<"ldapursb", 0b00, 0b11, GPR32>; defm LDAPURSBX : BaseLoadUnscaleV84<"ldapursb", 0b00, 0b10, GPR64>; defm LDAPURH : BaseLoadUnscaleV84<"ldapurh", 0b01, 0b01, GPR32>; defm LDAPURSHW : BaseLoadUnscaleV84<"ldapursh", 0b01, 0b11, GPR32>; defm LDAPURSHX : BaseLoadUnscaleV84<"ldapursh", 0b01, 0b10, GPR64>; defm LDAPUR : BaseLoadUnscaleV84<"ldapur", 0b10, 0b01, GPR32>; defm LDAPURSW : BaseLoadUnscaleV84<"ldapursw", 0b10, 0b10, GPR64>; defm LDAPURX : BaseLoadUnscaleV84<"ldapur", 0b11, 0b01, GPR64>; } // Match all store 64 bits width whose type is compatible with FPR64 def : Pat<(store (v1f64 FPR64:$Rt), (am_unscaled64 GPR64sp:$Rn, simm9:$offset)), (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>; def : Pat<(store (v1i64 FPR64:$Rt), (am_unscaled64 GPR64sp:$Rn, simm9:$offset)), (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>; let AddedComplexity = 10 in { let Predicates = [IsLE] in { // We must use ST1 to store vectors in big-endian. 
def : Pat<(store (v2f32 FPR64:$Rt), (am_unscaled64 GPR64sp:$Rn, simm9:$offset)), (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>; def : Pat<(store (v8i8 FPR64:$Rt), (am_unscaled64 GPR64sp:$Rn, simm9:$offset)), (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>; def : Pat<(store (v4i16 FPR64:$Rt), (am_unscaled64 GPR64sp:$Rn, simm9:$offset)), (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>; def : Pat<(store (v2i32 FPR64:$Rt), (am_unscaled64 GPR64sp:$Rn, simm9:$offset)), (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>; def : Pat<(store (v4f16 FPR64:$Rt), (am_unscaled64 GPR64sp:$Rn, simm9:$offset)), (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>; def : Pat<(store (v4bf16 FPR64:$Rt), (am_unscaled64 GPR64sp:$Rn, simm9:$offset)), (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>; } // Match all store 128 bits width whose type is compatible with FPR128 def : Pat<(store (f128 FPR128:$Rt), (am_unscaled128 GPR64sp:$Rn, simm9:$offset)), (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>; let Predicates = [IsLE] in { // We must use ST1 to store vectors in big-endian. def : Pat<(store (v4f32 FPR128:$Rt), (am_unscaled128 GPR64sp:$Rn, simm9:$offset)), (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>; def : Pat<(store (v2f64 FPR128:$Rt), (am_unscaled128 GPR64sp:$Rn, simm9:$offset)), (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>; def : Pat<(store (v16i8 FPR128:$Rt), (am_unscaled128 GPR64sp:$Rn, simm9:$offset)), (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>; def : Pat<(store (v8i16 FPR128:$Rt), (am_unscaled128 GPR64sp:$Rn, simm9:$offset)), (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>; def : Pat<(store (v4i32 FPR128:$Rt), (am_unscaled128 GPR64sp:$Rn, simm9:$offset)), (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>; def : Pat<(store (v2i64 FPR128:$Rt), (am_unscaled128 GPR64sp:$Rn, simm9:$offset)), (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>; def : Pat<(store (v2f64 FPR128:$Rt), (am_unscaled128 GPR64sp:$Rn, simm9:$offset)), (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>; def : Pat<(store (v8f16 FPR128:$Rt), (am_unscaled128 GPR64sp:$Rn, simm9:$offset)), (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>; def : Pat<(store (v8bf16 FPR128:$Rt), (am_unscaled128 GPR64sp:$Rn, simm9:$offset)), (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>; } } // AddedComplexity = 10 // unscaled i64 truncating stores def : Pat<(truncstorei32 GPR64:$Rt, (am_unscaled32 GPR64sp:$Rn, simm9:$offset)), (STURWi (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, simm9:$offset)>; def : Pat<(truncstorei16 GPR64:$Rt, (am_unscaled16 GPR64sp:$Rn, simm9:$offset)), (STURHHi (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, simm9:$offset)>; def : Pat<(truncstorei8 GPR64:$Rt, (am_unscaled8 GPR64sp:$Rn, simm9:$offset)), (STURBBi (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, simm9:$offset)>; // Match stores from lane 0 to the appropriate subreg's store. multiclass VecStoreULane0Pat { defm : VecStoreLane0Pat; } let AddedComplexity = 19 in { defm : VecStoreULane0Pat; defm : VecStoreULane0Pat; defm : VecStoreULane0Pat; defm : VecStoreULane0Pat; defm : VecStoreULane0Pat; defm : VecStoreULane0Pat; } //--- // STR mnemonics fall back to STUR for negative or unaligned offsets. 
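// For example, "str x0, [x1, #-8]" cannot be encoded with the scaled unsigned
// offset of STRXui, so the aliases below assemble it as "stur x0, [x1, #-8]".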
def : InstAlias<"str $Rt, [$Rn, $offset]", (STURXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>; def : InstAlias<"str $Rt, [$Rn, $offset]", (STURWi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>; let Predicates = [HasFPARMv8] in { def : InstAlias<"str $Rt, [$Rn, $offset]", (STURBi FPR8Op:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>; def : InstAlias<"str $Rt, [$Rn, $offset]", (STURHi FPR16Op:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>; def : InstAlias<"str $Rt, [$Rn, $offset]", (STURSi FPR32Op:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>; def : InstAlias<"str $Rt, [$Rn, $offset]", (STURDi FPR64Op:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>; def : InstAlias<"str $Rt, [$Rn, $offset]", (STURQi FPR128Op:$Rt, GPR64sp:$Rn, simm9_offset_fb128:$offset), 0>; } def : InstAlias<"strb $Rt, [$Rn, $offset]", (STURBBi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>; def : InstAlias<"strh $Rt, [$Rn, $offset]", (STURHHi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>; //--- // (unscaled immediate, unprivileged) defm STTRW : StoreUnprivileged<0b10, 0, 0b00, GPR32, "sttr">; defm STTRX : StoreUnprivileged<0b11, 0, 0b00, GPR64, "sttr">; defm STTRH : StoreUnprivileged<0b01, 0, 0b00, GPR32, "sttrh">; defm STTRB : StoreUnprivileged<0b00, 0, 0b00, GPR32, "sttrb">; //--- // (immediate pre-indexed) def STRWpre : StorePreIdx<0b10, 0, 0b00, GPR32z, "str", pre_store, i32>; def STRXpre : StorePreIdx<0b11, 0, 0b00, GPR64z, "str", pre_store, i64>; let Predicates = [HasFPARMv8] in { def STRBpre : StorePreIdx<0b00, 1, 0b00, FPR8Op, "str", pre_store, i8>; def STRHpre : StorePreIdx<0b01, 1, 0b00, FPR16Op, "str", pre_store, f16>; def STRSpre : StorePreIdx<0b10, 1, 0b00, FPR32Op, "str", pre_store, f32>; def STRDpre : StorePreIdx<0b11, 1, 0b00, FPR64Op, "str", pre_store, f64>; def STRQpre : StorePreIdx<0b00, 1, 0b10, FPR128Op, "str", pre_store, f128>; } def STRBBpre : StorePreIdx<0b00, 0, 0b00, GPR32z, "strb", pre_truncsti8, i32>; def STRHHpre : StorePreIdx<0b01, 0, 0b00, GPR32z, "strh", pre_truncsti16, i32>; // truncstore i64 def : Pat<(pre_truncsti32 GPR64:$Rt, GPR64sp:$addr, simm9:$off), (STRWpre (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr, simm9:$off)>; def : Pat<(pre_truncsti16 GPR64:$Rt, GPR64sp:$addr, simm9:$off), (STRHHpre (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr, simm9:$off)>; def : Pat<(pre_truncsti8 GPR64:$Rt, GPR64sp:$addr, simm9:$off), (STRBBpre (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr, simm9:$off)>; def : Pat<(pre_store (v8i8 FPR64:$Rt), GPR64sp:$addr, simm9:$off), (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; def : Pat<(pre_store (v4i16 FPR64:$Rt), GPR64sp:$addr, simm9:$off), (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; def : Pat<(pre_store (v2i32 FPR64:$Rt), GPR64sp:$addr, simm9:$off), (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; def : Pat<(pre_store (v2f32 FPR64:$Rt), GPR64sp:$addr, simm9:$off), (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; def : Pat<(pre_store (v1i64 FPR64:$Rt), GPR64sp:$addr, simm9:$off), (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; def : Pat<(pre_store (v1f64 FPR64:$Rt), GPR64sp:$addr, simm9:$off), (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; def : Pat<(pre_store (v4f16 FPR64:$Rt), GPR64sp:$addr, simm9:$off), (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; def : Pat<(pre_store (v16i8 FPR128:$Rt), GPR64sp:$addr, simm9:$off), (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; def : Pat<(pre_store (v8i16 FPR128:$Rt), GPR64sp:$addr, simm9:$off), (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; def : 
Pat<(pre_store (v4i32 FPR128:$Rt), GPR64sp:$addr, simm9:$off), (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; def : Pat<(pre_store (v4f32 FPR128:$Rt), GPR64sp:$addr, simm9:$off), (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; def : Pat<(pre_store (v2i64 FPR128:$Rt), GPR64sp:$addr, simm9:$off), (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; def : Pat<(pre_store (v2f64 FPR128:$Rt), GPR64sp:$addr, simm9:$off), (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; def : Pat<(pre_store (v8f16 FPR128:$Rt), GPR64sp:$addr, simm9:$off), (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; //--- // (immediate post-indexed) def STRWpost : StorePostIdx<0b10, 0, 0b00, GPR32z, "str", post_store, i32>; def STRXpost : StorePostIdx<0b11, 0, 0b00, GPR64z, "str", post_store, i64>; let Predicates = [HasFPARMv8] in { def STRBpost : StorePostIdx<0b00, 1, 0b00, FPR8Op, "str", post_store, i8>; def STRHpost : StorePostIdx<0b01, 1, 0b00, FPR16Op, "str", post_store, f16>; def STRSpost : StorePostIdx<0b10, 1, 0b00, FPR32Op, "str", post_store, f32>; def STRDpost : StorePostIdx<0b11, 1, 0b00, FPR64Op, "str", post_store, f64>; def STRQpost : StorePostIdx<0b00, 1, 0b10, FPR128Op, "str", post_store, f128>; } def STRBBpost : StorePostIdx<0b00, 0, 0b00, GPR32z, "strb", post_truncsti8, i32>; def STRHHpost : StorePostIdx<0b01, 0, 0b00, GPR32z, "strh", post_truncsti16, i32>; // truncstore i64 def : Pat<(post_truncsti32 GPR64:$Rt, GPR64sp:$addr, simm9:$off), (STRWpost (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr, simm9:$off)>; def : Pat<(post_truncsti16 GPR64:$Rt, GPR64sp:$addr, simm9:$off), (STRHHpost (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr, simm9:$off)>; def : Pat<(post_truncsti8 GPR64:$Rt, GPR64sp:$addr, simm9:$off), (STRBBpost (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr, simm9:$off)>; def : Pat<(post_store (bf16 FPR16:$Rt), GPR64sp:$addr, simm9:$off), (STRHpost FPR16:$Rt, GPR64sp:$addr, simm9:$off)>; def : Pat<(post_store (v8i8 FPR64:$Rt), GPR64sp:$addr, simm9:$off), (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; def : Pat<(post_store (v4i16 FPR64:$Rt), GPR64sp:$addr, simm9:$off), (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; def : Pat<(post_store (v2i32 FPR64:$Rt), GPR64sp:$addr, simm9:$off), (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; def : Pat<(post_store (v2f32 FPR64:$Rt), GPR64sp:$addr, simm9:$off), (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; def : Pat<(post_store (v1i64 FPR64:$Rt), GPR64sp:$addr, simm9:$off), (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; def : Pat<(post_store (v1f64 FPR64:$Rt), GPR64sp:$addr, simm9:$off), (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; def : Pat<(post_store (v4f16 FPR64:$Rt), GPR64sp:$addr, simm9:$off), (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; def : Pat<(post_store (v4bf16 FPR64:$Rt), GPR64sp:$addr, simm9:$off), (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; def : Pat<(post_store (v16i8 FPR128:$Rt), GPR64sp:$addr, simm9:$off), (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; def : Pat<(post_store (v8i16 FPR128:$Rt), GPR64sp:$addr, simm9:$off), (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; def : Pat<(post_store (v4i32 FPR128:$Rt), GPR64sp:$addr, simm9:$off), (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; def : Pat<(post_store (v4f32 FPR128:$Rt), GPR64sp:$addr, simm9:$off), (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; def : Pat<(post_store (v2i64 FPR128:$Rt), GPR64sp:$addr, simm9:$off), (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; def : Pat<(post_store (v2f64 FPR128:$Rt), GPR64sp:$addr, simm9:$off), 
(STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; def : Pat<(post_store (v8f16 FPR128:$Rt), GPR64sp:$addr, simm9:$off), (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; def : Pat<(post_store (v8bf16 FPR128:$Rt), GPR64sp:$addr, simm9:$off), (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; //===----------------------------------------------------------------------===// // Load/store exclusive instructions. //===----------------------------------------------------------------------===// def LDARW : LoadAcquire <0b10, 1, 1, 0, 1, GPR32, "ldar">; def LDARX : LoadAcquire <0b11, 1, 1, 0, 1, GPR64, "ldar">; def LDARB : LoadAcquire <0b00, 1, 1, 0, 1, GPR32, "ldarb">; def LDARH : LoadAcquire <0b01, 1, 1, 0, 1, GPR32, "ldarh">; def LDAXRW : LoadExclusive <0b10, 0, 1, 0, 1, GPR32, "ldaxr">; def LDAXRX : LoadExclusive <0b11, 0, 1, 0, 1, GPR64, "ldaxr">; def LDAXRB : LoadExclusive <0b00, 0, 1, 0, 1, GPR32, "ldaxrb">; def LDAXRH : LoadExclusive <0b01, 0, 1, 0, 1, GPR32, "ldaxrh">; def LDXRW : LoadExclusive <0b10, 0, 1, 0, 0, GPR32, "ldxr">; def LDXRX : LoadExclusive <0b11, 0, 1, 0, 0, GPR64, "ldxr">; def LDXRB : LoadExclusive <0b00, 0, 1, 0, 0, GPR32, "ldxrb">; def LDXRH : LoadExclusive <0b01, 0, 1, 0, 0, GPR32, "ldxrh">; def STLRW : StoreRelease <0b10, 1, 0, 0, 1, GPR32, "stlr">; def STLRX : StoreRelease <0b11, 1, 0, 0, 1, GPR64, "stlr">; def STLRB : StoreRelease <0b00, 1, 0, 0, 1, GPR32, "stlrb">; def STLRH : StoreRelease <0b01, 1, 0, 0, 1, GPR32, "stlrh">; /* Aliases for when offset=0. Note that in contrast to LoadAcquire which has a $Rn of type GPR64sp0, we deliberately choose to make $Rn of type GPR64sp and add an alias for the case of immediate #0. This is because new STLR versions (from LRCPC3 extension) do have a non-zero immediate value, so GPR64sp0 is not appropriate anymore (it parses and discards the optional zero). This is not the case for LoadAcquire because the new LRCPC3 LDAR instructions are post-indexed, and the immediate values are not inside the [] brackets and thus not accepted by GPR64sp0 parser. 
*/ def STLRW0 : InstAlias<"stlr\t$Rt, [$Rn, #0]" , (STLRW GPR32: $Rt, GPR64sp:$Rn)>; def STLRX0 : InstAlias<"stlr\t$Rt, [$Rn, #0]" , (STLRX GPR64: $Rt, GPR64sp:$Rn)>; def STLRB0 : InstAlias<"stlrb\t$Rt, [$Rn, #0]", (STLRB GPR32: $Rt, GPR64sp:$Rn)>; def STLRH0 : InstAlias<"stlrh\t$Rt, [$Rn, #0]", (STLRH GPR32: $Rt, GPR64sp:$Rn)>; def STLXRW : StoreExclusive<0b10, 0, 0, 0, 1, GPR32, "stlxr">; def STLXRX : StoreExclusive<0b11, 0, 0, 0, 1, GPR64, "stlxr">; def STLXRB : StoreExclusive<0b00, 0, 0, 0, 1, GPR32, "stlxrb">; def STLXRH : StoreExclusive<0b01, 0, 0, 0, 1, GPR32, "stlxrh">; def STXRW : StoreExclusive<0b10, 0, 0, 0, 0, GPR32, "stxr">; def STXRX : StoreExclusive<0b11, 0, 0, 0, 0, GPR64, "stxr">; def STXRB : StoreExclusive<0b00, 0, 0, 0, 0, GPR32, "stxrb">; def STXRH : StoreExclusive<0b01, 0, 0, 0, 0, GPR32, "stxrh">; def LDAXPW : LoadExclusivePair<0b10, 0, 1, 1, 1, GPR32, "ldaxp">; def LDAXPX : LoadExclusivePair<0b11, 0, 1, 1, 1, GPR64, "ldaxp">; def LDXPW : LoadExclusivePair<0b10, 0, 1, 1, 0, GPR32, "ldxp">; def LDXPX : LoadExclusivePair<0b11, 0, 1, 1, 0, GPR64, "ldxp">; def STLXPW : StoreExclusivePair<0b10, 0, 0, 1, 1, GPR32, "stlxp">; def STLXPX : StoreExclusivePair<0b11, 0, 0, 1, 1, GPR64, "stlxp">; def STXPW : StoreExclusivePair<0b10, 0, 0, 1, 0, GPR32, "stxp">; def STXPX : StoreExclusivePair<0b11, 0, 0, 1, 0, GPR64, "stxp">; let Predicates = [HasLOR] in { // v8.1a "Limited Order Region" extension load-acquire instructions def LDLARW : LoadAcquire <0b10, 1, 1, 0, 0, GPR32, "ldlar">; def LDLARX : LoadAcquire <0b11, 1, 1, 0, 0, GPR64, "ldlar">; def LDLARB : LoadAcquire <0b00, 1, 1, 0, 0, GPR32, "ldlarb">; def LDLARH : LoadAcquire <0b01, 1, 1, 0, 0, GPR32, "ldlarh">; // v8.1a "Limited Order Region" extension store-release instructions def STLLRW : StoreRelease <0b10, 1, 0, 0, 0, GPR32, "stllr">; def STLLRX : StoreRelease <0b11, 1, 0, 0, 0, GPR64, "stllr">; def STLLRB : StoreRelease <0b00, 1, 0, 0, 0, GPR32, "stllrb">; def STLLRH : StoreRelease <0b01, 1, 0, 0, 0, GPR32, "stllrh">; // Aliases for when offset=0 def STLLRW0 : InstAlias<"stllr\t$Rt, [$Rn, #0]", (STLLRW GPR32: $Rt, GPR64sp:$Rn)>; def STLLRX0 : InstAlias<"stllr\t$Rt, [$Rn, #0]", (STLLRX GPR64: $Rt, GPR64sp:$Rn)>; def STLLRB0 : InstAlias<"stllrb\t$Rt, [$Rn, #0]", (STLLRB GPR32: $Rt, GPR64sp:$Rn)>; def STLLRH0 : InstAlias<"stllrh\t$Rt, [$Rn, #0]", (STLLRH GPR32: $Rt, GPR64sp:$Rn)>; } //===----------------------------------------------------------------------===// // Scaled floating point to integer conversion instructions. 
//===----------------------------------------------------------------------===// defm FCVTAS : FPToIntegerUnscaled<0b00, 0b100, "fcvtas", int_aarch64_neon_fcvtas>; defm FCVTAU : FPToIntegerUnscaled<0b00, 0b101, "fcvtau", int_aarch64_neon_fcvtau>; defm FCVTMS : FPToIntegerUnscaled<0b10, 0b000, "fcvtms", int_aarch64_neon_fcvtms>; defm FCVTMU : FPToIntegerUnscaled<0b10, 0b001, "fcvtmu", int_aarch64_neon_fcvtmu>; defm FCVTNS : FPToIntegerUnscaled<0b00, 0b000, "fcvtns", int_aarch64_neon_fcvtns>; defm FCVTNU : FPToIntegerUnscaled<0b00, 0b001, "fcvtnu", int_aarch64_neon_fcvtnu>; defm FCVTPS : FPToIntegerUnscaled<0b01, 0b000, "fcvtps", int_aarch64_neon_fcvtps>; defm FCVTPU : FPToIntegerUnscaled<0b01, 0b001, "fcvtpu", int_aarch64_neon_fcvtpu>; defm FCVTZS : FPToIntegerUnscaled<0b11, 0b000, "fcvtzs", any_fp_to_sint>; defm FCVTZU : FPToIntegerUnscaled<0b11, 0b001, "fcvtzu", any_fp_to_uint>; defm FCVTZS : FPToIntegerScaled<0b11, 0b000, "fcvtzs", any_fp_to_sint>; defm FCVTZU : FPToIntegerScaled<0b11, 0b001, "fcvtzu", any_fp_to_uint>; // AArch64's FCVT instructions saturate when out of range. multiclass FPToIntegerSatPats { let Predicates = [HasFullFP16] in { def : Pat<(i32 (to_int_sat f16:$Rn, i32)), (!cast(INST # UWHr) f16:$Rn)>; def : Pat<(i64 (to_int_sat f16:$Rn, i64)), (!cast(INST # UXHr) f16:$Rn)>; } def : Pat<(i32 (to_int_sat f32:$Rn, i32)), (!cast(INST # UWSr) f32:$Rn)>; def : Pat<(i64 (to_int_sat f32:$Rn, i64)), (!cast(INST # UXSr) f32:$Rn)>; def : Pat<(i32 (to_int_sat f64:$Rn, i32)), (!cast(INST # UWDr) f64:$Rn)>; def : Pat<(i64 (to_int_sat f64:$Rn, i64)), (!cast(INST # UXDr) f64:$Rn)>; let Predicates = [HasFullFP16] in { def : Pat<(i32 (to_int_sat (fmul f16:$Rn, fixedpoint_f16_i32:$scale), i32)), (!cast(INST # SWHri) $Rn, $scale)>; def : Pat<(i64 (to_int_sat (fmul f16:$Rn, fixedpoint_f16_i64:$scale), i64)), (!cast(INST # SXHri) $Rn, $scale)>; } def : Pat<(i32 (to_int_sat (fmul f32:$Rn, fixedpoint_f32_i32:$scale), i32)), (!cast(INST # SWSri) $Rn, $scale)>; def : Pat<(i64 (to_int_sat (fmul f32:$Rn, fixedpoint_f32_i64:$scale), i64)), (!cast(INST # SXSri) $Rn, $scale)>; def : Pat<(i32 (to_int_sat (fmul f64:$Rn, fixedpoint_f64_i32:$scale), i32)), (!cast(INST # SWDri) $Rn, $scale)>; def : Pat<(i64 (to_int_sat (fmul f64:$Rn, fixedpoint_f64_i64:$scale), i64)), (!cast(INST # SXDri) $Rn, $scale)>; } defm : FPToIntegerSatPats; defm : FPToIntegerSatPats; multiclass FPToIntegerIntPats { let Predicates = [HasFullFP16] in { def : Pat<(i32 (round f16:$Rn)), (!cast(INST # UWHr) $Rn)>; def : Pat<(i64 (round f16:$Rn)), (!cast(INST # UXHr) $Rn)>; } def : Pat<(i32 (round f32:$Rn)), (!cast(INST # UWSr) $Rn)>; def : Pat<(i64 (round f32:$Rn)), (!cast(INST # UXSr) $Rn)>; def : Pat<(i32 (round f64:$Rn)), (!cast(INST # UWDr) $Rn)>; def : Pat<(i64 (round f64:$Rn)), (!cast(INST # UXDr) $Rn)>; let Predicates = [HasFullFP16] in { def : Pat<(i32 (round (fmul f16:$Rn, fixedpoint_f16_i32:$scale))), (!cast(INST # SWHri) $Rn, $scale)>; def : Pat<(i64 (round (fmul f16:$Rn, fixedpoint_f16_i64:$scale))), (!cast(INST # SXHri) $Rn, $scale)>; } def : Pat<(i32 (round (fmul f32:$Rn, fixedpoint_f32_i32:$scale))), (!cast(INST # SWSri) $Rn, $scale)>; def : Pat<(i64 (round (fmul f32:$Rn, fixedpoint_f32_i64:$scale))), (!cast(INST # SXSri) $Rn, $scale)>; def : Pat<(i32 (round (fmul f64:$Rn, fixedpoint_f64_i32:$scale))), (!cast(INST # SWDri) $Rn, $scale)>; def : Pat<(i64 (round (fmul f64:$Rn, fixedpoint_f64_i64:$scale))), (!cast(INST # SXDri) $Rn, $scale)>; } defm : FPToIntegerIntPats; defm : FPToIntegerIntPats; multiclass FPToIntegerPats { 
def : Pat<(i32 (to_int (round f32:$Rn))), (!cast(INST # UWSr) f32:$Rn)>; def : Pat<(i64 (to_int (round f32:$Rn))), (!cast(INST # UXSr) f32:$Rn)>; def : Pat<(i32 (to_int (round f64:$Rn))), (!cast(INST # UWDr) f64:$Rn)>; def : Pat<(i64 (to_int (round f64:$Rn))), (!cast(INST # UXDr) f64:$Rn)>; // These instructions saturate like fp_to_[su]int_sat. let Predicates = [HasFullFP16] in { def : Pat<(i32 (to_int_sat (round f16:$Rn), i32)), (!cast(INST # UWHr) f16:$Rn)>; def : Pat<(i64 (to_int_sat (round f16:$Rn), i64)), (!cast(INST # UXHr) f16:$Rn)>; } def : Pat<(i32 (to_int_sat (round f32:$Rn), i32)), (!cast(INST # UWSr) f32:$Rn)>; def : Pat<(i64 (to_int_sat (round f32:$Rn), i64)), (!cast(INST # UXSr) f32:$Rn)>; def : Pat<(i32 (to_int_sat (round f64:$Rn), i32)), (!cast(INST # UWDr) f64:$Rn)>; def : Pat<(i64 (to_int_sat (round f64:$Rn), i64)), (!cast(INST # UXDr) f64:$Rn)>; } defm : FPToIntegerPats; defm : FPToIntegerPats; defm : FPToIntegerPats; defm : FPToIntegerPats; defm : FPToIntegerPats; defm : FPToIntegerPats; defm : FPToIntegerPats; defm : FPToIntegerPats; let Predicates = [HasFullFP16] in { def : Pat<(i32 (any_lround f16:$Rn)), (FCVTASUWHr f16:$Rn)>; def : Pat<(i64 (any_lround f16:$Rn)), (FCVTASUXHr f16:$Rn)>; def : Pat<(i64 (any_llround f16:$Rn)), (FCVTASUXHr f16:$Rn)>; } def : Pat<(i32 (any_lround f32:$Rn)), (FCVTASUWSr f32:$Rn)>; def : Pat<(i32 (any_lround f64:$Rn)), (FCVTASUWDr f64:$Rn)>; def : Pat<(i64 (any_lround f32:$Rn)), (FCVTASUXSr f32:$Rn)>; def : Pat<(i64 (any_lround f64:$Rn)), (FCVTASUXDr f64:$Rn)>; def : Pat<(i64 (any_llround f32:$Rn)), (FCVTASUXSr f32:$Rn)>; def : Pat<(i64 (any_llround f64:$Rn)), (FCVTASUXDr f64:$Rn)>; //===----------------------------------------------------------------------===// // Scaled integer to floating point conversion instructions. 
//===----------------------------------------------------------------------===// defm SCVTF : IntegerToFP<0, "scvtf", any_sint_to_fp>; defm UCVTF : IntegerToFP<1, "ucvtf", any_uint_to_fp>; def : Pat<(f16 (fdiv (f16 (any_sint_to_fp (i32 GPR32:$Rn))), fixedpoint_f16_i32:$scale)), (SCVTFSWHri GPR32:$Rn, fixedpoint_f16_i32:$scale)>; def : Pat<(f32 (fdiv (f32 (any_sint_to_fp (i32 GPR32:$Rn))), fixedpoint_f32_i32:$scale)), (SCVTFSWSri GPR32:$Rn, fixedpoint_f32_i32:$scale)>; def : Pat<(f64 (fdiv (f64 (any_sint_to_fp (i32 GPR32:$Rn))), fixedpoint_f64_i32:$scale)), (SCVTFSWDri GPR32:$Rn, fixedpoint_f64_i32:$scale)>; def : Pat<(f16 (fdiv (f16 (any_sint_to_fp (i64 GPR64:$Rn))), fixedpoint_f16_i64:$scale)), (SCVTFSXHri GPR64:$Rn, fixedpoint_f16_i64:$scale)>; def : Pat<(f32 (fdiv (f32 (any_sint_to_fp (i64 GPR64:$Rn))), fixedpoint_f32_i64:$scale)), (SCVTFSXSri GPR64:$Rn, fixedpoint_f32_i64:$scale)>; def : Pat<(f64 (fdiv (f64 (any_sint_to_fp (i64 GPR64:$Rn))), fixedpoint_f64_i64:$scale)), (SCVTFSXDri GPR64:$Rn, fixedpoint_f64_i64:$scale)>; def : Pat<(f16 (fdiv (f16 (any_uint_to_fp (i64 GPR64:$Rn))), fixedpoint_f16_i64:$scale)), (UCVTFSXHri GPR64:$Rn, fixedpoint_f16_i64:$scale)>; def : Pat<(f32 (fdiv (f32 (any_uint_to_fp (i64 GPR64:$Rn))), fixedpoint_f32_i64:$scale)), (UCVTFSXSri GPR64:$Rn, fixedpoint_f32_i64:$scale)>; def : Pat<(f64 (fdiv (f64 (any_uint_to_fp (i64 GPR64:$Rn))), fixedpoint_f64_i64:$scale)), (UCVTFSXDri GPR64:$Rn, fixedpoint_f64_i64:$scale)>; def : Pat<(f16 (fdiv (f16 (any_uint_to_fp (i32 GPR32:$Rn))), fixedpoint_f16_i32:$scale)), (UCVTFSWHri GPR32:$Rn, fixedpoint_f16_i32:$scale)>; def : Pat<(f32 (fdiv (f32 (any_uint_to_fp (i32 GPR32:$Rn))), fixedpoint_f32_i32:$scale)), (UCVTFSWSri GPR32:$Rn, fixedpoint_f32_i32:$scale)>; def : Pat<(f64 (fdiv (f64 (any_uint_to_fp (i32 GPR32:$Rn))), fixedpoint_f64_i32:$scale)), (UCVTFSWDri GPR32:$Rn, fixedpoint_f64_i32:$scale)>; //===----------------------------------------------------------------------===// // Unscaled integer to floating point conversion instruction. //===----------------------------------------------------------------------===// defm FMOV : UnscaledConversion<"fmov">; // Add pseudo ops for FMOV 0 so we can mark them as isReMaterializable let isReMaterializable = 1, isCodeGenOnly = 1, isAsCheapAsAMove = 1, Predicates = [HasFPARMv8] in { def FMOVH0 : Pseudo<(outs FPR16:$Rd), (ins), [(set f16:$Rd, (fpimm0))]>, Sched<[WriteF]>; def FMOVS0 : Pseudo<(outs FPR32:$Rd), (ins), [(set f32:$Rd, (fpimm0))]>, Sched<[WriteF]>; def FMOVD0 : Pseudo<(outs FPR64:$Rd), (ins), [(set f64:$Rd, (fpimm0))]>, Sched<[WriteF]>; } // Similarly add aliases def : InstAlias<"fmov $Rd, #0.0", (FMOVWHr FPR16:$Rd, WZR), 0>, Requires<[HasFullFP16]>; let Predicates = [HasFPARMv8] in { def : InstAlias<"fmov $Rd, #0.0", (FMOVWSr FPR32:$Rd, WZR), 0>; def : InstAlias<"fmov $Rd, #0.0", (FMOVXDr FPR64:$Rd, XZR), 0>; } def : Pat<(bf16 fpimm0), (FMOVH0)>; // Pattern for FP16 and BF16 immediates let Predicates = [HasFullFP16] in { def : Pat<(f16 fpimm:$in), (FMOVWHr (MOVi32imm (bitcast_fpimm_to_i32 f16:$in)))>; def : Pat<(bf16 fpimm:$in), (FMOVWHr (MOVi32imm (bitcast_fpimm_to_i32 bf16:$in)))>; } //===----------------------------------------------------------------------===// // Floating point conversion instruction. //===----------------------------------------------------------------------===// defm FCVT : FPConversion<"fcvt">; // Helper to get bf16 into fp32. 
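// A bf16 value holds the high 16 bits of an IEEE binary32, so the widening is
// just a logical shift left by 16 on the GPR side (the UBFM below); e.g. bf16
// 0x3F80 (1.0) becomes f32 0x3F800000 (1.0).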
def cvt_bf16_to_fp32 : OutPatFrag<(ops node:$Rn), (f32 (COPY_TO_REGCLASS (i32 (UBFMWri (i32 (COPY_TO_REGCLASS (INSERT_SUBREG (f32 (IMPLICIT_DEF)), node:$Rn, hsub), GPR32)), (i64 (i32shift_a (i64 16))), (i64 (i32shift_b (i64 16))))), FPR32))>; // Pattern for bf16 -> fp32. def : Pat<(f32 (any_fpextend (bf16 FPR16:$Rn))), (cvt_bf16_to_fp32 FPR16:$Rn)>; // Pattern for bf16 -> fp64. def : Pat<(f64 (any_fpextend (bf16 FPR16:$Rn))), (FCVTDSr (f32 (cvt_bf16_to_fp32 FPR16:$Rn)))>; //===----------------------------------------------------------------------===// // Floating point single operand instructions. //===----------------------------------------------------------------------===// defm FABS : SingleOperandFPDataNoException<0b0001, "fabs", fabs>; defm FMOV : SingleOperandFPDataNoException<0b0000, "fmov">; defm FNEG : SingleOperandFPDataNoException<0b0010, "fneg", fneg>; defm FRINTA : SingleOperandFPData<0b1100, "frinta", any_fround>; defm FRINTI : SingleOperandFPData<0b1111, "frinti", any_fnearbyint>; defm FRINTM : SingleOperandFPData<0b1010, "frintm", any_ffloor>; defm FRINTN : SingleOperandFPData<0b1000, "frintn", any_froundeven>; defm FRINTP : SingleOperandFPData<0b1001, "frintp", any_fceil>; defm FRINTX : SingleOperandFPData<0b1110, "frintx", any_frint>; defm FRINTZ : SingleOperandFPData<0b1011, "frintz", any_ftrunc>; let SchedRW = [WriteFDiv] in { defm FSQRT : SingleOperandFPData<0b0011, "fsqrt", any_fsqrt>; } let Predicates = [HasFRInt3264] in { defm FRINT32Z : FRIntNNT<0b00, "frint32z", int_aarch64_frint32z>; defm FRINT64Z : FRIntNNT<0b10, "frint64z", int_aarch64_frint64z>; defm FRINT32X : FRIntNNT<0b01, "frint32x", int_aarch64_frint32x>; defm FRINT64X : FRIntNNT<0b11, "frint64x", int_aarch64_frint64x>; } // HasFRInt3264 // Pattern to convert 1x64 vector intrinsics to equivalent scalar instructions def : Pat<(v1f64 (int_aarch64_neon_frint32z (v1f64 FPR64:$Rn))), (FRINT32ZDr FPR64:$Rn)>; def : Pat<(v1f64 (int_aarch64_neon_frint64z (v1f64 FPR64:$Rn))), (FRINT64ZDr FPR64:$Rn)>; def : Pat<(v1f64 (int_aarch64_neon_frint32x (v1f64 FPR64:$Rn))), (FRINT32XDr FPR64:$Rn)>; def : Pat<(v1f64 (int_aarch64_neon_frint64x (v1f64 FPR64:$Rn))), (FRINT64XDr FPR64:$Rn)>; // Emitting strict_lrint as two instructions is valid as any exceptions that // occur will happen in exactly one of the instructions (e.g. if the input is // not an integer the inexact exception will happen in the FRINTX but not then // in the FCVTZS as the output of FRINTX is an integer). let Predicates = [HasFullFP16] in { def : Pat<(i32 (any_lrint f16:$Rn)), (FCVTZSUWHr (FRINTXHr f16:$Rn))>; def : Pat<(i64 (any_lrint f16:$Rn)), (FCVTZSUXHr (FRINTXHr f16:$Rn))>; def : Pat<(i64 (any_llrint f16:$Rn)), (FCVTZSUXHr (FRINTXHr f16:$Rn))>; } def : Pat<(i32 (any_lrint f32:$Rn)), (FCVTZSUWSr (FRINTXSr f32:$Rn))>; def : Pat<(i32 (any_lrint f64:$Rn)), (FCVTZSUWDr (FRINTXDr f64:$Rn))>; def : Pat<(i64 (any_lrint f32:$Rn)), (FCVTZSUXSr (FRINTXSr f32:$Rn))>; def : Pat<(i64 (any_lrint f64:$Rn)), (FCVTZSUXDr (FRINTXDr f64:$Rn))>; def : Pat<(i64 (any_llrint f32:$Rn)), (FCVTZSUXSr (FRINTXSr f32:$Rn))>; def : Pat<(i64 (any_llrint f64:$Rn)), (FCVTZSUXDr (FRINTXDr f64:$Rn))>; //===----------------------------------------------------------------------===// // Floating point two operand instructions. 
//===----------------------------------------------------------------------===//

defm FADD   : TwoOperandFPData<0b0010, "fadd", any_fadd>;
let SchedRW = [WriteFDiv] in {
defm FDIV   : TwoOperandFPData<0b0001, "fdiv", any_fdiv>;
}
defm FMAXNM : TwoOperandFPData<0b0110, "fmaxnm", any_fmaxnum>;
defm FMAX   : TwoOperandFPData<0b0100, "fmax", any_fmaximum>;
defm FMINNM : TwoOperandFPData<0b0111, "fminnm", any_fminnum>;
defm FMIN   : TwoOperandFPData<0b0101, "fmin", any_fminimum>;
let SchedRW = [WriteFMul] in {
defm FMUL   : TwoOperandFPData<0b0000, "fmul", any_fmul>;
defm FNMUL  : TwoOperandFPDataNeg<0b1000, "fnmul", any_fmul>;
}
defm FSUB   : TwoOperandFPData<0b0011, "fsub", any_fsub>;

multiclass FMULScalarFromIndexedLane0Patterns<string inst,
                                              string inst_f16_suffix,
                                              string inst_f32_suffix,
                                              string inst_f64_suffix,
                                              SDPatternOperator OpNode,
                                              list<Predicate> preds = []> {
  let Predicates = !listconcat(preds, [HasFullFP16]) in {
  def : Pat<(f16 (OpNode (f16 FPR16:$Rn),
                         (f16 (vector_extract (v8f16 V128:$Rm), (i64 0))))),
            (!cast<Instruction>(inst # inst_f16_suffix)
               FPR16:$Rn, (f16 (EXTRACT_SUBREG V128:$Rm, hsub)))>;
  }
  let Predicates = preds in {
  def : Pat<(f32 (OpNode (f32 FPR32:$Rn),
                         (f32 (vector_extract (v4f32 V128:$Rm), (i64 0))))),
            (!cast<Instruction>(inst # inst_f32_suffix)
               FPR32:$Rn, (EXTRACT_SUBREG V128:$Rm, ssub))>;
  def : Pat<(f64 (OpNode (f64 FPR64:$Rn),
                         (f64 (vector_extract (v2f64 V128:$Rm), (i64 0))))),
            (!cast<Instruction>(inst # inst_f64_suffix)
               FPR64:$Rn, (EXTRACT_SUBREG V128:$Rm, dsub))>;
  }
}

defm : FMULScalarFromIndexedLane0Patterns<"FMUL", "Hrr", "Srr", "Drr",
                                          any_fmul>;

// Match reassociated forms of FNMUL.
def : Pat<(fmul (fneg FPR16:$a), (f16 FPR16:$b)),
          (FNMULHrr FPR16:$a, FPR16:$b)>,
          Requires<[HasFullFP16]>;
def : Pat<(fmul (fneg FPR32:$a), (f32 FPR32:$b)),
          (FNMULSrr FPR32:$a, FPR32:$b)>;
def : Pat<(fmul (fneg FPR64:$a), (f64 FPR64:$b)),
          (FNMULDrr FPR64:$a, FPR64:$b)>;

def : Pat<(v1f64 (fmaximum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
          (FMAXDrr FPR64:$Rn, FPR64:$Rm)>;
def : Pat<(v1f64 (fminimum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
          (FMINDrr FPR64:$Rn, FPR64:$Rm)>;
def : Pat<(v1f64 (fmaxnum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
          (FMAXNMDrr FPR64:$Rn, FPR64:$Rm)>;
def : Pat<(v1f64 (fminnum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
          (FMINNMDrr FPR64:$Rn, FPR64:$Rm)>;

//===----------------------------------------------------------------------===//
// Floating point three operand instructions.
//===----------------------------------------------------------------------===//

defm FMADD : ThreeOperandFPData<0, 0, "fmadd", any_fma>;
defm FMSUB : ThreeOperandFPData<0, 1, "fmsub",
     TriOpFrag<(any_fma node:$LHS, (fneg node:$MHS), node:$RHS)> >;
defm FNMADD : ThreeOperandFPData<1, 0, "fnmadd",
     TriOpFrag<(fneg (any_fma node:$LHS, node:$MHS, node:$RHS))> >;
defm FNMSUB : ThreeOperandFPData<1, 1, "fnmsub",
     TriOpFrag<(any_fma node:$LHS, node:$MHS, (fneg node:$RHS))> >;

// The following def pats catch the case where the LHS of an FMA is negated.
// The TriOpFrag above catches the case where the middle operand is negated.

// N.b. FMSUB etc have the accumulator at the *end* of (outs), unlike
// the NEON variant.
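// Recall the scalar semantics: FMSUB computes Ra - Rn*Rm and FNMADD computes
// -(Ra + Rn*Rm), so e.g. (fma (fneg $Rn), $Rm, $Ra) maps onto FMSUB.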
// Here we handle first -(a + b*c) for FNMADD: let Predicates = [HasNEON, HasFullFP16] in def : Pat<(f16 (fma (fneg FPR16:$Rn), FPR16:$Rm, FPR16:$Ra)), (FMSUBHrrr FPR16:$Rn, FPR16:$Rm, FPR16:$Ra)>; def : Pat<(f32 (fma (fneg FPR32:$Rn), FPR32:$Rm, FPR32:$Ra)), (FMSUBSrrr FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>; def : Pat<(f64 (fma (fneg FPR64:$Rn), FPR64:$Rm, FPR64:$Ra)), (FMSUBDrrr FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>; // Now it's time for "(-a) + (-b)*c" let Predicates = [HasNEON, HasFullFP16] in def : Pat<(f16 (fma (fneg FPR16:$Rn), FPR16:$Rm, (fneg FPR16:$Ra))), (FNMADDHrrr FPR16:$Rn, FPR16:$Rm, FPR16:$Ra)>; def : Pat<(f32 (fma (fneg FPR32:$Rn), FPR32:$Rm, (fneg FPR32:$Ra))), (FNMADDSrrr FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>; def : Pat<(f64 (fma (fneg FPR64:$Rn), FPR64:$Rm, (fneg FPR64:$Ra))), (FNMADDDrrr FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>; //===----------------------------------------------------------------------===// // Floating point comparison instructions. //===----------------------------------------------------------------------===// defm FCMPE : FPComparison<1, "fcmpe", AArch64strict_fcmpe>; defm FCMP : FPComparison<0, "fcmp", AArch64any_fcmp>; //===----------------------------------------------------------------------===// // Floating point conditional comparison instructions. //===----------------------------------------------------------------------===// defm FCCMPE : FPCondComparison<1, "fccmpe">; defm FCCMP : FPCondComparison<0, "fccmp", AArch64fccmp>; //===----------------------------------------------------------------------===// // Floating point conditional select instruction. //===----------------------------------------------------------------------===// defm FCSEL : FPCondSelect<"fcsel">; let Predicates = [HasFullFP16] in def : Pat<(bf16 (AArch64csel (bf16 FPR16:$Rn), (bf16 FPR16:$Rm), (i32 imm:$cond), NZCV)), (FCSELHrrr FPR16:$Rn, FPR16:$Rm, imm:$cond)>; // CSEL instructions providing f128 types need to be handled by a // pseudo-instruction since the eventual code will need to introduce basic // blocks and control flow. let Predicates = [HasFPARMv8] in def F128CSEL : Pseudo<(outs FPR128:$Rd), (ins FPR128:$Rn, FPR128:$Rm, ccode:$cond), [(set (f128 FPR128:$Rd), (AArch64csel FPR128:$Rn, FPR128:$Rm, (i32 imm:$cond), NZCV))]> { let Uses = [NZCV]; let usesCustomInserter = 1; let hasNoSchedulingInfo = 1; } //===----------------------------------------------------------------------===// // Instructions used for emitting unwind opcodes on ARM64 Windows. 
//===----------------------------------------------------------------------===// let isPseudo = 1 in { def SEH_StackAlloc : Pseudo<(outs), (ins i32imm:$size), []>, Sched<[]>; def SEH_SaveFPLR : Pseudo<(outs), (ins i32imm:$offs), []>, Sched<[]>; def SEH_SaveFPLR_X : Pseudo<(outs), (ins i32imm:$offs), []>, Sched<[]>; def SEH_SaveReg : Pseudo<(outs), (ins i32imm:$reg, i32imm:$offs), []>, Sched<[]>; def SEH_SaveReg_X : Pseudo<(outs), (ins i32imm:$reg, i32imm:$offs), []>, Sched<[]>; def SEH_SaveRegP : Pseudo<(outs), (ins i32imm:$reg0, i32imm:$reg1, i32imm:$offs), []>, Sched<[]>; def SEH_SaveRegP_X : Pseudo<(outs), (ins i32imm:$reg0, i32imm:$reg1, i32imm:$offs), []>, Sched<[]>; def SEH_SaveFReg : Pseudo<(outs), (ins i32imm:$reg, i32imm:$offs), []>, Sched<[]>; def SEH_SaveFReg_X : Pseudo<(outs), (ins i32imm:$reg, i32imm:$offs), []>, Sched<[]>; def SEH_SaveFRegP : Pseudo<(outs), (ins i32imm:$reg0, i32imm:$reg1, i32imm:$offs), []>, Sched<[]>; def SEH_SaveFRegP_X : Pseudo<(outs), (ins i32imm:$reg0, i32imm:$reg1, i32imm:$offs), []>, Sched<[]>; def SEH_SetFP : Pseudo<(outs), (ins), []>, Sched<[]>; def SEH_AddFP : Pseudo<(outs), (ins i32imm:$offs), []>, Sched<[]>; def SEH_Nop : Pseudo<(outs), (ins), []>, Sched<[]>; def SEH_PrologEnd : Pseudo<(outs), (ins), []>, Sched<[]>; def SEH_EpilogStart : Pseudo<(outs), (ins), []>, Sched<[]>; def SEH_EpilogEnd : Pseudo<(outs), (ins), []>, Sched<[]>; def SEH_PACSignLR : Pseudo<(outs), (ins), []>, Sched<[]>; def SEH_SaveAnyRegQP : Pseudo<(outs), (ins i32imm:$reg0, i32imm:$reg1, i32imm:$offs), []>, Sched<[]>; def SEH_SaveAnyRegQPX : Pseudo<(outs), (ins i32imm:$reg0, i32imm:$reg1, i32imm:$offs), []>, Sched<[]>; } // Pseudo instructions for Windows EH //===----------------------------------------------------------------------===// let isTerminator = 1, hasSideEffects = 1, isBarrier = 1, hasCtrlDep = 1, isCodeGenOnly = 1, isReturn = 1, isEHScopeReturn = 1, isPseudo = 1 in { - def CLEANUPRET : Pseudo<(outs), (ins), [(cleanupret)]>, Sched<[]>; + def CLEANUPRET : Pseudo<(outs), (ins), [(cleanupret bb)]>, Sched<[]>; let usesCustomInserter = 1 in def CATCHRET : Pseudo<(outs), (ins am_brcond:$dst, am_brcond:$src), [(catchret bb:$dst, bb:$src)]>, Sched<[]>; } // Pseudo instructions for homogeneous prolog/epilog let isPseudo = 1 in { // Save CSRs in order, {FPOffset} def HOM_Prolog : Pseudo<(outs), (ins variable_ops), []>, Sched<[]>; // Restore CSRs in order def HOM_Epilog : Pseudo<(outs), (ins variable_ops), []>, Sched<[]>; } //===----------------------------------------------------------------------===// // Floating point immediate move. //===----------------------------------------------------------------------===// let isReMaterializable = 1, isAsCheapAsAMove = 1 in { defm FMOV : FPMoveImmediate<"fmov">; } let Predicates = [HasFullFP16] in { def : Pat<(bf16 fpimmbf16:$in), (FMOVHi (fpimm16XForm bf16:$in))>; } //===----------------------------------------------------------------------===// // Advanced SIMD two vector instructions. //===----------------------------------------------------------------------===// defm UABDL : SIMDLongThreeVectorBHSabdl<1, 0b0111, "uabdl", AArch64uabd>; // Match UABDL in log2-shuffle patterns. 
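// For example, (abs (sub (zext v8i8:$a), (zext v8i8:$b))) over 16-bit lanes is
// exactly "uabdl v0.8h, v1.8b, v2.8b", so the widened absolute difference below
// can be selected without separate extends.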
def : Pat<(abs (v8i16 (sub (zext (v8i8 V64:$opA)), (zext (v8i8 V64:$opB))))), (UABDLv8i8_v8i16 V64:$opA, V64:$opB)>; def : Pat<(abs (v8i16 (sub (zext (extract_high_v16i8 (v16i8 V128:$opA))), (zext (extract_high_v16i8 (v16i8 V128:$opB)))))), (UABDLv16i8_v8i16 V128:$opA, V128:$opB)>; def : Pat<(abs (v4i32 (sub (zext (v4i16 V64:$opA)), (zext (v4i16 V64:$opB))))), (UABDLv4i16_v4i32 V64:$opA, V64:$opB)>; def : Pat<(abs (v4i32 (sub (zext (extract_high_v8i16 (v8i16 V128:$opA))), (zext (extract_high_v8i16 (v8i16 V128:$opB)))))), (UABDLv8i16_v4i32 V128:$opA, V128:$opB)>; def : Pat<(abs (v2i64 (sub (zext (v2i32 V64:$opA)), (zext (v2i32 V64:$opB))))), (UABDLv2i32_v2i64 V64:$opA, V64:$opB)>; def : Pat<(abs (v2i64 (sub (zext (extract_high_v4i32 (v4i32 V128:$opA))), (zext (extract_high_v4i32 (v4i32 V128:$opB)))))), (UABDLv4i32_v2i64 V128:$opA, V128:$opB)>; defm ABS : SIMDTwoVectorBHSD<0, 0b01011, "abs", abs>; defm CLS : SIMDTwoVectorBHS<0, 0b00100, "cls", int_aarch64_neon_cls>; defm CLZ : SIMDTwoVectorBHS<1, 0b00100, "clz", ctlz>; defm CMEQ : SIMDCmpTwoVector<0, 0b01001, "cmeq", AArch64cmeqz>; defm CMGE : SIMDCmpTwoVector<1, 0b01000, "cmge", AArch64cmgez>; defm CMGT : SIMDCmpTwoVector<0, 0b01000, "cmgt", AArch64cmgtz>; defm CMLE : SIMDCmpTwoVector<1, 0b01001, "cmle", AArch64cmlez>; defm CMLT : SIMDCmpTwoVector<0, 0b01010, "cmlt", AArch64cmltz>; defm CNT : SIMDTwoVectorB<0, 0b00, 0b00101, "cnt", ctpop>; defm FABS : SIMDTwoVectorFPNoException<0, 1, 0b01111, "fabs", fabs>; def : Pat<(v8i8 (AArch64vashr (v8i8 V64:$Rn), (i32 7))), (CMLTv8i8rz V64:$Rn)>; def : Pat<(v4i16 (AArch64vashr (v4i16 V64:$Rn), (i32 15))), (CMLTv4i16rz V64:$Rn)>; def : Pat<(v2i32 (AArch64vashr (v2i32 V64:$Rn), (i32 31))), (CMLTv2i32rz V64:$Rn)>; def : Pat<(v16i8 (AArch64vashr (v16i8 V128:$Rn), (i32 7))), (CMLTv16i8rz V128:$Rn)>; def : Pat<(v8i16 (AArch64vashr (v8i16 V128:$Rn), (i32 15))), (CMLTv8i16rz V128:$Rn)>; def : Pat<(v4i32 (AArch64vashr (v4i32 V128:$Rn), (i32 31))), (CMLTv4i32rz V128:$Rn)>; def : Pat<(v2i64 (AArch64vashr (v2i64 V128:$Rn), (i32 63))), (CMLTv2i64rz V128:$Rn)>; defm FCMEQ : SIMDFPCmpTwoVector<0, 1, 0b01101, "fcmeq", AArch64fcmeqz>; defm FCMGE : SIMDFPCmpTwoVector<1, 1, 0b01100, "fcmge", AArch64fcmgez>; defm FCMGT : SIMDFPCmpTwoVector<0, 1, 0b01100, "fcmgt", AArch64fcmgtz>; defm FCMLE : SIMDFPCmpTwoVector<1, 1, 0b01101, "fcmle", AArch64fcmlez>; defm FCMLT : SIMDFPCmpTwoVector<0, 1, 0b01110, "fcmlt", AArch64fcmltz>; defm FCVTAS : SIMDTwoVectorFPToInt<0,0,0b11100, "fcvtas",int_aarch64_neon_fcvtas>; defm FCVTAU : SIMDTwoVectorFPToInt<1,0,0b11100, "fcvtau",int_aarch64_neon_fcvtau>; defm FCVTL : SIMDFPWidenTwoVector<0, 0, 0b10111, "fcvtl">; def : Pat<(v4f32 (int_aarch64_neon_vcvthf2fp (v4i16 V64:$Rn))), (FCVTLv4i16 V64:$Rn)>; def : Pat<(v4f32 (int_aarch64_neon_vcvthf2fp (extract_subvector (v8i16 V128:$Rn), (i64 4)))), (FCVTLv8i16 V128:$Rn)>; def : Pat<(v2f64 (any_fpextend (v2f32 V64:$Rn))), (FCVTLv2i32 V64:$Rn)>; def : Pat<(v2f64 (any_fpextend (v2f32 (extract_high_v4f32 (v4f32 V128:$Rn))))), (FCVTLv4i32 V128:$Rn)>; def : Pat<(v4f32 (any_fpextend (v4f16 V64:$Rn))), (FCVTLv4i16 V64:$Rn)>; def : Pat<(v4f32 (any_fpextend (v4f16 (extract_high_v8f16 (v8f16 V128:$Rn))))), (FCVTLv8i16 V128:$Rn)>; defm FCVTMS : SIMDTwoVectorFPToInt<0,0,0b11011, "fcvtms",int_aarch64_neon_fcvtms>; defm FCVTMU : SIMDTwoVectorFPToInt<1,0,0b11011, "fcvtmu",int_aarch64_neon_fcvtmu>; defm FCVTNS : SIMDTwoVectorFPToInt<0,0,0b11010, "fcvtns",int_aarch64_neon_fcvtns>; defm FCVTNU : SIMDTwoVectorFPToInt<1,0,0b11010, "fcvtnu",int_aarch64_neon_fcvtnu>; defm 
FCVTN : SIMDFPNarrowTwoVector<0, 0, 0b10110, "fcvtn">; def : Pat<(v4i16 (int_aarch64_neon_vcvtfp2hf (v4f32 V128:$Rn))), (FCVTNv4i16 V128:$Rn)>; def : Pat<(concat_vectors V64:$Rd, (v4i16 (int_aarch64_neon_vcvtfp2hf (v4f32 V128:$Rn)))), (FCVTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>; def : Pat<(v2f32 (any_fpround (v2f64 V128:$Rn))), (FCVTNv2i32 V128:$Rn)>; def : Pat<(v4f16 (any_fpround (v4f32 V128:$Rn))), (FCVTNv4i16 V128:$Rn)>; def : Pat<(concat_vectors V64:$Rd, (v2f32 (any_fpround (v2f64 V128:$Rn)))), (FCVTNv4i32 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>; def : Pat<(concat_vectors V64:$Rd, (v4f16 (any_fpround (v4f32 V128:$Rn)))), (FCVTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>; defm FCVTPS : SIMDTwoVectorFPToInt<0,1,0b11010, "fcvtps",int_aarch64_neon_fcvtps>; defm FCVTPU : SIMDTwoVectorFPToInt<1,1,0b11010, "fcvtpu",int_aarch64_neon_fcvtpu>; defm FCVTXN : SIMDFPInexactCvtTwoVector<1, 0, 0b10110, "fcvtxn", AArch64fcvtxnv>; defm FCVTZS : SIMDTwoVectorFPToInt<0, 1, 0b11011, "fcvtzs", any_fp_to_sint>; defm FCVTZU : SIMDTwoVectorFPToInt<1, 1, 0b11011, "fcvtzu", any_fp_to_uint>; // AArch64's FCVT instructions saturate when out of range. multiclass SIMDTwoVectorFPToIntSatPats { let Predicates = [HasFullFP16] in { def : Pat<(v4i16 (to_int_sat v4f16:$Rn, i16)), (!cast(INST # v4f16) v4f16:$Rn)>; def : Pat<(v8i16 (to_int_sat v8f16:$Rn, i16)), (!cast(INST # v8f16) v8f16:$Rn)>; } def : Pat<(v2i32 (to_int_sat v2f32:$Rn, i32)), (!cast(INST # v2f32) v2f32:$Rn)>; def : Pat<(v4i32 (to_int_sat v4f32:$Rn, i32)), (!cast(INST # v4f32) v4f32:$Rn)>; def : Pat<(v2i64 (to_int_sat v2f64:$Rn, i64)), (!cast(INST # v2f64) v2f64:$Rn)>; } defm : SIMDTwoVectorFPToIntSatPats; defm : SIMDTwoVectorFPToIntSatPats; def : Pat<(v4i16 (int_aarch64_neon_fcvtzs v4f16:$Rn)), (FCVTZSv4f16 $Rn)>; def : Pat<(v8i16 (int_aarch64_neon_fcvtzs v8f16:$Rn)), (FCVTZSv8f16 $Rn)>; def : Pat<(v2i32 (int_aarch64_neon_fcvtzs v2f32:$Rn)), (FCVTZSv2f32 $Rn)>; def : Pat<(v4i32 (int_aarch64_neon_fcvtzs v4f32:$Rn)), (FCVTZSv4f32 $Rn)>; def : Pat<(v2i64 (int_aarch64_neon_fcvtzs v2f64:$Rn)), (FCVTZSv2f64 $Rn)>; def : Pat<(v4i16 (int_aarch64_neon_fcvtzu v4f16:$Rn)), (FCVTZUv4f16 $Rn)>; def : Pat<(v8i16 (int_aarch64_neon_fcvtzu v8f16:$Rn)), (FCVTZUv8f16 $Rn)>; def : Pat<(v2i32 (int_aarch64_neon_fcvtzu v2f32:$Rn)), (FCVTZUv2f32 $Rn)>; def : Pat<(v4i32 (int_aarch64_neon_fcvtzu v4f32:$Rn)), (FCVTZUv4f32 $Rn)>; def : Pat<(v2i64 (int_aarch64_neon_fcvtzu v2f64:$Rn)), (FCVTZUv2f64 $Rn)>; defm FNEG : SIMDTwoVectorFPNoException<1, 1, 0b01111, "fneg", fneg>; defm FRECPE : SIMDTwoVectorFP<0, 1, 0b11101, "frecpe", int_aarch64_neon_frecpe>; defm FRINTA : SIMDTwoVectorFP<1, 0, 0b11000, "frinta", any_fround>; defm FRINTI : SIMDTwoVectorFP<1, 1, 0b11001, "frinti", any_fnearbyint>; defm FRINTM : SIMDTwoVectorFP<0, 0, 0b11001, "frintm", any_ffloor>; defm FRINTN : SIMDTwoVectorFP<0, 0, 0b11000, "frintn", any_froundeven>; defm FRINTP : SIMDTwoVectorFP<0, 1, 0b11000, "frintp", any_fceil>; defm FRINTX : SIMDTwoVectorFP<1, 0, 0b11001, "frintx", any_frint>; defm FRINTZ : SIMDTwoVectorFP<0, 1, 0b11001, "frintz", any_ftrunc>; let Predicates = [HasFRInt3264] in { defm FRINT32Z : FRIntNNTVector<0, 0, "frint32z", int_aarch64_neon_frint32z>; defm FRINT64Z : FRIntNNTVector<0, 1, "frint64z", int_aarch64_neon_frint64z>; defm FRINT32X : FRIntNNTVector<1, 0, "frint32x", int_aarch64_neon_frint32x>; defm FRINT64X : FRIntNNTVector<1, 1, "frint64x", int_aarch64_neon_frint64x>; } // HasFRInt3264 defm FRSQRTE: SIMDTwoVectorFP<1, 1, 
0b11101, "frsqrte", int_aarch64_neon_frsqrte>; defm FSQRT : SIMDTwoVectorFP<1, 1, 0b11111, "fsqrt", any_fsqrt>; defm NEG : SIMDTwoVectorBHSD<1, 0b01011, "neg", UnOpFrag<(sub immAllZerosV, node:$LHS)> >; defm NOT : SIMDTwoVectorB<1, 0b00, 0b00101, "not", vnot>; // Aliases for MVN -> NOT. let Predicates = [HasNEON] in { def : InstAlias<"mvn{ $Vd.8b, $Vn.8b|.8b $Vd, $Vn}", (NOTv8i8 V64:$Vd, V64:$Vn)>; def : InstAlias<"mvn{ $Vd.16b, $Vn.16b|.16b $Vd, $Vn}", (NOTv16i8 V128:$Vd, V128:$Vn)>; } def : Pat<(vnot (v4i16 V64:$Rn)), (NOTv8i8 V64:$Rn)>; def : Pat<(vnot (v8i16 V128:$Rn)), (NOTv16i8 V128:$Rn)>; def : Pat<(vnot (v2i32 V64:$Rn)), (NOTv8i8 V64:$Rn)>; def : Pat<(vnot (v4i32 V128:$Rn)), (NOTv16i8 V128:$Rn)>; def : Pat<(vnot (v1i64 V64:$Rn)), (NOTv8i8 V64:$Rn)>; def : Pat<(vnot (v2i64 V128:$Rn)), (NOTv16i8 V128:$Rn)>; defm RBIT : SIMDTwoVectorB<1, 0b01, 0b00101, "rbit", bitreverse>; defm REV16 : SIMDTwoVectorB<0, 0b00, 0b00001, "rev16", AArch64rev16>; defm REV32 : SIMDTwoVectorBH<1, 0b00000, "rev32", AArch64rev32>; defm REV64 : SIMDTwoVectorBHS<0, 0b00000, "rev64", AArch64rev64>; defm SADALP : SIMDLongTwoVectorTied<0, 0b00110, "sadalp", BinOpFrag<(add node:$LHS, (AArch64saddlp node:$RHS))> >; defm SADDLP : SIMDLongTwoVector<0, 0b00010, "saddlp", AArch64saddlp>; defm SCVTF : SIMDTwoVectorIntToFP<0, 0, 0b11101, "scvtf", any_sint_to_fp>; defm SHLL : SIMDVectorLShiftLongBySizeBHS; defm SQABS : SIMDTwoVectorBHSD<0, 0b00111, "sqabs", int_aarch64_neon_sqabs>; defm SQNEG : SIMDTwoVectorBHSD<1, 0b00111, "sqneg", int_aarch64_neon_sqneg>; defm SQXTN : SIMDMixedTwoVector<0, 0b10100, "sqxtn", int_aarch64_neon_sqxtn>; defm SQXTUN : SIMDMixedTwoVector<1, 0b10010, "sqxtun", int_aarch64_neon_sqxtun>; defm SUQADD : SIMDTwoVectorBHSDTied<0, 0b00011, "suqadd",int_aarch64_neon_suqadd>; defm UADALP : SIMDLongTwoVectorTied<1, 0b00110, "uadalp", BinOpFrag<(add node:$LHS, (AArch64uaddlp node:$RHS))> >; defm UADDLP : SIMDLongTwoVector<1, 0b00010, "uaddlp", AArch64uaddlp>; defm UCVTF : SIMDTwoVectorIntToFP<1, 0, 0b11101, "ucvtf", any_uint_to_fp>; defm UQXTN : SIMDMixedTwoVector<1, 0b10100, "uqxtn", int_aarch64_neon_uqxtn>; defm URECPE : SIMDTwoVectorS<0, 1, 0b11100, "urecpe", int_aarch64_neon_urecpe>; defm URSQRTE: SIMDTwoVectorS<1, 1, 0b11100, "ursqrte", int_aarch64_neon_ursqrte>; defm USQADD : SIMDTwoVectorBHSDTied<1, 0b00011, "usqadd",int_aarch64_neon_usqadd>; defm XTN : SIMDMixedTwoVector<0, 0b10010, "xtn", trunc>; def : Pat<(v4f16 (AArch64rev32 V64:$Rn)), (REV32v4i16 V64:$Rn)>; def : Pat<(v4f16 (AArch64rev64 V64:$Rn)), (REV64v4i16 V64:$Rn)>; def : Pat<(v4bf16 (AArch64rev32 V64:$Rn)), (REV32v4i16 V64:$Rn)>; def : Pat<(v4bf16 (AArch64rev64 V64:$Rn)), (REV64v4i16 V64:$Rn)>; def : Pat<(v8f16 (AArch64rev32 V128:$Rn)), (REV32v8i16 V128:$Rn)>; def : Pat<(v8f16 (AArch64rev64 V128:$Rn)), (REV64v8i16 V128:$Rn)>; def : Pat<(v8bf16 (AArch64rev32 V128:$Rn)), (REV32v8i16 V128:$Rn)>; def : Pat<(v8bf16 (AArch64rev64 V128:$Rn)), (REV64v8i16 V128:$Rn)>; def : Pat<(v2f32 (AArch64rev64 V64:$Rn)), (REV64v2i32 V64:$Rn)>; def : Pat<(v4f32 (AArch64rev64 V128:$Rn)), (REV64v4i32 V128:$Rn)>; // Patterns for vector long shift (by element width). These need to match all // three of zext, sext and anyext so it's easier to pull the patterns out of the // definition. 
multiclass SIMDVectorLShiftLongBySizeBHSPats { def : Pat<(AArch64vshl (v8i16 (ext (v8i8 V64:$Rn))), (i32 8)), (SHLLv8i8 V64:$Rn)>; def : Pat<(AArch64vshl (v8i16 (ext (extract_high_v16i8 (v16i8 V128:$Rn)))), (i32 8)), (SHLLv16i8 V128:$Rn)>; def : Pat<(AArch64vshl (v4i32 (ext (v4i16 V64:$Rn))), (i32 16)), (SHLLv4i16 V64:$Rn)>; def : Pat<(AArch64vshl (v4i32 (ext (extract_high_v8i16 (v8i16 V128:$Rn)))), (i32 16)), (SHLLv8i16 V128:$Rn)>; def : Pat<(AArch64vshl (v2i64 (ext (v2i32 V64:$Rn))), (i32 32)), (SHLLv2i32 V64:$Rn)>; def : Pat<(AArch64vshl (v2i64 (ext (extract_high_v4i32 (v4i32 V128:$Rn)))), (i32 32)), (SHLLv4i32 V128:$Rn)>; } defm : SIMDVectorLShiftLongBySizeBHSPats; defm : SIMDVectorLShiftLongBySizeBHSPats; defm : SIMDVectorLShiftLongBySizeBHSPats; // Constant vector values, used in the S/UQXTN patterns below. def VImmFF: PatLeaf<(AArch64NvCast (v2i64 (AArch64movi_edit (i32 85))))>; def VImmFFFF: PatLeaf<(AArch64NvCast (v2i64 (AArch64movi_edit (i32 51))))>; def VImm7F: PatLeaf<(AArch64movi_shift (i32 127), (i32 0))>; def VImm80: PatLeaf<(AArch64mvni_shift (i32 127), (i32 0))>; def VImm7FFF: PatLeaf<(AArch64movi_msl (i32 127), (i32 264))>; def VImm8000: PatLeaf<(AArch64mvni_msl (i32 127), (i32 264))>; // trunc(umin(X, 255)) -> UQXTRN v8i8 def : Pat<(v8i8 (trunc (umin (v8i16 V128:$Vn), (v8i16 VImmFF)))), (UQXTNv8i8 V128:$Vn)>; // trunc(umin(X, 65535)) -> UQXTRN v4i16 def : Pat<(v4i16 (trunc (umin (v4i32 V128:$Vn), (v4i32 VImmFFFF)))), (UQXTNv4i16 V128:$Vn)>; // trunc(smin(smax(X, -128), 128)) -> SQXTRN // with reversed min/max def : Pat<(v8i8 (trunc (smin (smax (v8i16 V128:$Vn), (v8i16 VImm80)), (v8i16 VImm7F)))), (SQXTNv8i8 V128:$Vn)>; def : Pat<(v8i8 (trunc (smax (smin (v8i16 V128:$Vn), (v8i16 VImm7F)), (v8i16 VImm80)))), (SQXTNv8i8 V128:$Vn)>; // trunc(smin(smax(X, -32768), 32767)) -> SQXTRN // with reversed min/max def : Pat<(v4i16 (trunc (smin (smax (v4i32 V128:$Vn), (v4i32 VImm8000)), (v4i32 VImm7FFF)))), (SQXTNv4i16 V128:$Vn)>; def : Pat<(v4i16 (trunc (smax (smin (v4i32 V128:$Vn), (v4i32 VImm7FFF)), (v4i32 VImm8000)))), (SQXTNv4i16 V128:$Vn)>; // concat_vectors(Vd, trunc(umin(X, 255))) -> UQXTRN(Vd, Vn) def : Pat<(v16i8 (concat_vectors (v8i8 V64:$Vd), (v8i8 (trunc (umin (v8i16 V128:$Vn), (v8i16 VImmFF)))))), (UQXTNv16i8 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>; // concat_vectors(Vd, trunc(umin(X, 65535))) -> UQXTRN(Vd, Vn) def : Pat<(v8i16 (concat_vectors (v4i16 V64:$Vd), (v4i16 (trunc (umin (v4i32 V128:$Vn), (v4i32 VImmFFFF)))))), (UQXTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>; // concat_vectors(Vd, trunc(smin(smax Vm, -128), 127) ~> SQXTN2(Vd, Vn) // with reversed min/max def : Pat<(v16i8 (concat_vectors (v8i8 V64:$Vd), (v8i8 (trunc (smin (smax (v8i16 V128:$Vn), (v8i16 VImm80)), (v8i16 VImm7F)))))), (SQXTNv16i8 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>; def : Pat<(v16i8 (concat_vectors (v8i8 V64:$Vd), (v8i8 (trunc (smax (smin (v8i16 V128:$Vn), (v8i16 VImm7F)), (v8i16 VImm80)))))), (SQXTNv16i8 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>; // concat_vectors(Vd, trunc(smin(smax Vm, -32768), 32767) ~> SQXTN2(Vd, Vn) // with reversed min/max def : Pat<(v8i16 (concat_vectors (v4i16 V64:$Vd), (v4i16 (trunc (smin (smax (v4i32 V128:$Vn), (v4i32 VImm8000)), (v4i32 VImm7FFF)))))), (SQXTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>; def : Pat<(v8i16 (concat_vectors (v4i16 V64:$Vd), (v4i16 (trunc (smax (smin (v4i32 V128:$Vn), (v4i32 VImm7FFF)), (v4i32 VImm8000)))))), (SQXTNv8i16 (INSERT_SUBREG 
(IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>; // Select BSWAP vector instructions into REV instructions def : Pat<(v4i16 (bswap (v4i16 V64:$Rn))), (v4i16 (REV16v8i8 (v4i16 V64:$Rn)))>; def : Pat<(v8i16 (bswap (v8i16 V128:$Rn))), (v8i16 (REV16v16i8 (v8i16 V128:$Rn)))>; def : Pat<(v2i32 (bswap (v2i32 V64:$Rn))), (v2i32 (REV32v8i8 (v2i32 V64:$Rn)))>; def : Pat<(v4i32 (bswap (v4i32 V128:$Rn))), (v4i32 (REV32v16i8 (v4i32 V128:$Rn)))>; def : Pat<(v2i64 (bswap (v2i64 V128:$Rn))), (v2i64 (REV64v16i8 (v2i64 V128:$Rn)))>; //===----------------------------------------------------------------------===// // Advanced SIMD three vector instructions. //===----------------------------------------------------------------------===// defm ADD : SIMDThreeSameVector<0, 0b10000, "add", add>; defm ADDP : SIMDThreeSameVector<0, 0b10111, "addp", AArch64addp>; defm CMEQ : SIMDThreeSameVector<1, 0b10001, "cmeq", AArch64cmeq>; defm CMGE : SIMDThreeSameVector<0, 0b00111, "cmge", AArch64cmge>; defm CMGT : SIMDThreeSameVector<0, 0b00110, "cmgt", AArch64cmgt>; defm CMHI : SIMDThreeSameVector<1, 0b00110, "cmhi", AArch64cmhi>; defm CMHS : SIMDThreeSameVector<1, 0b00111, "cmhs", AArch64cmhs>; defm CMTST : SIMDThreeSameVector<0, 0b10001, "cmtst", AArch64cmtst>; foreach VT = [ v8i8, v16i8, v4i16, v8i16, v2i32, v4i32, v2i64 ] in { def : Pat<(vnot (AArch64cmeqz VT:$Rn)), (!cast("CMTST"#VT) VT:$Rn, VT:$Rn)>; } defm FABD : SIMDThreeSameVectorFP<1,1,0b010,"fabd", int_aarch64_neon_fabd>; let Predicates = [HasNEON] in { foreach VT = [ v2f32, v4f32, v2f64 ] in def : Pat<(fabs (fsub VT:$Rn, VT:$Rm)), (!cast("FABD"#VT) VT:$Rn, VT:$Rm)>; } let Predicates = [HasNEON, HasFullFP16] in { foreach VT = [ v4f16, v8f16 ] in def : Pat<(fabs (fsub VT:$Rn, VT:$Rm)), (!cast("FABD"#VT) VT:$Rn, VT:$Rm)>; } defm FACGE : SIMDThreeSameVectorFPCmp<1,0,0b101,"facge",AArch64facge>; defm FACGT : SIMDThreeSameVectorFPCmp<1,1,0b101,"facgt",AArch64facgt>; defm FADDP : SIMDThreeSameVectorFP<1,0,0b010,"faddp", AArch64faddp>; defm FADD : SIMDThreeSameVectorFP<0,0,0b010,"fadd", any_fadd>; defm FCMEQ : SIMDThreeSameVectorFPCmp<0, 0, 0b100, "fcmeq", AArch64fcmeq>; defm FCMGE : SIMDThreeSameVectorFPCmp<1, 0, 0b100, "fcmge", AArch64fcmge>; defm FCMGT : SIMDThreeSameVectorFPCmp<1, 1, 0b100, "fcmgt", AArch64fcmgt>; defm FDIV : SIMDThreeSameVectorFP<1,0,0b111,"fdiv", any_fdiv>; defm FMAXNMP : SIMDThreeSameVectorFP<1,0,0b000,"fmaxnmp", int_aarch64_neon_fmaxnmp>; defm FMAXNM : SIMDThreeSameVectorFP<0,0,0b000,"fmaxnm", any_fmaxnum>; defm FMAXP : SIMDThreeSameVectorFP<1,0,0b110,"fmaxp", int_aarch64_neon_fmaxp>; defm FMAX : SIMDThreeSameVectorFP<0,0,0b110,"fmax", any_fmaximum>; defm FMINNMP : SIMDThreeSameVectorFP<1,1,0b000,"fminnmp", int_aarch64_neon_fminnmp>; defm FMINNM : SIMDThreeSameVectorFP<0,1,0b000,"fminnm", any_fminnum>; defm FMINP : SIMDThreeSameVectorFP<1,1,0b110,"fminp", int_aarch64_neon_fminp>; defm FMIN : SIMDThreeSameVectorFP<0,1,0b110,"fmin", any_fminimum>; // NOTE: The operands of the PatFrag are reordered on FMLA/FMLS because the // instruction expects the addend first, while the fma intrinsic puts it last. 
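// For example, "fmla v0.4s, v1.4s, v2.4s" computes v0 + v1*v2 into the tied
// destination, whereas (any_fma a, b, c) places the addend c last.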
defm FMLA : SIMDThreeSameVectorFPTied<0, 0, 0b001, "fmla", TriOpFrag<(any_fma node:$RHS, node:$MHS, node:$LHS)> >; defm FMLS : SIMDThreeSameVectorFPTied<0, 1, 0b001, "fmls", TriOpFrag<(any_fma node:$MHS, (fneg node:$RHS), node:$LHS)> >; defm FMULX : SIMDThreeSameVectorFP<0,0,0b011,"fmulx", int_aarch64_neon_fmulx>; defm FMUL : SIMDThreeSameVectorFP<1,0,0b011,"fmul", any_fmul>; defm FRECPS : SIMDThreeSameVectorFP<0,0,0b111,"frecps", int_aarch64_neon_frecps>; defm FRSQRTS : SIMDThreeSameVectorFP<0,1,0b111,"frsqrts", int_aarch64_neon_frsqrts>; defm FSUB : SIMDThreeSameVectorFP<0,1,0b010,"fsub", any_fsub>; // MLA and MLS are generated in MachineCombine defm MLA : SIMDThreeSameVectorBHSTied<0, 0b10010, "mla", null_frag>; defm MLS : SIMDThreeSameVectorBHSTied<1, 0b10010, "mls", null_frag>; defm MUL : SIMDThreeSameVectorBHS<0, 0b10011, "mul", mul>; defm PMUL : SIMDThreeSameVectorB<1, 0b10011, "pmul", int_aarch64_neon_pmul>; defm SABA : SIMDThreeSameVectorBHSTied<0, 0b01111, "saba", TriOpFrag<(add node:$LHS, (AArch64sabd node:$MHS, node:$RHS))> >; defm SABD : SIMDThreeSameVectorBHS<0,0b01110,"sabd", AArch64sabd>; defm SHADD : SIMDThreeSameVectorBHS<0,0b00000,"shadd", avgfloors>; defm SHSUB : SIMDThreeSameVectorBHS<0,0b00100,"shsub", int_aarch64_neon_shsub>; defm SMAXP : SIMDThreeSameVectorBHS<0,0b10100,"smaxp", int_aarch64_neon_smaxp>; defm SMAX : SIMDThreeSameVectorBHS<0,0b01100,"smax", smax>; defm SMINP : SIMDThreeSameVectorBHS<0,0b10101,"sminp", int_aarch64_neon_sminp>; defm SMIN : SIMDThreeSameVectorBHS<0,0b01101,"smin", smin>; defm SQADD : SIMDThreeSameVector<0,0b00001,"sqadd", int_aarch64_neon_sqadd>; defm SQDMULH : SIMDThreeSameVectorHS<0,0b10110,"sqdmulh",int_aarch64_neon_sqdmulh>; defm SQRDMULH : SIMDThreeSameVectorHS<1,0b10110,"sqrdmulh",int_aarch64_neon_sqrdmulh>; defm SQRSHL : SIMDThreeSameVector<0,0b01011,"sqrshl", int_aarch64_neon_sqrshl>; defm SQSHL : SIMDThreeSameVector<0,0b01001,"sqshl", int_aarch64_neon_sqshl>; defm SQSUB : SIMDThreeSameVector<0,0b00101,"sqsub", int_aarch64_neon_sqsub>; defm SRHADD : SIMDThreeSameVectorBHS<0,0b00010,"srhadd", avgceils>; defm SRSHL : SIMDThreeSameVector<0,0b01010,"srshl", int_aarch64_neon_srshl>; defm SSHL : SIMDThreeSameVector<0,0b01000,"sshl", int_aarch64_neon_sshl>; defm SUB : SIMDThreeSameVector<1,0b10000,"sub", sub>; defm UABA : SIMDThreeSameVectorBHSTied<1, 0b01111, "uaba", TriOpFrag<(add node:$LHS, (AArch64uabd node:$MHS, node:$RHS))> >; defm UABD : SIMDThreeSameVectorBHS<1,0b01110,"uabd", AArch64uabd>; defm UHADD : SIMDThreeSameVectorBHS<1,0b00000,"uhadd", avgflooru>; defm UHSUB : SIMDThreeSameVectorBHS<1,0b00100,"uhsub", int_aarch64_neon_uhsub>; defm UMAXP : SIMDThreeSameVectorBHS<1,0b10100,"umaxp", int_aarch64_neon_umaxp>; defm UMAX : SIMDThreeSameVectorBHS<1,0b01100,"umax", umax>; defm UMINP : SIMDThreeSameVectorBHS<1,0b10101,"uminp", int_aarch64_neon_uminp>; defm UMIN : SIMDThreeSameVectorBHS<1,0b01101,"umin", umin>; defm UQADD : SIMDThreeSameVector<1,0b00001,"uqadd", int_aarch64_neon_uqadd>; defm UQRSHL : SIMDThreeSameVector<1,0b01011,"uqrshl", int_aarch64_neon_uqrshl>; defm UQSHL : SIMDThreeSameVector<1,0b01001,"uqshl", int_aarch64_neon_uqshl>; defm UQSUB : SIMDThreeSameVector<1,0b00101,"uqsub", int_aarch64_neon_uqsub>; defm URHADD : SIMDThreeSameVectorBHS<1,0b00010,"urhadd", avgceilu>; defm URSHL : SIMDThreeSameVector<1,0b01010,"urshl", int_aarch64_neon_urshl>; defm USHL : SIMDThreeSameVector<1,0b01000,"ushl", int_aarch64_neon_ushl>; defm SQRDMLAH : SIMDThreeSameVectorSQRDMLxHTiedHS<1,0b10000,"sqrdmlah", int_aarch64_neon_sqrdmlah>; 
defm SQRDMLSH : SIMDThreeSameVectorSQRDMLxHTiedHS<1,0b10001,"sqrdmlsh", int_aarch64_neon_sqrdmlsh>; // Extra saturate patterns, other than the intrinsics matches above defm : SIMDThreeSameVectorExtraPatterns<"SQADD", saddsat>; defm : SIMDThreeSameVectorExtraPatterns<"UQADD", uaddsat>; defm : SIMDThreeSameVectorExtraPatterns<"SQSUB", ssubsat>; defm : SIMDThreeSameVectorExtraPatterns<"UQSUB", usubsat>; defm AND : SIMDLogicalThreeVector<0, 0b00, "and", and>; defm BIC : SIMDLogicalThreeVector<0, 0b01, "bic", BinOpFrag<(and node:$LHS, (vnot node:$RHS))> >; defm EOR : SIMDLogicalThreeVector<1, 0b00, "eor", xor>; defm ORN : SIMDLogicalThreeVector<0, 0b11, "orn", BinOpFrag<(or node:$LHS, (vnot node:$RHS))> >; defm ORR : SIMDLogicalThreeVector<0, 0b10, "orr", or>; // Pseudo bitwise select pattern BSP. // It is expanded into BSL/BIT/BIF after register allocation. defm BSP : SIMDLogicalThreeVectorPseudo>; defm BSL : SIMDLogicalThreeVectorTied<1, 0b01, "bsl">; defm BIT : SIMDLogicalThreeVectorTied<1, 0b10, "bit">; defm BIF : SIMDLogicalThreeVectorTied<1, 0b11, "bif">; def : Pat<(AArch64bsp (v8i8 V64:$Rd), V64:$Rn, V64:$Rm), (BSPv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>; def : Pat<(AArch64bsp (v4i16 V64:$Rd), V64:$Rn, V64:$Rm), (BSPv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>; def : Pat<(AArch64bsp (v2i32 V64:$Rd), V64:$Rn, V64:$Rm), (BSPv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>; def : Pat<(AArch64bsp (v1i64 V64:$Rd), V64:$Rn, V64:$Rm), (BSPv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>; def : Pat<(AArch64bsp (v16i8 V128:$Rd), V128:$Rn, V128:$Rm), (BSPv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>; def : Pat<(AArch64bsp (v8i16 V128:$Rd), V128:$Rn, V128:$Rm), (BSPv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>; def : Pat<(AArch64bsp (v4i32 V128:$Rd), V128:$Rn, V128:$Rm), (BSPv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>; def : Pat<(AArch64bsp (v2i64 V128:$Rd), V128:$Rn, V128:$Rm), (BSPv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>; // The following SetCC patterns are used for GlobalISel only multiclass SelectSetCC { def : Pat<(v8i8 (InFrag (v8i8 V64:$Rn), (v8i8 V64:$Rm))), (v8i8 (!cast(INST # v8i8) (v8i8 V64:$Rn), (v8i8 V64:$Rm)))>; def : Pat<(v16i8 (InFrag (v16i8 V128:$Rn), (v16i8 V128:$Rm))), (v16i8 (!cast(INST # v16i8) (v16i8 V128:$Rn), (v16i8 V128:$Rm)))>; def : Pat<(v4i16 (InFrag (v4i16 V64:$Rn), (v4i16 V64:$Rm))), (v4i16 (!cast(INST # v4i16) (v4i16 V64:$Rn), (v4i16 V64:$Rm)))>; def : Pat<(v8i16 (InFrag (v8i16 V128:$Rn), (v8i16 V128:$Rm))), (v8i16 (!cast(INST # v8i16) (v8i16 V128:$Rn), (v8i16 V128:$Rm)))>; def : Pat<(v2i32 (InFrag (v2i32 V64:$Rn), (v2i32 V64:$Rm))), (v2i32 (!cast(INST # v2i32) (v2i32 V64:$Rn), (v2i32 V64:$Rm)))>; def : Pat<(v4i32 (InFrag (v4i32 V128:$Rn), (v4i32 V128:$Rm))), (v4i32 (!cast(INST # v4i32) (v4i32 V128:$Rn), (v4i32 V128:$Rm)))>; def : Pat<(v2i64 (InFrag (v2i64 V128:$Rn), (v2i64 V128:$Rm))), (v2i64 (!cast(INST # v2i64) (v2i64 V128:$Rn), (v2i64 V128:$Rm)))>; } defm : SelectSetCC; defm : SelectSetCC; defm : SelectSetCC; defm : SelectSetCC; defm : SelectSetCC; multiclass SelectSetCCSwapOperands { def : Pat<(v8i8 (InFrag (v8i8 V64:$Rn), (v8i8 V64:$Rm))), (v8i8 (!cast(INST # v8i8) (v8i8 V64:$Rm), (v8i8 V64:$Rn)))>; def : Pat<(v16i8 (InFrag (v16i8 V128:$Rn), (v16i8 V128:$Rm))), (v16i8 (!cast(INST # v16i8) (v16i8 V128:$Rm), (v16i8 V128:$Rn)))>; def : Pat<(v4i16 (InFrag (v4i16 V64:$Rn), (v4i16 V64:$Rm))), (v4i16 (!cast(INST # v4i16) (v4i16 V64:$Rm), (v4i16 V64:$Rn)))>; def : Pat<(v8i16 (InFrag (v8i16 V128:$Rn), (v8i16 V128:$Rm))), (v8i16 (!cast(INST # v8i16) (v8i16 V128:$Rm), (v8i16 V128:$Rn)))>; def : Pat<(v2i32 (InFrag (v2i32 V64:$Rn), (v2i32 V64:$Rm))), 
(v2i32 (!cast(INST # v2i32) (v2i32 V64:$Rm), (v2i32 V64:$Rn)))>; def : Pat<(v4i32 (InFrag (v4i32 V128:$Rn), (v4i32 V128:$Rm))), (v4i32 (!cast(INST # v4i32) (v4i32 V128:$Rm), (v4i32 V128:$Rn)))>; def : Pat<(v2i64 (InFrag (v2i64 V128:$Rn), (v2i64 V128:$Rm))), (v2i64 (!cast(INST # v2i64) (v2i64 V128:$Rm), (v2i64 V128:$Rn)))>; } defm : SelectSetCCSwapOperands; defm : SelectSetCCSwapOperands; defm : SelectSetCCSwapOperands; defm : SelectSetCCSwapOperands; multiclass SelectSetCCZeroRHS { def : Pat<(v8i8 (InFrag (v8i8 V64:$Rn), immAllZerosV)), (v8i8 (!cast(INST # v8i8rz) (v8i8 V64:$Rn)))>; def : Pat<(v16i8 (InFrag (v16i8 V128:$Rn), immAllZerosV)), (v16i8 (!cast(INST # v16i8rz) (v16i8 V128:$Rn)))>; def : Pat<(v4i16 (InFrag (v4i16 V64:$Rn), immAllZerosV)), (v4i16 (!cast(INST # v4i16rz) (v4i16 V64:$Rn)))>; def : Pat<(v8i16 (InFrag (v8i16 V128:$Rn), immAllZerosV)), (v8i16 (!cast(INST # v8i16rz) (v8i16 V128:$Rn)))>; def : Pat<(v2i32 (InFrag (v2i32 V64:$Rn), immAllZerosV)), (v2i32 (!cast(INST # v2i32rz) (v2i32 V64:$Rn)))>; def : Pat<(v4i32 (InFrag (v4i32 V128:$Rn), immAllZerosV)), (v4i32 (!cast(INST # v4i32rz) (v4i32 V128:$Rn)))>; def : Pat<(v2i64 (InFrag (v2i64 V128:$Rn), immAllZerosV)), (v2i64 (!cast(INST # v2i64rz) (v2i64 V128:$Rn)))>; } defm : SelectSetCCZeroRHS; defm : SelectSetCCZeroRHS; defm : SelectSetCCZeroRHS; defm : SelectSetCCZeroRHS; defm : SelectSetCCZeroRHS; multiclass SelectSetCCZeroLHS { def : Pat<(v8i8 (InFrag immAllZerosV, (v8i8 V64:$Rn))), (v8i8 (!cast(INST # v8i8rz) (v8i8 V64:$Rn)))>; def : Pat<(v16i8 (InFrag immAllZerosV, (v16i8 V128:$Rn))), (v16i8 (!cast(INST # v16i8rz) (v16i8 V128:$Rn)))>; def : Pat<(v4i16 (InFrag immAllZerosV, (v4i16 V64:$Rn))), (v4i16 (!cast(INST # v4i16rz) (v4i16 V64:$Rn)))>; def : Pat<(v8i16 (InFrag immAllZerosV, (v8i16 V128:$Rn))), (v8i16 (!cast(INST # v8i16rz) (v8i16 V128:$Rn)))>; def : Pat<(v2i32 (InFrag immAllZerosV, (v2i32 V64:$Rn))), (v2i32 (!cast(INST # v2i32rz) (v2i32 V64:$Rn)))>; def : Pat<(v4i32 (InFrag immAllZerosV, (v4i32 V128:$Rn))), (v4i32 (!cast(INST # v4i32rz) (v4i32 V128:$Rn)))>; def : Pat<(v2i64 (InFrag immAllZerosV, (v2i64 V128:$Rn))), (v2i64 (!cast(INST # v2i64rz) (v2i64 V128:$Rn)))>; } defm : SelectSetCCZeroLHS; defm : SelectSetCCZeroLHS; defm : SelectSetCCZeroLHS; defm : SelectSetCCZeroLHS; defm : SelectSetCCZeroLHS; let Predicates = [HasNEON] in { def : InstAlias<"mov{\t$dst.16b, $src.16b|.16b\t$dst, $src}", (ORRv16i8 V128:$dst, V128:$src, V128:$src), 1>; def : InstAlias<"mov{\t$dst.8h, $src.8h|.8h\t$dst, $src}", (ORRv16i8 V128:$dst, V128:$src, V128:$src), 0>; def : InstAlias<"mov{\t$dst.4s, $src.4s|.4s\t$dst, $src}", (ORRv16i8 V128:$dst, V128:$src, V128:$src), 0>; def : InstAlias<"mov{\t$dst.2d, $src.2d|.2d\t$dst, $src}", (ORRv16i8 V128:$dst, V128:$src, V128:$src), 0>; def : InstAlias<"mov{\t$dst.8b, $src.8b|.8b\t$dst, $src}", (ORRv8i8 V64:$dst, V64:$src, V64:$src), 1>; def : InstAlias<"mov{\t$dst.4h, $src.4h|.4h\t$dst, $src}", (ORRv8i8 V64:$dst, V64:$src, V64:$src), 0>; def : InstAlias<"mov{\t$dst.2s, $src.2s|.2s\t$dst, $src}", (ORRv8i8 V64:$dst, V64:$src, V64:$src), 0>; def : InstAlias<"mov{\t$dst.1d, $src.1d|.1d\t$dst, $src}", (ORRv8i8 V64:$dst, V64:$src, V64:$src), 0>; def : InstAlias<"{cmls\t$dst.8b, $src1.8b, $src2.8b" # "|cmls.8b\t$dst, $src1, $src2}", (CMHSv8i8 V64:$dst, V64:$src2, V64:$src1), 0>; def : InstAlias<"{cmls\t$dst.16b, $src1.16b, $src2.16b" # "|cmls.16b\t$dst, $src1, $src2}", (CMHSv16i8 V128:$dst, V128:$src2, V128:$src1), 0>; def : InstAlias<"{cmls\t$dst.4h, $src1.4h, $src2.4h" # "|cmls.4h\t$dst, $src1, $src2}", 
(CMHSv4i16 V64:$dst, V64:$src2, V64:$src1), 0>; def : InstAlias<"{cmls\t$dst.8h, $src1.8h, $src2.8h" # "|cmls.8h\t$dst, $src1, $src2}", (CMHSv8i16 V128:$dst, V128:$src2, V128:$src1), 0>; def : InstAlias<"{cmls\t$dst.2s, $src1.2s, $src2.2s" # "|cmls.2s\t$dst, $src1, $src2}", (CMHSv2i32 V64:$dst, V64:$src2, V64:$src1), 0>; def : InstAlias<"{cmls\t$dst.4s, $src1.4s, $src2.4s" # "|cmls.4s\t$dst, $src1, $src2}", (CMHSv4i32 V128:$dst, V128:$src2, V128:$src1), 0>; def : InstAlias<"{cmls\t$dst.2d, $src1.2d, $src2.2d" # "|cmls.2d\t$dst, $src1, $src2}", (CMHSv2i64 V128:$dst, V128:$src2, V128:$src1), 0>; def : InstAlias<"{cmlo\t$dst.8b, $src1.8b, $src2.8b" # "|cmlo.8b\t$dst, $src1, $src2}", (CMHIv8i8 V64:$dst, V64:$src2, V64:$src1), 0>; def : InstAlias<"{cmlo\t$dst.16b, $src1.16b, $src2.16b" # "|cmlo.16b\t$dst, $src1, $src2}", (CMHIv16i8 V128:$dst, V128:$src2, V128:$src1), 0>; def : InstAlias<"{cmlo\t$dst.4h, $src1.4h, $src2.4h" # "|cmlo.4h\t$dst, $src1, $src2}", (CMHIv4i16 V64:$dst, V64:$src2, V64:$src1), 0>; def : InstAlias<"{cmlo\t$dst.8h, $src1.8h, $src2.8h" # "|cmlo.8h\t$dst, $src1, $src2}", (CMHIv8i16 V128:$dst, V128:$src2, V128:$src1), 0>; def : InstAlias<"{cmlo\t$dst.2s, $src1.2s, $src2.2s" # "|cmlo.2s\t$dst, $src1, $src2}", (CMHIv2i32 V64:$dst, V64:$src2, V64:$src1), 0>; def : InstAlias<"{cmlo\t$dst.4s, $src1.4s, $src2.4s" # "|cmlo.4s\t$dst, $src1, $src2}", (CMHIv4i32 V128:$dst, V128:$src2, V128:$src1), 0>; def : InstAlias<"{cmlo\t$dst.2d, $src1.2d, $src2.2d" # "|cmlo.2d\t$dst, $src1, $src2}", (CMHIv2i64 V128:$dst, V128:$src2, V128:$src1), 0>; def : InstAlias<"{cmle\t$dst.8b, $src1.8b, $src2.8b" # "|cmle.8b\t$dst, $src1, $src2}", (CMGEv8i8 V64:$dst, V64:$src2, V64:$src1), 0>; def : InstAlias<"{cmle\t$dst.16b, $src1.16b, $src2.16b" # "|cmle.16b\t$dst, $src1, $src2}", (CMGEv16i8 V128:$dst, V128:$src2, V128:$src1), 0>; def : InstAlias<"{cmle\t$dst.4h, $src1.4h, $src2.4h" # "|cmle.4h\t$dst, $src1, $src2}", (CMGEv4i16 V64:$dst, V64:$src2, V64:$src1), 0>; def : InstAlias<"{cmle\t$dst.8h, $src1.8h, $src2.8h" # "|cmle.8h\t$dst, $src1, $src2}", (CMGEv8i16 V128:$dst, V128:$src2, V128:$src1), 0>; def : InstAlias<"{cmle\t$dst.2s, $src1.2s, $src2.2s" # "|cmle.2s\t$dst, $src1, $src2}", (CMGEv2i32 V64:$dst, V64:$src2, V64:$src1), 0>; def : InstAlias<"{cmle\t$dst.4s, $src1.4s, $src2.4s" # "|cmle.4s\t$dst, $src1, $src2}", (CMGEv4i32 V128:$dst, V128:$src2, V128:$src1), 0>; def : InstAlias<"{cmle\t$dst.2d, $src1.2d, $src2.2d" # "|cmle.2d\t$dst, $src1, $src2}", (CMGEv2i64 V128:$dst, V128:$src2, V128:$src1), 0>; def : InstAlias<"{cmlt\t$dst.8b, $src1.8b, $src2.8b" # "|cmlt.8b\t$dst, $src1, $src2}", (CMGTv8i8 V64:$dst, V64:$src2, V64:$src1), 0>; def : InstAlias<"{cmlt\t$dst.16b, $src1.16b, $src2.16b" # "|cmlt.16b\t$dst, $src1, $src2}", (CMGTv16i8 V128:$dst, V128:$src2, V128:$src1), 0>; def : InstAlias<"{cmlt\t$dst.4h, $src1.4h, $src2.4h" # "|cmlt.4h\t$dst, $src1, $src2}", (CMGTv4i16 V64:$dst, V64:$src2, V64:$src1), 0>; def : InstAlias<"{cmlt\t$dst.8h, $src1.8h, $src2.8h" # "|cmlt.8h\t$dst, $src1, $src2}", (CMGTv8i16 V128:$dst, V128:$src2, V128:$src1), 0>; def : InstAlias<"{cmlt\t$dst.2s, $src1.2s, $src2.2s" # "|cmlt.2s\t$dst, $src1, $src2}", (CMGTv2i32 V64:$dst, V64:$src2, V64:$src1), 0>; def : InstAlias<"{cmlt\t$dst.4s, $src1.4s, $src2.4s" # "|cmlt.4s\t$dst, $src1, $src2}", (CMGTv4i32 V128:$dst, V128:$src2, V128:$src1), 0>; def : InstAlias<"{cmlt\t$dst.2d, $src1.2d, $src2.2d" # "|cmlt.2d\t$dst, $src1, $src2}", (CMGTv2i64 V128:$dst, V128:$src2, V128:$src1), 0>; let Predicates = [HasNEON, HasFullFP16] in { def : 
InstAlias<"{fcmle\t$dst.4h, $src1.4h, $src2.4h" # "|fcmle.4h\t$dst, $src1, $src2}", (FCMGEv4f16 V64:$dst, V64:$src2, V64:$src1), 0>; def : InstAlias<"{fcmle\t$dst.8h, $src1.8h, $src2.8h" # "|fcmle.8h\t$dst, $src1, $src2}", (FCMGEv8f16 V128:$dst, V128:$src2, V128:$src1), 0>; } def : InstAlias<"{fcmle\t$dst.2s, $src1.2s, $src2.2s" # "|fcmle.2s\t$dst, $src1, $src2}", (FCMGEv2f32 V64:$dst, V64:$src2, V64:$src1), 0>; def : InstAlias<"{fcmle\t$dst.4s, $src1.4s, $src2.4s" # "|fcmle.4s\t$dst, $src1, $src2}", (FCMGEv4f32 V128:$dst, V128:$src2, V128:$src1), 0>; def : InstAlias<"{fcmle\t$dst.2d, $src1.2d, $src2.2d" # "|fcmle.2d\t$dst, $src1, $src2}", (FCMGEv2f64 V128:$dst, V128:$src2, V128:$src1), 0>; let Predicates = [HasNEON, HasFullFP16] in { def : InstAlias<"{fcmlt\t$dst.4h, $src1.4h, $src2.4h" # "|fcmlt.4h\t$dst, $src1, $src2}", (FCMGTv4f16 V64:$dst, V64:$src2, V64:$src1), 0>; def : InstAlias<"{fcmlt\t$dst.8h, $src1.8h, $src2.8h" # "|fcmlt.8h\t$dst, $src1, $src2}", (FCMGTv8f16 V128:$dst, V128:$src2, V128:$src1), 0>; } def : InstAlias<"{fcmlt\t$dst.2s, $src1.2s, $src2.2s" # "|fcmlt.2s\t$dst, $src1, $src2}", (FCMGTv2f32 V64:$dst, V64:$src2, V64:$src1), 0>; def : InstAlias<"{fcmlt\t$dst.4s, $src1.4s, $src2.4s" # "|fcmlt.4s\t$dst, $src1, $src2}", (FCMGTv4f32 V128:$dst, V128:$src2, V128:$src1), 0>; def : InstAlias<"{fcmlt\t$dst.2d, $src1.2d, $src2.2d" # "|fcmlt.2d\t$dst, $src1, $src2}", (FCMGTv2f64 V128:$dst, V128:$src2, V128:$src1), 0>; let Predicates = [HasNEON, HasFullFP16] in { def : InstAlias<"{facle\t$dst.4h, $src1.4h, $src2.4h" # "|facle.4h\t$dst, $src1, $src2}", (FACGEv4f16 V64:$dst, V64:$src2, V64:$src1), 0>; def : InstAlias<"{facle\t$dst.8h, $src1.8h, $src2.8h" # "|facle.8h\t$dst, $src1, $src2}", (FACGEv8f16 V128:$dst, V128:$src2, V128:$src1), 0>; } def : InstAlias<"{facle\t$dst.2s, $src1.2s, $src2.2s" # "|facle.2s\t$dst, $src1, $src2}", (FACGEv2f32 V64:$dst, V64:$src2, V64:$src1), 0>; def : InstAlias<"{facle\t$dst.4s, $src1.4s, $src2.4s" # "|facle.4s\t$dst, $src1, $src2}", (FACGEv4f32 V128:$dst, V128:$src2, V128:$src1), 0>; def : InstAlias<"{facle\t$dst.2d, $src1.2d, $src2.2d" # "|facle.2d\t$dst, $src1, $src2}", (FACGEv2f64 V128:$dst, V128:$src2, V128:$src1), 0>; let Predicates = [HasNEON, HasFullFP16] in { def : InstAlias<"{faclt\t$dst.4h, $src1.4h, $src2.4h" # "|faclt.4h\t$dst, $src1, $src2}", (FACGTv4f16 V64:$dst, V64:$src2, V64:$src1), 0>; def : InstAlias<"{faclt\t$dst.8h, $src1.8h, $src2.8h" # "|faclt.8h\t$dst, $src1, $src2}", (FACGTv8f16 V128:$dst, V128:$src2, V128:$src1), 0>; } def : InstAlias<"{faclt\t$dst.2s, $src1.2s, $src2.2s" # "|faclt.2s\t$dst, $src1, $src2}", (FACGTv2f32 V64:$dst, V64:$src2, V64:$src1), 0>; def : InstAlias<"{faclt\t$dst.4s, $src1.4s, $src2.4s" # "|faclt.4s\t$dst, $src1, $src2}", (FACGTv4f32 V128:$dst, V128:$src2, V128:$src1), 0>; def : InstAlias<"{faclt\t$dst.2d, $src1.2d, $src2.2d" # "|faclt.2d\t$dst, $src1, $src2}", (FACGTv2f64 V128:$dst, V128:$src2, V128:$src1), 0>; } //===----------------------------------------------------------------------===// // Advanced SIMD three scalar instructions. 
//===----------------------------------------------------------------------===// defm ADD : SIMDThreeScalarD<0, 0b10000, "add", add>; defm CMEQ : SIMDThreeScalarD<1, 0b10001, "cmeq", AArch64cmeq>; defm CMGE : SIMDThreeScalarD<0, 0b00111, "cmge", AArch64cmge>; defm CMGT : SIMDThreeScalarD<0, 0b00110, "cmgt", AArch64cmgt>; defm CMHI : SIMDThreeScalarD<1, 0b00110, "cmhi", AArch64cmhi>; defm CMHS : SIMDThreeScalarD<1, 0b00111, "cmhs", AArch64cmhs>; defm CMTST : SIMDThreeScalarD<0, 0b10001, "cmtst", AArch64cmtst>; defm FABD : SIMDFPThreeScalar<1, 1, 0b010, "fabd", int_aarch64_sisd_fabd>; def : Pat<(v1f64 (int_aarch64_neon_fabd (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), (FABD64 FPR64:$Rn, FPR64:$Rm)>; let Predicates = [HasNEON, HasFullFP16] in { def : Pat<(fabs (fsub f16:$Rn, f16:$Rm)), (FABD16 f16:$Rn, f16:$Rm)>; } let Predicates = [HasNEON] in { def : Pat<(fabs (fsub f32:$Rn, f32:$Rm)), (FABD32 f32:$Rn, f32:$Rm)>; def : Pat<(fabs (fsub f64:$Rn, f64:$Rm)), (FABD64 f64:$Rn, f64:$Rm)>; } defm FACGE : SIMDThreeScalarFPCmp<1, 0, 0b101, "facge", int_aarch64_neon_facge>; defm FACGT : SIMDThreeScalarFPCmp<1, 1, 0b101, "facgt", int_aarch64_neon_facgt>; defm FCMEQ : SIMDThreeScalarFPCmp<0, 0, 0b100, "fcmeq", AArch64fcmeq>; defm FCMGE : SIMDThreeScalarFPCmp<1, 0, 0b100, "fcmge", AArch64fcmge>; defm FCMGT : SIMDThreeScalarFPCmp<1, 1, 0b100, "fcmgt", AArch64fcmgt>; defm FMULX : SIMDFPThreeScalar<0, 0, 0b011, "fmulx", int_aarch64_neon_fmulx, HasNEONandIsStreamingSafe>; defm FRECPS : SIMDFPThreeScalar<0, 0, 0b111, "frecps", int_aarch64_neon_frecps, HasNEONandIsStreamingSafe>; defm FRSQRTS : SIMDFPThreeScalar<0, 1, 0b111, "frsqrts", int_aarch64_neon_frsqrts, HasNEONandIsStreamingSafe>; defm SQADD : SIMDThreeScalarBHSD<0, 0b00001, "sqadd", int_aarch64_neon_sqadd>; defm SQDMULH : SIMDThreeScalarHS< 0, 0b10110, "sqdmulh", int_aarch64_neon_sqdmulh>; defm SQRDMULH : SIMDThreeScalarHS< 1, 0b10110, "sqrdmulh", int_aarch64_neon_sqrdmulh>; defm SQRSHL : SIMDThreeScalarBHSD<0, 0b01011, "sqrshl",int_aarch64_neon_sqrshl>; defm SQSHL : SIMDThreeScalarBHSD<0, 0b01001, "sqshl", int_aarch64_neon_sqshl>; defm SQSUB : SIMDThreeScalarBHSD<0, 0b00101, "sqsub", int_aarch64_neon_sqsub>; defm SRSHL : SIMDThreeScalarD< 0, 0b01010, "srshl", int_aarch64_neon_srshl>; defm SSHL : SIMDThreeScalarD< 0, 0b01000, "sshl", int_aarch64_neon_sshl>; defm SUB : SIMDThreeScalarD< 1, 0b10000, "sub", sub>; defm UQADD : SIMDThreeScalarBHSD<1, 0b00001, "uqadd", int_aarch64_neon_uqadd>; defm UQRSHL : SIMDThreeScalarBHSD<1, 0b01011, "uqrshl",int_aarch64_neon_uqrshl>; defm UQSHL : SIMDThreeScalarBHSD<1, 0b01001, "uqshl", int_aarch64_neon_uqshl>; defm UQSUB : SIMDThreeScalarBHSD<1, 0b00101, "uqsub", int_aarch64_neon_uqsub>; defm URSHL : SIMDThreeScalarD< 1, 0b01010, "urshl", int_aarch64_neon_urshl>; defm USHL : SIMDThreeScalarD< 1, 0b01000, "ushl", int_aarch64_neon_ushl>; let Predicates = [HasRDM] in { defm SQRDMLAH : SIMDThreeScalarHSTied<1, 0, 0b10000, "sqrdmlah">; defm SQRDMLSH : SIMDThreeScalarHSTied<1, 0, 0b10001, "sqrdmlsh">; def : Pat<(i32 (int_aarch64_neon_sqrdmlah (i32 FPR32:$Rd), (i32 FPR32:$Rn), (i32 FPR32:$Rm))), (SQRDMLAHv1i32 FPR32:$Rd, FPR32:$Rn, FPR32:$Rm)>; def : Pat<(i32 (int_aarch64_neon_sqrdmlsh (i32 FPR32:$Rd), (i32 FPR32:$Rn), (i32 FPR32:$Rm))), (SQRDMLSHv1i32 FPR32:$Rd, FPR32:$Rn, FPR32:$Rm)>; } defm : FMULScalarFromIndexedLane0Patterns<"FMULX", "16", "32", "64", int_aarch64_neon_fmulx, [HasNEONandIsStreamingSafe]>; let Predicates = [HasNEON] in { def : InstAlias<"cmls $dst, $src1, $src2", (CMHSv1i64 FPR64:$dst, FPR64:$src2, 
FPR64:$src1), 0>; def : InstAlias<"cmle $dst, $src1, $src2", (CMGEv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>; def : InstAlias<"cmlo $dst, $src1, $src2", (CMHIv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>; def : InstAlias<"cmlt $dst, $src1, $src2", (CMGTv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>; } let Predicates = [HasFPARMv8] in { def : InstAlias<"fcmle $dst, $src1, $src2", (FCMGE32 FPR32:$dst, FPR32:$src2, FPR32:$src1), 0>; def : InstAlias<"fcmle $dst, $src1, $src2", (FCMGE64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>; def : InstAlias<"fcmlt $dst, $src1, $src2", (FCMGT32 FPR32:$dst, FPR32:$src2, FPR32:$src1), 0>; def : InstAlias<"fcmlt $dst, $src1, $src2", (FCMGT64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>; def : InstAlias<"facle $dst, $src1, $src2", (FACGE32 FPR32:$dst, FPR32:$src2, FPR32:$src1), 0>; def : InstAlias<"facle $dst, $src1, $src2", (FACGE64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>; def : InstAlias<"faclt $dst, $src1, $src2", (FACGT32 FPR32:$dst, FPR32:$src2, FPR32:$src1), 0>; def : InstAlias<"faclt $dst, $src1, $src2", (FACGT64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>; } //===----------------------------------------------------------------------===// // Advanced SIMD three scalar instructions (mixed operands). //===----------------------------------------------------------------------===// defm SQDMULL : SIMDThreeScalarMixedHS<0, 0b11010, "sqdmull", int_aarch64_neon_sqdmulls_scalar>; defm SQDMLAL : SIMDThreeScalarMixedTiedHS<0, 0b10010, "sqdmlal">; defm SQDMLSL : SIMDThreeScalarMixedTiedHS<0, 0b10110, "sqdmlsl">; def : Pat<(i64 (int_aarch64_neon_sqadd (i64 FPR64:$Rd), (i64 (int_aarch64_neon_sqdmulls_scalar (i32 FPR32:$Rn), (i32 FPR32:$Rm))))), (SQDMLALi32 FPR64:$Rd, FPR32:$Rn, FPR32:$Rm)>; def : Pat<(i64 (int_aarch64_neon_sqsub (i64 FPR64:$Rd), (i64 (int_aarch64_neon_sqdmulls_scalar (i32 FPR32:$Rn), (i32 FPR32:$Rm))))), (SQDMLSLi32 FPR64:$Rd, FPR32:$Rn, FPR32:$Rm)>; //===----------------------------------------------------------------------===// // Advanced SIMD two scalar instructions. 
//===----------------------------------------------------------------------===// defm ABS : SIMDTwoScalarD< 0, 0b01011, "abs", abs, [HasNoCSSC]>; defm CMEQ : SIMDCmpTwoScalarD< 0, 0b01001, "cmeq", AArch64cmeqz>; defm CMGE : SIMDCmpTwoScalarD< 1, 0b01000, "cmge", AArch64cmgez>; defm CMGT : SIMDCmpTwoScalarD< 0, 0b01000, "cmgt", AArch64cmgtz>; defm CMLE : SIMDCmpTwoScalarD< 1, 0b01001, "cmle", AArch64cmlez>; defm CMLT : SIMDCmpTwoScalarD< 0, 0b01010, "cmlt", AArch64cmltz>; defm FCMEQ : SIMDFPCmpTwoScalar<0, 1, 0b01101, "fcmeq", AArch64fcmeqz>; defm FCMGE : SIMDFPCmpTwoScalar<1, 1, 0b01100, "fcmge", AArch64fcmgez>; defm FCMGT : SIMDFPCmpTwoScalar<0, 1, 0b01100, "fcmgt", AArch64fcmgtz>; defm FCMLE : SIMDFPCmpTwoScalar<1, 1, 0b01101, "fcmle", AArch64fcmlez>; defm FCMLT : SIMDFPCmpTwoScalar<0, 1, 0b01110, "fcmlt", AArch64fcmltz>; defm FCVTAS : SIMDFPTwoScalar< 0, 0, 0b11100, "fcvtas">; defm FCVTAU : SIMDFPTwoScalar< 1, 0, 0b11100, "fcvtau">; defm FCVTMS : SIMDFPTwoScalar< 0, 0, 0b11011, "fcvtms">; defm FCVTMU : SIMDFPTwoScalar< 1, 0, 0b11011, "fcvtmu">; defm FCVTNS : SIMDFPTwoScalar< 0, 0, 0b11010, "fcvtns">; defm FCVTNU : SIMDFPTwoScalar< 1, 0, 0b11010, "fcvtnu">; defm FCVTPS : SIMDFPTwoScalar< 0, 1, 0b11010, "fcvtps">; defm FCVTPU : SIMDFPTwoScalar< 1, 1, 0b11010, "fcvtpu">; def FCVTXNv1i64 : SIMDInexactCvtTwoScalar<0b10110, "fcvtxn">; defm FCVTZS : SIMDFPTwoScalar< 0, 1, 0b11011, "fcvtzs">; defm FCVTZU : SIMDFPTwoScalar< 1, 1, 0b11011, "fcvtzu">; defm FRECPE : SIMDFPTwoScalar< 0, 1, 0b11101, "frecpe">; defm FRECPX : SIMDFPTwoScalar< 0, 1, 0b11111, "frecpx">; defm FRSQRTE : SIMDFPTwoScalar< 1, 1, 0b11101, "frsqrte">; defm NEG : SIMDTwoScalarD< 1, 0b01011, "neg", UnOpFrag<(sub immAllZerosV, node:$LHS)> >; defm SCVTF : SIMDFPTwoScalarCVT< 0, 0, 0b11101, "scvtf", AArch64sitof>; defm SQABS : SIMDTwoScalarBHSD< 0, 0b00111, "sqabs", int_aarch64_neon_sqabs>; defm SQNEG : SIMDTwoScalarBHSD< 1, 0b00111, "sqneg", int_aarch64_neon_sqneg>; defm SQXTN : SIMDTwoScalarMixedBHS< 0, 0b10100, "sqxtn", int_aarch64_neon_scalar_sqxtn>; defm SQXTUN : SIMDTwoScalarMixedBHS< 1, 0b10010, "sqxtun", int_aarch64_neon_scalar_sqxtun>; defm SUQADD : SIMDTwoScalarBHSDTied< 0, 0b00011, "suqadd", int_aarch64_neon_suqadd>; defm UCVTF : SIMDFPTwoScalarCVT< 1, 0, 0b11101, "ucvtf", AArch64uitof>; defm UQXTN : SIMDTwoScalarMixedBHS<1, 0b10100, "uqxtn", int_aarch64_neon_scalar_uqxtn>; defm USQADD : SIMDTwoScalarBHSDTied< 1, 0b00011, "usqadd", int_aarch64_neon_usqadd>; def : Pat<(v1i64 (AArch64vashr (v1i64 V64:$Rn), (i32 63))), (CMLTv1i64rz V64:$Rn)>; // Round FP64 to BF16. 
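// The f64 is first narrowed with FCVTXN, which rounds to odd, so that the
// final rounding performed by BFCVT when producing the bf16 result does not
// introduce a double-rounding error.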
let Predicates = [HasNEONandIsStreamingSafe, HasBF16] in def : Pat<(bf16 (any_fpround (f64 FPR64:$Rn))), (BFCVT (FCVTXNv1i64 $Rn))>; def : Pat<(v1i64 (int_aarch64_neon_fcvtas (v1f64 FPR64:$Rn))), (FCVTASv1i64 FPR64:$Rn)>; def : Pat<(v1i64 (int_aarch64_neon_fcvtau (v1f64 FPR64:$Rn))), (FCVTAUv1i64 FPR64:$Rn)>; def : Pat<(v1i64 (int_aarch64_neon_fcvtms (v1f64 FPR64:$Rn))), (FCVTMSv1i64 FPR64:$Rn)>; def : Pat<(v1i64 (int_aarch64_neon_fcvtmu (v1f64 FPR64:$Rn))), (FCVTMUv1i64 FPR64:$Rn)>; def : Pat<(v1i64 (int_aarch64_neon_fcvtns (v1f64 FPR64:$Rn))), (FCVTNSv1i64 FPR64:$Rn)>; def : Pat<(v1i64 (int_aarch64_neon_fcvtnu (v1f64 FPR64:$Rn))), (FCVTNUv1i64 FPR64:$Rn)>; def : Pat<(v1i64 (int_aarch64_neon_fcvtps (v1f64 FPR64:$Rn))), (FCVTPSv1i64 FPR64:$Rn)>; def : Pat<(v1i64 (int_aarch64_neon_fcvtpu (v1f64 FPR64:$Rn))), (FCVTPUv1i64 FPR64:$Rn)>; def : Pat<(v1i64 (int_aarch64_neon_fcvtzs (v1f64 FPR64:$Rn))), (FCVTZSv1i64 FPR64:$Rn)>; def : Pat<(v1i64 (int_aarch64_neon_fcvtzu (v1f64 FPR64:$Rn))), (FCVTZUv1i64 FPR64:$Rn)>; def : Pat<(f16 (int_aarch64_neon_frecpe (f16 FPR16:$Rn))), (FRECPEv1f16 FPR16:$Rn)>; def : Pat<(f32 (int_aarch64_neon_frecpe (f32 FPR32:$Rn))), (FRECPEv1i32 FPR32:$Rn)>; def : Pat<(f64 (int_aarch64_neon_frecpe (f64 FPR64:$Rn))), (FRECPEv1i64 FPR64:$Rn)>; def : Pat<(v1f64 (int_aarch64_neon_frecpe (v1f64 FPR64:$Rn))), (FRECPEv1i64 FPR64:$Rn)>; def : Pat<(f32 (AArch64frecpe (f32 FPR32:$Rn))), (FRECPEv1i32 FPR32:$Rn)>; def : Pat<(v2f32 (AArch64frecpe (v2f32 V64:$Rn))), (FRECPEv2f32 V64:$Rn)>; def : Pat<(v4f32 (AArch64frecpe (v4f32 FPR128:$Rn))), (FRECPEv4f32 FPR128:$Rn)>; def : Pat<(f64 (AArch64frecpe (f64 FPR64:$Rn))), (FRECPEv1i64 FPR64:$Rn)>; def : Pat<(v1f64 (AArch64frecpe (v1f64 FPR64:$Rn))), (FRECPEv1i64 FPR64:$Rn)>; def : Pat<(v2f64 (AArch64frecpe (v2f64 FPR128:$Rn))), (FRECPEv2f64 FPR128:$Rn)>; def : Pat<(f32 (AArch64frecps (f32 FPR32:$Rn), (f32 FPR32:$Rm))), (FRECPS32 FPR32:$Rn, FPR32:$Rm)>; def : Pat<(v2f32 (AArch64frecps (v2f32 V64:$Rn), (v2f32 V64:$Rm))), (FRECPSv2f32 V64:$Rn, V64:$Rm)>; def : Pat<(v4f32 (AArch64frecps (v4f32 FPR128:$Rn), (v4f32 FPR128:$Rm))), (FRECPSv4f32 FPR128:$Rn, FPR128:$Rm)>; def : Pat<(f64 (AArch64frecps (f64 FPR64:$Rn), (f64 FPR64:$Rm))), (FRECPS64 FPR64:$Rn, FPR64:$Rm)>; def : Pat<(v2f64 (AArch64frecps (v2f64 FPR128:$Rn), (v2f64 FPR128:$Rm))), (FRECPSv2f64 FPR128:$Rn, FPR128:$Rm)>; def : Pat<(f16 (int_aarch64_neon_frecpx (f16 FPR16:$Rn))), (FRECPXv1f16 FPR16:$Rn)>; def : Pat<(f32 (int_aarch64_neon_frecpx (f32 FPR32:$Rn))), (FRECPXv1i32 FPR32:$Rn)>; def : Pat<(f64 (int_aarch64_neon_frecpx (f64 FPR64:$Rn))), (FRECPXv1i64 FPR64:$Rn)>; def : Pat<(f16 (int_aarch64_neon_frsqrte (f16 FPR16:$Rn))), (FRSQRTEv1f16 FPR16:$Rn)>; def : Pat<(f32 (int_aarch64_neon_frsqrte (f32 FPR32:$Rn))), (FRSQRTEv1i32 FPR32:$Rn)>; def : Pat<(f64 (int_aarch64_neon_frsqrte (f64 FPR64:$Rn))), (FRSQRTEv1i64 FPR64:$Rn)>; def : Pat<(v1f64 (int_aarch64_neon_frsqrte (v1f64 FPR64:$Rn))), (FRSQRTEv1i64 FPR64:$Rn)>; def : Pat<(f32 (AArch64frsqrte (f32 FPR32:$Rn))), (FRSQRTEv1i32 FPR32:$Rn)>; def : Pat<(v2f32 (AArch64frsqrte (v2f32 V64:$Rn))), (FRSQRTEv2f32 V64:$Rn)>; def : Pat<(v4f32 (AArch64frsqrte (v4f32 FPR128:$Rn))), (FRSQRTEv4f32 FPR128:$Rn)>; def : Pat<(f64 (AArch64frsqrte (f64 FPR64:$Rn))), (FRSQRTEv1i64 FPR64:$Rn)>; def : Pat<(v1f64 (AArch64frsqrte (v1f64 FPR64:$Rn))), (FRSQRTEv1i64 FPR64:$Rn)>; def : Pat<(v2f64 (AArch64frsqrte (v2f64 FPR128:$Rn))), (FRSQRTEv2f64 FPR128:$Rn)>; def : Pat<(f32 (AArch64frsqrts (f32 FPR32:$Rn), (f32 FPR32:$Rm))), (FRSQRTS32 FPR32:$Rn, FPR32:$Rm)>; def : 
Pat<(v2f32 (AArch64frsqrts (v2f32 V64:$Rn), (v2f32 V64:$Rm))), (FRSQRTSv2f32 V64:$Rn, V64:$Rm)>; def : Pat<(v4f32 (AArch64frsqrts (v4f32 FPR128:$Rn), (v4f32 FPR128:$Rm))), (FRSQRTSv4f32 FPR128:$Rn, FPR128:$Rm)>; def : Pat<(f64 (AArch64frsqrts (f64 FPR64:$Rn), (f64 FPR64:$Rm))), (FRSQRTS64 FPR64:$Rn, FPR64:$Rm)>; def : Pat<(v2f64 (AArch64frsqrts (v2f64 FPR128:$Rn), (v2f64 FPR128:$Rm))), (FRSQRTSv2f64 FPR128:$Rn, FPR128:$Rm)>; // Some float -> int -> float conversion patterns for which we want to keep the // int values in FP registers using the corresponding NEON instructions to // avoid more costly int <-> fp register transfers. let Predicates = [HasNEONandIsStreamingSafe] in { def : Pat<(f64 (any_sint_to_fp (i64 (any_fp_to_sint f64:$Rn)))), (SCVTFv1i64 (i64 (FCVTZSv1i64 f64:$Rn)))>; def : Pat<(f32 (any_sint_to_fp (i32 (any_fp_to_sint f32:$Rn)))), (SCVTFv1i32 (i32 (FCVTZSv1i32 f32:$Rn)))>; def : Pat<(f64 (any_uint_to_fp (i64 (any_fp_to_uint f64:$Rn)))), (UCVTFv1i64 (i64 (FCVTZUv1i64 f64:$Rn)))>; def : Pat<(f32 (any_uint_to_fp (i32 (any_fp_to_uint f32:$Rn)))), (UCVTFv1i32 (i32 (FCVTZUv1i32 f32:$Rn)))>; let Predicates = [HasNEONandIsStreamingSafe, HasFullFP16] in { def : Pat<(f16 (any_sint_to_fp (i32 (any_fp_to_sint f16:$Rn)))), (SCVTFv1i16 (f16 (FCVTZSv1f16 f16:$Rn)))>; def : Pat<(f16 (any_uint_to_fp (i32 (any_fp_to_uint f16:$Rn)))), (UCVTFv1i16 (f16 (FCVTZUv1f16 f16:$Rn)))>; } // int -> float conversion of value in lane 0 of simd vector should use // correct cvtf variant to avoid costly fpr <-> gpr register transfers. def : Pat<(f32 (sint_to_fp (i32 (vector_extract (v4i32 FPR128:$Rn), (i64 0))))), (SCVTFv1i32 (i32 (EXTRACT_SUBREG (v4i32 FPR128:$Rn), ssub)))>; def : Pat<(f32 (uint_to_fp (i32 (vector_extract (v4i32 FPR128:$Rn), (i64 0))))), (UCVTFv1i32 (i32 (EXTRACT_SUBREG (v4i32 FPR128:$Rn), ssub)))>; def : Pat<(f64 (sint_to_fp (i64 (vector_extract (v2i64 FPR128:$Rn), (i64 0))))), (SCVTFv1i64 (i64 (EXTRACT_SUBREG (v2i64 FPR128:$Rn), dsub)))>; def : Pat<(f64 (uint_to_fp (i64 (vector_extract (v2i64 FPR128:$Rn), (i64 0))))), (UCVTFv1i64 (i64 (EXTRACT_SUBREG (v2i64 FPR128:$Rn), dsub)))>; // fp16: integer extraction from vector must be at least 32-bits to be legal. // Actual extraction result is then an in-reg sign-extension of lower 16-bits. let Predicates = [HasNEONandIsStreamingSafe, HasFullFP16] in { def : Pat<(f16 (sint_to_fp (i32 (sext_inreg (i32 (vector_extract (v8i16 FPR128:$Rn), (i64 0))), i16)))), (SCVTFv1i16 (f16 (EXTRACT_SUBREG (v8i16 FPR128:$Rn), hsub)))>; // unsigned 32-bit extracted element is truncated to 16-bits using AND def : Pat<(f16 (uint_to_fp (i32 (and (i32 (vector_extract (v8i16 FPR128:$Rn), (i64 0))), (i32 65535))))), (UCVTFv1i16 (f16 (EXTRACT_SUBREG (v8i16 FPR128:$Rn), hsub)))>; } // If an integer is about to be converted to a floating point value, // just load it on the floating point unit. // Here are the patterns for 8 and 16-bits to float. // 8-bits -> float. 
multiclass UIntToFPROLoadPat { def : Pat<(DstTy (uint_to_fp (SrcTy (loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend))))), (UCVTF (INSERT_SUBREG (DstTy (IMPLICIT_DEF)), (LDRW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend), sub))>; def : Pat<(DstTy (uint_to_fp (SrcTy (loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Wext:$extend))))), (UCVTF (INSERT_SUBREG (DstTy (IMPLICIT_DEF)), (LDRX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend), sub))>; } defm : UIntToFPROLoadPat; def : Pat <(f32 (uint_to_fp (i32 (zextloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))), (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)), (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub))>; def : Pat <(f32 (uint_to_fp (i32 (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))))), (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)), (LDURBi GPR64sp:$Rn, simm9:$offset), bsub))>; // 16-bits -> float. defm : UIntToFPROLoadPat; def : Pat <(f32 (uint_to_fp (i32 (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))), (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)), (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub))>; def : Pat <(f32 (uint_to_fp (i32 (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))))), (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)), (LDURHi GPR64sp:$Rn, simm9:$offset), hsub))>; // 32-bits are handled in target specific dag combine: // performIntToFpCombine. // 64-bits integer to 32-bits floating point, not possible with // UCVTF on floating point registers (both source and destination // must have the same size). // Here are the patterns for 8, 16, 32, and 64-bits to double. // 8-bits -> double. defm : UIntToFPROLoadPat; def : Pat <(f64 (uint_to_fp (i32 (zextloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))), (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub))>; def : Pat <(f64 (uint_to_fp (i32 (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))))), (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (LDURBi GPR64sp:$Rn, simm9:$offset), bsub))>; // 16-bits -> double. defm : UIntToFPROLoadPat; def : Pat <(f64 (uint_to_fp (i32 (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))), (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub))>; def : Pat <(f64 (uint_to_fp (i32 (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))))), (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (LDURHi GPR64sp:$Rn, simm9:$offset), hsub))>; // 32-bits -> double. defm : UIntToFPROLoadPat; def : Pat <(f64 (uint_to_fp (i32 (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))))), (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (LDRSui GPR64sp:$Rn, uimm12s4:$offset), ssub))>; def : Pat <(f64 (uint_to_fp (i32 (load (am_unscaled32 GPR64sp:$Rn, simm9:$offset))))), (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (LDURSi GPR64sp:$Rn, simm9:$offset), ssub))>; // 64-bits -> double are handled in target specific dag combine: // performIntToFpCombine. } // let Predicates = [HasNEONandIsStreamingSafe] //===----------------------------------------------------------------------===// // Advanced SIMD three different-sized vector instructions. 
//===----------------------------------------------------------------------===// defm ADDHN : SIMDNarrowThreeVectorBHS<0,0b0100,"addhn", int_aarch64_neon_addhn>; defm SUBHN : SIMDNarrowThreeVectorBHS<0,0b0110,"subhn", int_aarch64_neon_subhn>; defm RADDHN : SIMDNarrowThreeVectorBHS<1,0b0100,"raddhn",int_aarch64_neon_raddhn>; defm RSUBHN : SIMDNarrowThreeVectorBHS<1,0b0110,"rsubhn",int_aarch64_neon_rsubhn>; defm PMULL : SIMDDifferentThreeVectorBD<0,0b1110,"pmull", AArch64pmull>; defm SABAL : SIMDLongThreeVectorTiedBHSabal<0,0b0101,"sabal", AArch64sabd>; defm SABDL : SIMDLongThreeVectorBHSabdl<0, 0b0111, "sabdl", AArch64sabd>; defm SADDL : SIMDLongThreeVectorBHS< 0, 0b0000, "saddl", BinOpFrag<(add (sext node:$LHS), (sext node:$RHS))>>; defm SADDW : SIMDWideThreeVectorBHS< 0, 0b0001, "saddw", BinOpFrag<(add node:$LHS, (sext node:$RHS))>>; defm SMLAL : SIMDLongThreeVectorTiedBHS<0, 0b1000, "smlal", TriOpFrag<(add node:$LHS, (AArch64smull node:$MHS, node:$RHS))>>; defm SMLSL : SIMDLongThreeVectorTiedBHS<0, 0b1010, "smlsl", TriOpFrag<(sub node:$LHS, (AArch64smull node:$MHS, node:$RHS))>>; defm SMULL : SIMDLongThreeVectorBHS<0, 0b1100, "smull", AArch64smull>; defm SQDMLAL : SIMDLongThreeVectorSQDMLXTiedHS<0, 0b1001, "sqdmlal", int_aarch64_neon_sqadd>; defm SQDMLSL : SIMDLongThreeVectorSQDMLXTiedHS<0, 0b1011, "sqdmlsl", int_aarch64_neon_sqsub>; defm SQDMULL : SIMDLongThreeVectorHS<0, 0b1101, "sqdmull", int_aarch64_neon_sqdmull>; defm SSUBL : SIMDLongThreeVectorBHS<0, 0b0010, "ssubl", BinOpFrag<(sub (sext node:$LHS), (sext node:$RHS))>>; defm SSUBW : SIMDWideThreeVectorBHS<0, 0b0011, "ssubw", BinOpFrag<(sub node:$LHS, (sext node:$RHS))>>; defm UABAL : SIMDLongThreeVectorTiedBHSabal<1, 0b0101, "uabal", AArch64uabd>; defm UADDL : SIMDLongThreeVectorBHS<1, 0b0000, "uaddl", BinOpFrag<(add (zanyext node:$LHS), (zanyext node:$RHS))>>; defm UADDW : SIMDWideThreeVectorBHS<1, 0b0001, "uaddw", BinOpFrag<(add node:$LHS, (zanyext node:$RHS))>>; defm UMLAL : SIMDLongThreeVectorTiedBHS<1, 0b1000, "umlal", TriOpFrag<(add node:$LHS, (AArch64umull node:$MHS, node:$RHS))>>; defm UMLSL : SIMDLongThreeVectorTiedBHS<1, 0b1010, "umlsl", TriOpFrag<(sub node:$LHS, (AArch64umull node:$MHS, node:$RHS))>>; defm UMULL : SIMDLongThreeVectorBHS<1, 0b1100, "umull", AArch64umull>; defm USUBL : SIMDLongThreeVectorBHS<1, 0b0010, "usubl", BinOpFrag<(sub (zanyext node:$LHS), (zanyext node:$RHS))>>; defm USUBW : SIMDWideThreeVectorBHS< 1, 0b0011, "usubw", BinOpFrag<(sub node:$LHS, (zanyext node:$RHS))>>; // Additional patterns for [SU]ML[AS]L multiclass Neon_mul_acc_widen_patterns { def : Pat<(v4i16 (opnode V64:$Ra, (v4i16 (extract_subvector (vecopnode (v8i8 V64:$Rn),(v8i8 V64:$Rm)), (i64 0))))), (EXTRACT_SUBREG (v8i16 (INST8B (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), V64:$Ra, dsub), V64:$Rn, V64:$Rm)), dsub)>; def : Pat<(v2i32 (opnode V64:$Ra, (v2i32 (extract_subvector (vecopnode (v4i16 V64:$Rn),(v4i16 V64:$Rm)), (i64 0))))), (EXTRACT_SUBREG (v4i32 (INST4H (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), V64:$Ra, dsub), V64:$Rn, V64:$Rm)), dsub)>; def : Pat<(v1i64 (opnode V64:$Ra, (v1i64 (extract_subvector (vecopnode (v2i32 V64:$Rn),(v2i32 V64:$Rm)), (i64 0))))), (EXTRACT_SUBREG (v2i64 (INST2S (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), V64:$Ra, dsub), V64:$Rn, V64:$Rm)), dsub)>; } defm : Neon_mul_acc_widen_patterns; defm : Neon_mul_acc_widen_patterns; defm : Neon_mul_acc_widen_patterns; defm : Neon_mul_acc_widen_patterns; multiclass Neon_addl_extract_patterns { def : Pat<(v4i16 (opnode (extract_subvector (ext (v8i8 V64:$Rn)), (i64 0)), 
(extract_subvector (ext (v8i8 V64:$Rm)), (i64 0)))), (EXTRACT_SUBREG (v8i16 (!cast(Inst#"Lv8i8_v8i16") V64:$Rn, V64:$Rm)), dsub)>; def : Pat<(v2i32 (opnode (extract_subvector (ext (v4i16 V64:$Rn)), (i64 0)), (extract_subvector (ext (v4i16 V64:$Rm)), (i64 0)))), (EXTRACT_SUBREG (v4i32 (!cast(Inst#"Lv4i16_v4i32") V64:$Rn, V64:$Rm)), dsub)>; def : Pat<(v1i64 (opnode (extract_subvector (ext (v2i32 V64:$Rn)), (i64 0)), (extract_subvector (ext (v2i32 V64:$Rm)), (i64 0)))), (EXTRACT_SUBREG (v2i64 (!cast(Inst#"Lv2i32_v2i64") V64:$Rn, V64:$Rm)), dsub)>; def : Pat<(v4i16 (opnode (v4i16 V64:$Rn), (extract_subvector (ext (v8i8 V64:$Rm)), (i64 0)))), (EXTRACT_SUBREG (v8i16 (!cast(Inst#"Wv8i8_v8i16") (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), V64:$Rn, dsub), V64:$Rm)), dsub)>; def : Pat<(v2i32 (opnode (v2i32 V64:$Rn), (extract_subvector (ext (v4i16 V64:$Rm)), (i64 0)))), (EXTRACT_SUBREG (v4i32 (!cast(Inst#"Wv4i16_v4i32") (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), V64:$Rn, dsub), V64:$Rm)), dsub)>; def : Pat<(v1i64 (opnode (v1i64 V64:$Rn), (extract_subvector (ext (v2i32 V64:$Rm)), (i64 0)))), (EXTRACT_SUBREG (v2i64 (!cast(Inst#"Wv2i32_v2i64") (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), V64:$Rn, dsub), V64:$Rm)), dsub)>; } defm : Neon_addl_extract_patterns; defm : Neon_addl_extract_patterns; defm : Neon_addl_extract_patterns; defm : Neon_addl_extract_patterns; // CodeGen patterns for addhn and subhn instructions, which can actually be // written in LLVM IR without too much difficulty. // Prioritize ADDHN and SUBHN over UZP2. let AddedComplexity = 10 in { // ADDHN def : Pat<(v8i8 (trunc (v8i16 (AArch64vlshr (add V128:$Rn, V128:$Rm), (i32 8))))), (ADDHNv8i16_v8i8 V128:$Rn, V128:$Rm)>; def : Pat<(v4i16 (trunc (v4i32 (AArch64vlshr (add V128:$Rn, V128:$Rm), (i32 16))))), (ADDHNv4i32_v4i16 V128:$Rn, V128:$Rm)>; def : Pat<(v2i32 (trunc (v2i64 (AArch64vlshr (add V128:$Rn, V128:$Rm), (i32 32))))), (ADDHNv2i64_v2i32 V128:$Rn, V128:$Rm)>; def : Pat<(concat_vectors (v8i8 V64:$Rd), (trunc (v8i16 (AArch64vlshr (add V128:$Rn, V128:$Rm), (i32 8))))), (ADDHNv8i16_v16i8 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub), V128:$Rn, V128:$Rm)>; def : Pat<(concat_vectors (v4i16 V64:$Rd), (trunc (v4i32 (AArch64vlshr (add V128:$Rn, V128:$Rm), (i32 16))))), (ADDHNv4i32_v8i16 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub), V128:$Rn, V128:$Rm)>; def : Pat<(concat_vectors (v2i32 V64:$Rd), (trunc (v2i64 (AArch64vlshr (add V128:$Rn, V128:$Rm), (i32 32))))), (ADDHNv2i64_v4i32 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub), V128:$Rn, V128:$Rm)>; // SUBHN def : Pat<(v8i8 (trunc (v8i16 (AArch64vlshr (sub V128:$Rn, V128:$Rm), (i32 8))))), (SUBHNv8i16_v8i8 V128:$Rn, V128:$Rm)>; def : Pat<(v4i16 (trunc (v4i32 (AArch64vlshr (sub V128:$Rn, V128:$Rm), (i32 16))))), (SUBHNv4i32_v4i16 V128:$Rn, V128:$Rm)>; def : Pat<(v2i32 (trunc (v2i64 (AArch64vlshr (sub V128:$Rn, V128:$Rm), (i32 32))))), (SUBHNv2i64_v2i32 V128:$Rn, V128:$Rm)>; def : Pat<(concat_vectors (v8i8 V64:$Rd), (trunc (v8i16 (AArch64vlshr (sub V128:$Rn, V128:$Rm), (i32 8))))), (SUBHNv8i16_v16i8 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub), V128:$Rn, V128:$Rm)>; def : Pat<(concat_vectors (v4i16 V64:$Rd), (trunc (v4i32 (AArch64vlshr (sub V128:$Rn, V128:$Rm), (i32 16))))), (SUBHNv4i32_v8i16 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub), V128:$Rn, V128:$Rm)>; def : Pat<(concat_vectors (v2i32 V64:$Rd), (trunc (v2i64 (AArch64vlshr (sub V128:$Rn, V128:$Rm), (i32 32))))), (SUBHNv2i64_v4i32 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub), V128:$Rn, V128:$Rm)>; } // AddedComplexity = 10 
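// For illustration only (not additional patterns): the v8i8 ADDHN form above
// corresponds to IR of roughly this shape, and SUBHN is the same with sub in
// place of add:
//   %sum    = add <8 x i16> %a, %b
//   %hi     = lshr <8 x i16> %sum, <i16 8, i16 8, i16 8, i16 8,
//                                   i16 8, i16 8, i16 8, i16 8>
//   %narrow = trunc <8 x i16> %hi to <8 x i8>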
//----------------------------------------------------------------------------
// AdvSIMD bitwise extract from vector instruction.
//----------------------------------------------------------------------------

defm EXT : SIMDBitwiseExtract<"ext">;

def AdjustExtImm : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(8 + N->getZExtValue(), SDLoc(N), MVT::i32);
}]>;
multiclass ExtPat<ValueType VT64, ValueType VT128, int N> {
  def : Pat<(VT64 (AArch64ext V64:$Rn, V64:$Rm, (i32 imm:$imm))),
            (EXTv8i8 V64:$Rn, V64:$Rm, imm:$imm)>;
  def : Pat<(VT128 (AArch64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))),
            (EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>;
  // We use EXT to handle extract_subvector to copy the upper 64-bits of a
  // 128-bit vector.
  def : Pat<(VT64 (extract_subvector V128:$Rn, (i64 N))),
            (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>;
  // A 64-bit EXT of two halves of the same 128-bit register can be done as a
  // single 128-bit EXT.
  def : Pat<(VT64 (AArch64ext (extract_subvector V128:$Rn, (i64 0)),
                              (extract_subvector V128:$Rn, (i64 N)),
                              (i32 imm:$imm))),
            (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, imm:$imm), dsub)>;
  // A 64-bit EXT of the high half of a 128-bit register can be done using a
  // 128-bit EXT of the whole register with an adjustment to the immediate. The
  // top half of the other operand will be unset, but that doesn't matter as it
  // will not be used.
  def : Pat<(VT64 (AArch64ext (extract_subvector V128:$Rn, (i64 N)), V64:$Rm,
                              (i32 imm:$imm))),
            (EXTRACT_SUBREG (EXTv16i8 V128:$Rn,
                                      (SUBREG_TO_REG (i32 0), V64:$Rm, dsub),
                                      (AdjustExtImm imm:$imm)), dsub)>;
}

defm : ExtPat<v8i8, v16i8, 8>;
defm : ExtPat<v4i16, v8i16, 4>;
defm : ExtPat<v2i32, v4i32, 2>;
defm : ExtPat<v1i64, v2i64, 1>;
defm : ExtPat<v4f16, v8f16, 4>;
defm : ExtPat<v4bf16, v8bf16, 4>;
defm : ExtPat<v2f32, v4f32, 2>;
defm : ExtPat<v1f64, v2f64, 1>;

//----------------------------------------------------------------------------
// AdvSIMD zip vector
//----------------------------------------------------------------------------

defm TRN1 : SIMDZipVector<0b010, "trn1", AArch64trn1>;
defm TRN2 : SIMDZipVector<0b110, "trn2", AArch64trn2>;
defm UZP1 : SIMDZipVector<0b001, "uzp1", AArch64uzp1>;
defm UZP2 : SIMDZipVector<0b101, "uzp2", AArch64uzp2>;
defm ZIP1 : SIMDZipVector<0b011, "zip1", AArch64zip1>;
defm ZIP2 : SIMDZipVector<0b111, "zip2", AArch64zip2>;

def trunc_optional_assert_ext : PatFrags<(ops node:$op0),
                                         [(trunc node:$op0),
                                          (assertzext (trunc node:$op0)),
                                          (assertsext (trunc node:$op0))]>;

// concat_vectors(trunc(x), trunc(y)) -> uzp1(x, y)
// concat_vectors(assertzext(trunc(x)), assertzext(trunc(y))) -> uzp1(x, y)
// concat_vectors(assertsext(trunc(x)), assertsext(trunc(y))) -> uzp1(x, y)
class concat_trunc_to_uzp1_pat<ValueType SrcTy, ValueType TruncTy, ValueType ConcatTy>
  : Pat<(ConcatTy (concat_vectors (TruncTy (trunc_optional_assert_ext (SrcTy V128:$Vn))),
                                  (TruncTy (trunc_optional_assert_ext (SrcTy V128:$Vm))))),
        (!cast<Instruction>("UZP1"#ConcatTy) V128:$Vn, V128:$Vm)>;
def : concat_trunc_to_uzp1_pat<v4i32, v4i16, v8i16>;
def : concat_trunc_to_uzp1_pat<v8i16, v8i8, v16i8>;
def : concat_trunc_to_uzp1_pat<v2i64, v2i32, v4i32>;

// trunc(concat_vectors(trunc(x), trunc(y))) -> xtn(uzp1(x, y))
// trunc(concat_vectors(assertzext(trunc(x)), assertzext(trunc(y)))) -> xtn(uzp1(x, y))
// trunc(concat_vectors(assertsext(trunc(x)), assertsext(trunc(y)))) -> xtn(uzp1(x, y))
class trunc_concat_trunc_to_xtn_uzp1_pat<ValueType SrcTy, ValueType TruncTy, ValueType ConcatTy,
                                         ValueType Ty>
  : Pat<(Ty (trunc_optional_assert_ext
                 (ConcatTy (concat_vectors
                                (TruncTy (trunc_optional_assert_ext (SrcTy V128:$Vn))),
                                (TruncTy (trunc_optional_assert_ext (SrcTy V128:$Vm))))))),
        (!cast<Instruction>("XTN"#Ty) (!cast<Instruction>("UZP1"#ConcatTy) V128:$Vn, V128:$Vm))>;
def : trunc_concat_trunc_to_xtn_uzp1_pat<v4i32, v4i16, v8i16, v8i8>;
def : trunc_concat_trunc_to_xtn_uzp1_pat<v2i64, v2i32, v4i32, v4i16>;

def : Pat<(v8i8 (trunc (concat_vectors (v4i16 V64:$Vn), (v4i16 V64:$Vm)))),
          (UZP1v8i8 V64:$Vn, V64:$Vm)>;
def :
Pat<(v4i16 (trunc (concat_vectors (v2i32 V64:$Vn), (v2i32 V64:$Vm)))), (UZP1v4i16 V64:$Vn, V64:$Vm)>; def : Pat<(v16i8 (concat_vectors (v8i8 (trunc (AArch64vlshr (v8i16 V128:$Vn), (i32 8)))), (v8i8 (trunc (AArch64vlshr (v8i16 V128:$Vm), (i32 8)))))), (UZP2v16i8 V128:$Vn, V128:$Vm)>; def : Pat<(v8i16 (concat_vectors (v4i16 (trunc (AArch64vlshr (v4i32 V128:$Vn), (i32 16)))), (v4i16 (trunc (AArch64vlshr (v4i32 V128:$Vm), (i32 16)))))), (UZP2v8i16 V128:$Vn, V128:$Vm)>; def : Pat<(v4i32 (concat_vectors (v2i32 (trunc (AArch64vlshr (v2i64 V128:$Vn), (i32 32)))), (v2i32 (trunc (AArch64vlshr (v2i64 V128:$Vm), (i32 32)))))), (UZP2v4i32 V128:$Vn, V128:$Vm)>; //---------------------------------------------------------------------------- // AdvSIMD TBL/TBX instructions //---------------------------------------------------------------------------- defm TBL : SIMDTableLookup< 0, "tbl">; defm TBX : SIMDTableLookupTied<1, "tbx">; def : Pat<(v8i8 (int_aarch64_neon_tbl1 (v16i8 VecListOne128:$Rn), (v8i8 V64:$Ri))), (TBLv8i8One VecListOne128:$Rn, V64:$Ri)>; def : Pat<(v16i8 (int_aarch64_neon_tbl1 (v16i8 V128:$Ri), (v16i8 V128:$Rn))), (TBLv16i8One V128:$Ri, V128:$Rn)>; def : Pat<(v8i8 (int_aarch64_neon_tbx1 (v8i8 V64:$Rd), (v16i8 VecListOne128:$Rn), (v8i8 V64:$Ri))), (TBXv8i8One V64:$Rd, VecListOne128:$Rn, V64:$Ri)>; def : Pat<(v16i8 (int_aarch64_neon_tbx1 (v16i8 V128:$Rd), (v16i8 V128:$Ri), (v16i8 V128:$Rn))), (TBXv16i8One V128:$Rd, V128:$Ri, V128:$Rn)>; //---------------------------------------------------------------------------- // AdvSIMD LUT instructions //---------------------------------------------------------------------------- let Predicates = [HasLUT] in { defm LUT2 : BaseSIMDTableLookupIndexed2<"luti2">; defm LUT4 : BaseSIMDTableLookupIndexed4<"luti4">; } //---------------------------------------------------------------------------- // AdvSIMD scalar DUP instruction //---------------------------------------------------------------------------- defm DUP : SIMDScalarDUP<"mov">; //---------------------------------------------------------------------------- // AdvSIMD scalar pairwise instructions //---------------------------------------------------------------------------- defm ADDP : SIMDPairwiseScalarD<0, 0b11011, "addp">; defm FADDP : SIMDFPPairwiseScalar<0, 0b01101, "faddp">; defm FMAXNMP : SIMDFPPairwiseScalar<0, 0b01100, "fmaxnmp">; defm FMAXP : SIMDFPPairwiseScalar<0, 0b01111, "fmaxp">; defm FMINNMP : SIMDFPPairwiseScalar<1, 0b01100, "fminnmp">; defm FMINP : SIMDFPPairwiseScalar<1, 0b01111, "fminp">; // Only the lower half of the result of the inner FADDP is used in the patterns // below, so the second operand does not matter. Re-use the first input // operand, so no additional dependencies need to be introduced. 
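// For example, the (f32 (vecreduce_fadd (v4f32 V128:$Rn))) pattern below
// selects roughly:
//   faddp v0.4s, v1.4s, v1.4s   // pairwise add; only the low 64 bits are used
//   faddp s0, v0.2s             // add the remaining pair into a scalar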
let Predicates = [HasFullFP16] in { def : Pat<(f16 (vecreduce_fadd (v8f16 V128:$Rn))), (FADDPv2i16p (EXTRACT_SUBREG (FADDPv8f16 (FADDPv8f16 V128:$Rn, V128:$Rn), V128:$Rn), dsub))>; def : Pat<(f16 (vecreduce_fadd (v4f16 V64:$Rn))), (FADDPv2i16p (FADDPv4f16 V64:$Rn, V64:$Rn))>; } def : Pat<(f32 (vecreduce_fadd (v4f32 V128:$Rn))), (FADDPv2i32p (EXTRACT_SUBREG (FADDPv4f32 V128:$Rn, V128:$Rn), dsub))>; def : Pat<(f32 (vecreduce_fadd (v2f32 V64:$Rn))), (FADDPv2i32p V64:$Rn)>; def : Pat<(f64 (vecreduce_fadd (v2f64 V128:$Rn))), (FADDPv2i64p V128:$Rn)>; def : Pat<(v2i64 (AArch64saddv V128:$Rn)), (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), (ADDPv2i64p V128:$Rn), dsub)>; def : Pat<(v2i64 (AArch64uaddv V128:$Rn)), (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), (ADDPv2i64p V128:$Rn), dsub)>; def : Pat<(f32 (int_aarch64_neon_faddv (v2f32 V64:$Rn))), (FADDPv2i32p V64:$Rn)>; def : Pat<(f32 (int_aarch64_neon_faddv (v4f32 V128:$Rn))), (FADDPv2i32p (EXTRACT_SUBREG (FADDPv4f32 V128:$Rn, V128:$Rn), dsub))>; def : Pat<(f64 (int_aarch64_neon_faddv (v2f64 V128:$Rn))), (FADDPv2i64p V128:$Rn)>; def : Pat<(f32 (AArch64fmaxnmv (v2f32 V64:$Rn))), (FMAXNMPv2i32p V64:$Rn)>; def : Pat<(f64 (AArch64fmaxnmv (v2f64 V128:$Rn))), (FMAXNMPv2i64p V128:$Rn)>; def : Pat<(f32 (AArch64fmaxv (v2f32 V64:$Rn))), (FMAXPv2i32p V64:$Rn)>; def : Pat<(f64 (AArch64fmaxv (v2f64 V128:$Rn))), (FMAXPv2i64p V128:$Rn)>; def : Pat<(f32 (AArch64fminnmv (v2f32 V64:$Rn))), (FMINNMPv2i32p V64:$Rn)>; def : Pat<(f64 (AArch64fminnmv (v2f64 V128:$Rn))), (FMINNMPv2i64p V128:$Rn)>; def : Pat<(f32 (AArch64fminv (v2f32 V64:$Rn))), (FMINPv2i32p V64:$Rn)>; def : Pat<(f64 (AArch64fminv (v2f64 V128:$Rn))), (FMINPv2i64p V128:$Rn)>; //---------------------------------------------------------------------------- // AdvSIMD INS/DUP instructions //---------------------------------------------------------------------------- def DUPv8i8gpr : SIMDDupFromMain<0, {?,?,?,?,1}, ".8b", v8i8, V64, GPR32>; def DUPv16i8gpr : SIMDDupFromMain<1, {?,?,?,?,1}, ".16b", v16i8, V128, GPR32>; def DUPv4i16gpr : SIMDDupFromMain<0, {?,?,?,1,0}, ".4h", v4i16, V64, GPR32>; def DUPv8i16gpr : SIMDDupFromMain<1, {?,?,?,1,0}, ".8h", v8i16, V128, GPR32>; def DUPv2i32gpr : SIMDDupFromMain<0, {?,?,1,0,0}, ".2s", v2i32, V64, GPR32>; def DUPv4i32gpr : SIMDDupFromMain<1, {?,?,1,0,0}, ".4s", v4i32, V128, GPR32>; def DUPv2i64gpr : SIMDDupFromMain<1, {?,1,0,0,0}, ".2d", v2i64, V128, GPR64>; def DUPv2i64lane : SIMDDup64FromElement; def DUPv2i32lane : SIMDDup32FromElement<0, ".2s", v2i32, V64>; def DUPv4i32lane : SIMDDup32FromElement<1, ".4s", v4i32, V128>; def DUPv4i16lane : SIMDDup16FromElement<0, ".4h", v4i16, V64>; def DUPv8i16lane : SIMDDup16FromElement<1, ".8h", v8i16, V128>; def DUPv8i8lane : SIMDDup8FromElement <0, ".8b", v8i8, V64>; def DUPv16i8lane : SIMDDup8FromElement <1, ".16b", v16i8, V128>; // DUP from a 64-bit register to a 64-bit register is just a copy def : Pat<(v1i64 (AArch64dup (i64 GPR64:$Rn))), (COPY_TO_REGCLASS GPR64:$Rn, FPR64)>; def : Pat<(v1f64 (AArch64dup (f64 FPR64:$Rn))), (COPY_TO_REGCLASS FPR64:$Rn, FPR64)>; def : Pat<(v2f32 (AArch64dup (f32 FPR32:$Rn))), (v2f32 (DUPv2i32lane (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rn, ssub), (i64 0)))>; def : Pat<(v4f32 (AArch64dup (f32 FPR32:$Rn))), (v4f32 (DUPv4i32lane (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rn, ssub), (i64 0)))>; def : Pat<(v2f64 (AArch64dup (f64 FPR64:$Rn))), (v2f64 (DUPv2i64lane (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR64:$Rn, dsub), (i64 0)))>; def : Pat<(v4f16 (AArch64dup (f16 FPR16:$Rn))), (v4f16 (DUPv4i16lane 
            (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR16:$Rn, hsub), (i64 0)))>;
def : Pat<(v4bf16 (AArch64dup (bf16 FPR16:$Rn))),
          (v4bf16 (DUPv4i16lane
            (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR16:$Rn, hsub), (i64 0)))>;
def : Pat<(v8f16 (AArch64dup (f16 FPR16:$Rn))),
          (v8f16 (DUPv8i16lane
            (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR16:$Rn, hsub), (i64 0)))>;
def : Pat<(v8bf16 (AArch64dup (bf16 FPR16:$Rn))),
          (v8bf16 (DUPv8i16lane
            (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR16:$Rn, hsub), (i64 0)))>;

def : Pat<(v4f16 (AArch64duplane16 (v8f16 V128:$Rn), VectorIndexH:$imm)),
          (DUPv4i16lane V128:$Rn, VectorIndexH:$imm)>;
def : Pat<(v8f16 (AArch64duplane16 (v8f16 V128:$Rn), VectorIndexH:$imm)),
          (DUPv8i16lane V128:$Rn, VectorIndexH:$imm)>;
def : Pat<(v4bf16 (AArch64duplane16 (v8bf16 V128:$Rn), VectorIndexH:$imm)),
          (DUPv4i16lane V128:$Rn, VectorIndexH:$imm)>;
def : Pat<(v8bf16 (AArch64duplane16 (v8bf16 V128:$Rn), VectorIndexH:$imm)),
          (DUPv8i16lane V128:$Rn, VectorIndexH:$imm)>;
def : Pat<(v2f32 (AArch64duplane32 (v4f32 V128:$Rn), VectorIndexS:$imm)),
          (DUPv2i32lane V128:$Rn, VectorIndexS:$imm)>;
def : Pat<(v4f32 (AArch64duplane32 (v4f32 V128:$Rn), VectorIndexS:$imm)),
          (DUPv4i32lane V128:$Rn, VectorIndexS:$imm)>;
def : Pat<(v2f64 (AArch64duplane64 (v2f64 V128:$Rn), VectorIndexD:$imm)),
          (DUPv2i64lane V128:$Rn, VectorIndexD:$imm)>;

// If there's an (AArch64dup (vector_extract ...) ...), we can use a duplane
// instruction even if the types don't match: we just have to remap the lane
// carefully. N.b. this trick only applies to truncations.
def VecIndex_x2 : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(2 * N->getZExtValue(), SDLoc(N), MVT::i64);
}]>;
def VecIndex_x4 : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(4 * N->getZExtValue(), SDLoc(N), MVT::i64);
}]>;
def VecIndex_x8 : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(8 * N->getZExtValue(), SDLoc(N), MVT::i64);
}]>;

multiclass DUPWithTruncPats<ValueType ResVT, ValueType Src64VT, ValueType Src128VT,
                            ValueType ScalVT, Instruction DUP, SDNodeXForm IdxXFORM> {
  def : Pat<(ResVT (AArch64dup (ScalVT (vector_extract (Src128VT V128:$Rn),
                                                        imm:$idx)))),
            (DUP V128:$Rn, (IdxXFORM imm:$idx))>;

  def : Pat<(ResVT (AArch64dup (ScalVT (vector_extract (Src64VT V64:$Rn),
                                                        imm:$idx)))),
            (DUP (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), (IdxXFORM imm:$idx))>;
}

defm : DUPWithTruncPats<v8i8,  v4i16, v8i16, i32, DUPv8i8lane,  VecIndex_x2>;
defm : DUPWithTruncPats<v8i8,  v2i32, v4i32, i32, DUPv8i8lane,  VecIndex_x4>;
defm : DUPWithTruncPats<v4i16, v2i32, v4i32, i32, DUPv4i16lane, VecIndex_x2>;

defm : DUPWithTruncPats<v16i8, v4i16, v8i16, i32, DUPv16i8lane, VecIndex_x2>;
defm : DUPWithTruncPats<v16i8, v2i32, v4i32, i32, DUPv16i8lane, VecIndex_x4>;
defm : DUPWithTruncPats<v8i16, v2i32, v4i32, i32, DUPv8i16lane, VecIndex_x2>;

multiclass DUPWithTrunci64Pats<ValueType ResVT, Instruction DUP, SDNodeXForm IdxXFORM> {
  def : Pat<(ResVT (AArch64dup (i32 (trunc (extractelt (v2i64 V128:$Rn),
                                                       imm:$idx))))),
            (DUP V128:$Rn, (IdxXFORM imm:$idx))>;

  def : Pat<(ResVT (AArch64dup (i32 (trunc (extractelt (v1i64 V64:$Rn),
                                                       imm:$idx))))),
            (DUP (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), (IdxXFORM imm:$idx))>;
}

defm : DUPWithTrunci64Pats<v8i8,  DUPv8i8lane,  VecIndex_x8>;
defm : DUPWithTrunci64Pats<v4i16, DUPv4i16lane, VecIndex_x4>;
defm : DUPWithTrunci64Pats<v2i32, DUPv2i32lane, VecIndex_x2>;

defm : DUPWithTrunci64Pats<v16i8, DUPv16i8lane, VecIndex_x8>;
defm : DUPWithTrunci64Pats<v8i16, DUPv8i16lane, VecIndex_x4>;
defm : DUPWithTrunci64Pats<v4i32, DUPv4i32lane, VecIndex_x2>;

// SMOV and UMOV definitions, with some extra patterns for convenience
defm SMOV : SMov;
defm UMOV : UMov;

def : Pat<(sext_inreg (vector_extract (v16i8 V128:$Rn), VectorIndexB:$idx), i8),
          (i32 (SMOVvi8to32 V128:$Rn, VectorIndexB:$idx))>;
def : Pat<(sext_inreg (vector_extract (v16i8 V128:$Rn), VectorIndexB:$idx), i8),
          (i64 (SMOVvi8to64 V128:$Rn, VectorIndexB:$idx))>;
def : Pat<(sext_inreg (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx),i16),
          (i32 (SMOVvi16to32 V128:$Rn, VectorIndexH:$idx))>;
def : Pat<(sext_inreg (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx),i16),
          (i64 (SMOVvi16to64 V128:$Rn, VectorIndexH:$idx))>;
def : Pat<(sext_inreg (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx),i16),
          (i32 (SMOVvi16to32 V128:$Rn, VectorIndexH:$idx))>;
def : Pat<(sext (i32
(vector_extract (v4i32 V128:$Rn), VectorIndexS:$idx))), (i64 (SMOVvi32to64 V128:$Rn, VectorIndexS:$idx))>; def : Pat<(sext_inreg (i64 (anyext (i32 (vector_extract (v16i8 V128:$Rn), VectorIndexB:$idx)))), i8), (i64 (SMOVvi8to64 V128:$Rn, VectorIndexB:$idx))>; def : Pat<(sext_inreg (i64 (anyext (i32 (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx)))), i16), (i64 (SMOVvi16to64 V128:$Rn, VectorIndexH:$idx))>; // Extracting i8 or i16 elements will have the zero-extend transformed to // an 'and' mask by type legalization since neither i8 nor i16 are legal types // for AArch64. Match these patterns here since UMOV already zeroes out the high // bits of the destination register. def : Pat<(and (vector_extract (v16i8 V128:$Rn), VectorIndexB:$idx), (i32 0xff)), (i32 (UMOVvi8 V128:$Rn, VectorIndexB:$idx))>; def : Pat<(and (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx), (i32 0xffff)), (i32 (UMOVvi16 V128:$Rn, VectorIndexH:$idx))>; def : Pat<(i64 (and (i64 (anyext (i32 (vector_extract (v16i8 V128:$Rn), VectorIndexB:$idx)))), (i64 0xff))), (SUBREG_TO_REG (i64 0), (i32 (UMOVvi8 V128:$Rn, VectorIndexB:$idx)), sub_32)>; def : Pat<(i64 (and (i64 (anyext (i32 (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx)))), (i64 0xffff))), (SUBREG_TO_REG (i64 0), (i32 (UMOVvi16 V128:$Rn, VectorIndexH:$idx)), sub_32)>; defm INS : SIMDIns; def : Pat<(v16i8 (scalar_to_vector GPR32:$Rn)), (SUBREG_TO_REG (i32 0), (f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>; def : Pat<(v8i8 (scalar_to_vector GPR32:$Rn)), (SUBREG_TO_REG (i32 0), (f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>; // The top bits will be zero from the FMOVWSr def : Pat<(v8i8 (bitconvert (i64 (zext GPR32:$Rn)))), (SUBREG_TO_REG (i32 0), (f32 (FMOVWSr GPR32:$Rn)), ssub)>; def : Pat<(v8i16 (scalar_to_vector GPR32:$Rn)), (SUBREG_TO_REG (i32 0), (f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>; def : Pat<(v4i16 (scalar_to_vector GPR32:$Rn)), (SUBREG_TO_REG (i32 0), (f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>; def : Pat<(v4f16 (scalar_to_vector (f16 FPR16:$Rn))), (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>; def : Pat<(v8f16 (scalar_to_vector (f16 FPR16:$Rn))), (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>; def : Pat<(v4bf16 (scalar_to_vector (bf16 FPR16:$Rn))), (INSERT_SUBREG (v4bf16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>; def : Pat<(v8bf16 (scalar_to_vector (bf16 FPR16:$Rn))), (INSERT_SUBREG (v8bf16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>; def : Pat<(v2i32 (scalar_to_vector (i32 FPR32:$Rn))), (v2i32 (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)), (i32 FPR32:$Rn), ssub))>; def : Pat<(v4i32 (scalar_to_vector (i32 FPR32:$Rn))), (v4i32 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), (i32 FPR32:$Rn), ssub))>; def : Pat<(v2i64 (scalar_to_vector (i64 FPR64:$Rn))), (v2i64 (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), (i64 FPR64:$Rn), dsub))>; def : Pat<(v4f16 (scalar_to_vector (f16 FPR16:$Rn))), (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>; def : Pat<(v8f16 (scalar_to_vector (f16 FPR16:$Rn))), (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>; def : Pat<(v4bf16 (scalar_to_vector (bf16 FPR16:$Rn))), (INSERT_SUBREG (v4bf16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>; def : Pat<(v8bf16 (scalar_to_vector (bf16 FPR16:$Rn))), (INSERT_SUBREG (v8bf16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>; def : Pat<(v4f32 (scalar_to_vector (f32 FPR32:$Rn))), (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR32:$Rn, ssub)>; def : Pat<(v2f32 (scalar_to_vector (f32 FPR32:$Rn))), (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), FPR32:$Rn, ssub)>; def : Pat<(v2f64 (scalar_to_vector (f64 
FPR64:$Rn))), (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FPR64:$Rn, dsub)>; def : Pat<(v4f16 (vector_insert (v4f16 V64:$Rn), (f16 FPR16:$Rm), (i64 VectorIndexS:$imm))), (EXTRACT_SUBREG (INSvi16lane (v8f16 (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), V64:$Rn, dsub)), VectorIndexS:$imm, (v8f16 (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR16:$Rm, hsub)), (i64 0)), dsub)>; def : Pat<(vector_insert (v8f16 V128:$Rn), (f16 fpimm0), (i64 VectorIndexH:$imm)), (INSvi16gpr V128:$Rn, VectorIndexH:$imm, WZR)>; def : Pat<(vector_insert (v4f16 V64:$Rn), (f16 fpimm0), (i64 VectorIndexH:$imm)), (EXTRACT_SUBREG (INSvi16gpr (v8f16 (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), V64:$Rn, dsub)), VectorIndexH:$imm, WZR), dsub)>; def : Pat<(vector_insert (v4f32 V128:$Rn), (f32 fpimm0), (i64 VectorIndexS:$imm)), (INSvi32gpr V128:$Rn, VectorIndexS:$imm, WZR)>; def : Pat<(vector_insert (v2f32 V64:$Rn), (f32 fpimm0), (i64 VectorIndexS:$imm)), (EXTRACT_SUBREG (INSvi32gpr (v4f32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), V64:$Rn, dsub)), VectorIndexS:$imm, WZR), dsub)>; def : Pat<(vector_insert v2f64:$Rn, (f64 fpimm0), (i64 VectorIndexD:$imm)), (INSvi64gpr V128:$Rn, VectorIndexS:$imm, XZR)>; def : Pat<(v8f16 (vector_insert (v8f16 V128:$Rn), (f16 FPR16:$Rm), (i64 VectorIndexH:$imm))), (INSvi16lane V128:$Rn, VectorIndexH:$imm, (v8f16 (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR16:$Rm, hsub)), (i64 0))>; def : Pat<(v4bf16 (vector_insert (v4bf16 V64:$Rn), (bf16 FPR16:$Rm), (i64 VectorIndexS:$imm))), (EXTRACT_SUBREG (INSvi16lane (v8bf16 (INSERT_SUBREG (v8bf16 (IMPLICIT_DEF)), V64:$Rn, dsub)), VectorIndexS:$imm, (v8bf16 (INSERT_SUBREG (v8bf16 (IMPLICIT_DEF)), FPR16:$Rm, hsub)), (i64 0)), dsub)>; def : Pat<(v8bf16 (vector_insert (v8bf16 V128:$Rn), (bf16 FPR16:$Rm), (i64 VectorIndexH:$imm))), (INSvi16lane V128:$Rn, VectorIndexH:$imm, (v8bf16 (INSERT_SUBREG (v8bf16 (IMPLICIT_DEF)), FPR16:$Rm, hsub)), (i64 0))>; def : Pat<(v2f32 (vector_insert (v2f32 V64:$Rn), (f32 FPR32:$Rm), (i64 VectorIndexS:$imm))), (EXTRACT_SUBREG (INSvi32lane (v4f32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), V64:$Rn, dsub)), VectorIndexS:$imm, (v4f32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR32:$Rm, ssub)), (i64 0)), dsub)>; def : Pat<(v4f32 (vector_insert (v4f32 V128:$Rn), (f32 FPR32:$Rm), (i64 VectorIndexS:$imm))), (INSvi32lane V128:$Rn, VectorIndexS:$imm, (v4f32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR32:$Rm, ssub)), (i64 0))>; def : Pat<(v2f64 (vector_insert (v2f64 V128:$Rn), (f64 FPR64:$Rm), (i64 VectorIndexD:$imm))), (INSvi64lane V128:$Rn, VectorIndexD:$imm, (v2f64 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FPR64:$Rm, dsub)), (i64 0))>; def : Pat<(v2i32 (vector_insert (v2i32 V64:$Rn), (i32 GPR32:$Rm), (i64 VectorIndexS:$imm))), (EXTRACT_SUBREG (INSvi32gpr (v4i32 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), V64:$Rn, dsub)), VectorIndexS:$imm, GPR32:$Rm), dsub)>; def : Pat<(v4i16 (vector_insert (v4i16 V64:$Rn), (i32 GPR32:$Rm), (i64 VectorIndexH:$imm))), (EXTRACT_SUBREG (INSvi16gpr (v8i16 (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), V64:$Rn, dsub)), VectorIndexH:$imm, GPR32:$Rm), dsub)>; def : Pat<(v8i8 (vector_insert (v8i8 V64:$Rn), (i32 GPR32:$Rm), (i64 VectorIndexB:$imm))), (EXTRACT_SUBREG (INSvi8gpr (v16i8 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), V64:$Rn, dsub)), VectorIndexB:$imm, GPR32:$Rm), dsub)>; def : Pat<(v8i8 (vector_insert (v8i8 V64:$Rn), (i8 FPR8:$Rm), (i64 VectorIndexB:$imm))), (EXTRACT_SUBREG (INSvi8lane (v16i8 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), V64:$Rn, dsub)), VectorIndexB:$imm, (v16i8 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), FPR8:$Rm, bsub)), (i64 0)), dsub)>; def : Pat<(v16i8 
(vector_insert (v16i8 V128:$Rn), (i8 FPR8:$Rm), (i64 VectorIndexB:$imm))), (INSvi8lane V128:$Rn, VectorIndexB:$imm, (v16i8 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), FPR8:$Rm, bsub)), (i64 0))>; // Copy an element at a constant index in one vector into a constant indexed // element of another. // FIXME refactor to a shared class/dev parameterized on vector type, vector // index type and INS extension def : Pat<(v16i8 (int_aarch64_neon_vcopy_lane (v16i8 V128:$Vd), VectorIndexB:$idx, (v16i8 V128:$Vs), VectorIndexB:$idx2)), (v16i8 (INSvi8lane V128:$Vd, VectorIndexB:$idx, V128:$Vs, VectorIndexB:$idx2) )>; def : Pat<(v8i16 (int_aarch64_neon_vcopy_lane (v8i16 V128:$Vd), VectorIndexH:$idx, (v8i16 V128:$Vs), VectorIndexH:$idx2)), (v8i16 (INSvi16lane V128:$Vd, VectorIndexH:$idx, V128:$Vs, VectorIndexH:$idx2) )>; def : Pat<(v4i32 (int_aarch64_neon_vcopy_lane (v4i32 V128:$Vd), VectorIndexS:$idx, (v4i32 V128:$Vs), VectorIndexS:$idx2)), (v4i32 (INSvi32lane V128:$Vd, VectorIndexS:$idx, V128:$Vs, VectorIndexS:$idx2) )>; def : Pat<(v2i64 (int_aarch64_neon_vcopy_lane (v2i64 V128:$Vd), VectorIndexD:$idx, (v2i64 V128:$Vs), VectorIndexD:$idx2)), (v2i64 (INSvi64lane V128:$Vd, VectorIndexD:$idx, V128:$Vs, VectorIndexD:$idx2) )>; multiclass Neon_INS_elt_pattern { def : Pat<(VT128 (vector_insert V128:$src, (VTScal (vector_extract (VT128 V128:$Rn), (i64 imm:$Immn))), (i64 imm:$Immd))), (INS V128:$src, imm:$Immd, V128:$Rn, imm:$Immn)>; def : Pat<(VT128 (vector_insert V128:$src, (VTScal (vector_extract (VT64 V64:$Rn), (i64 imm:$Immn))), (i64 imm:$Immd))), (INS V128:$src, imm:$Immd, (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), imm:$Immn)>; def : Pat<(VT64 (vector_insert V64:$src, (VTScal (vector_extract (VT128 V128:$Rn), (i64 imm:$Immn))), (i64 imm:$Immd))), (EXTRACT_SUBREG (INS (SUBREG_TO_REG (i64 0), V64:$src, dsub), imm:$Immd, V128:$Rn, imm:$Immn), dsub)>; def : Pat<(VT64 (vector_insert V64:$src, (VTScal (vector_extract (VT64 V64:$Rn), (i64 imm:$Immn))), (i64 imm:$Immd))), (EXTRACT_SUBREG (INS (SUBREG_TO_REG (i64 0), V64:$src, dsub), imm:$Immd, (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), imm:$Immn), dsub)>; } defm : Neon_INS_elt_pattern; defm : Neon_INS_elt_pattern; defm : Neon_INS_elt_pattern; defm : Neon_INS_elt_pattern; defm : Neon_INS_elt_pattern; defm : Neon_INS_elt_pattern; defm : Neon_INS_elt_pattern; defm : Neon_INS_elt_pattern; // Insert from bitcast // vector_insert(bitcast(f32 src), n, lane) -> INSvi32lane(src, lane, INSERT_SUBREG(-, n), 0) def : Pat<(v4i32 (vector_insert v4i32:$src, (i32 (bitconvert (f32 FPR32:$Sn))), (i64 imm:$Immd))), (INSvi32lane V128:$src, imm:$Immd, (INSERT_SUBREG (IMPLICIT_DEF), FPR32:$Sn, ssub), 0)>; def : Pat<(v2i32 (vector_insert v2i32:$src, (i32 (bitconvert (f32 FPR32:$Sn))), (i64 imm:$Immd))), (EXTRACT_SUBREG (INSvi32lane (v4i32 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), V64:$src, dsub)), imm:$Immd, (INSERT_SUBREG (IMPLICIT_DEF), FPR32:$Sn, ssub), 0), dsub)>; def : Pat<(v2i64 (vector_insert v2i64:$src, (i64 (bitconvert (f64 FPR64:$Sn))), (i64 imm:$Immd))), (INSvi64lane V128:$src, imm:$Immd, (INSERT_SUBREG (IMPLICIT_DEF), FPR64:$Sn, dsub), 0)>; // bitcast of an extract // f32 bitcast(vector_extract(v4i32 src, lane)) -> EXTRACT_SUBREG(INSvi32lane(-, 0, src, lane)) def : Pat<(f32 (bitconvert (i32 (vector_extract v4i32:$src, imm:$Immd)))), (EXTRACT_SUBREG (INSvi32lane (IMPLICIT_DEF), 0, V128:$src, imm:$Immd), ssub)>; def : Pat<(f32 (bitconvert (i32 (vector_extract v4i32:$src, (i64 0))))), (EXTRACT_SUBREG V128:$src, ssub)>; def : Pat<(f64 (bitconvert (i64 (vector_extract v2i64:$src, 
imm:$Immd)))), (EXTRACT_SUBREG (INSvi64lane (IMPLICIT_DEF), 0, V128:$src, imm:$Immd), dsub)>; def : Pat<(f64 (bitconvert (i64 (vector_extract v2i64:$src, (i64 0))))), (EXTRACT_SUBREG V128:$src, dsub)>; // Floating point vector extractions are codegen'd as either a sequence of // subregister extractions, or a MOV (aka DUP here) if // the lane number is anything other than zero. def : Pat<(f64 (vector_extract (v2f64 V128:$Rn), (i64 0))), (f64 (EXTRACT_SUBREG V128:$Rn, dsub))>; def : Pat<(f32 (vector_extract (v4f32 V128:$Rn), (i64 0))), (f32 (EXTRACT_SUBREG V128:$Rn, ssub))>; def : Pat<(f16 (vector_extract (v8f16 V128:$Rn), (i64 0))), (f16 (EXTRACT_SUBREG V128:$Rn, hsub))>; def : Pat<(bf16 (vector_extract (v8bf16 V128:$Rn), (i64 0))), (bf16 (EXTRACT_SUBREG V128:$Rn, hsub))>; def : Pat<(vector_extract (v2f64 V128:$Rn), VectorIndexD:$idx), (f64 (DUPi64 V128:$Rn, VectorIndexD:$idx))>; def : Pat<(vector_extract (v4f32 V128:$Rn), VectorIndexS:$idx), (f32 (DUPi32 V128:$Rn, VectorIndexS:$idx))>; def : Pat<(vector_extract (v8f16 V128:$Rn), VectorIndexH:$idx), (f16 (DUPi16 V128:$Rn, VectorIndexH:$idx))>; def : Pat<(vector_extract (v8bf16 V128:$Rn), VectorIndexH:$idx), (bf16 (DUPi16 V128:$Rn, VectorIndexH:$idx))>; // All concat_vectors operations are canonicalised to act on i64 vectors for // AArch64. In the general case we need an instruction, which had just as well be // INS. multiclass ConcatPat { def : Pat<(DstTy (concat_vectors (SrcTy V64:$Rd), V64:$Rn)), (INSvi64lane (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), 1, (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rn, dsub), 0)>; // If the high lanes are zero we can instead emit a d->d register mov, which // will implicitly clear the upper bits. def : Pat<(DstTy (concat_vectors (SrcTy V64:$Rn), immAllZerosV)), (SUBREG_TO_REG (i64 0), (FMOVDr V64:$Rn), dsub)>; // If the high lanes are undef we can just ignore them: def : Pat<(DstTy (concat_vectors (SrcTy V64:$Rn), undef)), (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rn, dsub)>; } defm : ConcatPat; defm : ConcatPat; defm : ConcatPat; defm : ConcatPat; defm : ConcatPat; defm : ConcatPat; defm : ConcatPat; defm : ConcatPat; //---------------------------------------------------------------------------- // AdvSIMD across lanes instructions //---------------------------------------------------------------------------- defm ADDV : SIMDAcrossLanesBHS<0, 0b11011, "addv">; defm SMAXV : SIMDAcrossLanesBHS<0, 0b01010, "smaxv">; defm SMINV : SIMDAcrossLanesBHS<0, 0b11010, "sminv">; defm UMAXV : SIMDAcrossLanesBHS<1, 0b01010, "umaxv">; defm UMINV : SIMDAcrossLanesBHS<1, 0b11010, "uminv">; defm SADDLV : SIMDAcrossLanesHSD<0, 0b00011, "saddlv">; defm UADDLV : SIMDAcrossLanesHSD<1, 0b00011, "uaddlv">; defm FMAXNMV : SIMDFPAcrossLanes<0b01100, 0, "fmaxnmv", AArch64fmaxnmv>; defm FMAXV : SIMDFPAcrossLanes<0b01111, 0, "fmaxv", AArch64fmaxv>; defm FMINNMV : SIMDFPAcrossLanes<0b01100, 1, "fminnmv", AArch64fminnmv>; defm FMINV : SIMDFPAcrossLanes<0b01111, 1, "fminv", AArch64fminv>; multiclass SIMDAcrossLaneLongPairIntrinsic { // Patterns for addv(addlp(x)) ==> addlv def : Pat<(i32 (vector_extract (v8i16 (insert_subvector undef, (v4i16 (AArch64uaddv (v4i16 (addlp (v8i8 V64:$op))))), (i64 0))), (i64 0))), (EXTRACT_SUBREG (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)), (!cast(Opc#"v8i8v") V64:$op), hsub), ssub)>; def : Pat<(i32 (vector_extract (v8i16 (AArch64uaddv (v8i16 (addlp (v16i8 V128:$op))))), (i64 0))), (EXTRACT_SUBREG (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), (!cast(Opc#"v16i8v") V128:$op), hsub), ssub)>; def : Pat<(v4i32 (AArch64uaddv (v4i32 
(addlp (v8i16 V128:$op))))), (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), (!cast(Opc#"v8i16v") V128:$op), ssub)>; // Patterns for addp(addlp(x))) ==> addlv def : Pat<(v2i32 (AArch64uaddv (v2i32 (addlp (v4i16 V64:$op))))), (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)), (!cast(Opc#"v4i16v") V64:$op), ssub)>; def : Pat<(v2i64 (AArch64uaddv (v2i64 (addlp (v4i32 V128:$op))))), (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), (!cast(Opc#"v4i32v") V128:$op), dsub)>; } defm : SIMDAcrossLaneLongPairIntrinsic<"UADDLV", AArch64uaddlp>; defm : SIMDAcrossLaneLongPairIntrinsic<"SADDLV", AArch64saddlp>; // Pattern is used for GlobalISel multiclass SIMDAcrossLaneLongPairIntrinsicGISel { // Patterns for addv(addlp(x)) ==> addlv def : Pat<(i16 (vecreduce_add (v4i16 (addlp (v8i8 V64:$Rn))))), (!cast(Opc#"v8i8v") V64:$Rn)>; def : Pat<(i16 (vecreduce_add (v8i16 (addlp (v16i8 V128:$Rn))))), (!cast(Opc#"v16i8v") V128:$Rn)>; def : Pat<(i32 (vecreduce_add (v4i32 (addlp (v8i16 V128:$Rn))))), (!cast(Opc#"v8i16v") V128:$Rn)>; // Patterns for addp(addlp(x))) ==> addlv def : Pat<(i32 (vecreduce_add (v2i32 (addlp (v4i16 V64:$Rn))))), (!cast(Opc#"v4i16v") V64:$Rn)>; def : Pat<(i64 (vecreduce_add (v2i64 (addlp (v4i32 V128:$Rn))))), (!cast(Opc#"v4i32v") V128:$Rn)>; } defm : SIMDAcrossLaneLongPairIntrinsicGISel<"UADDLV", AArch64uaddlp>; defm : SIMDAcrossLaneLongPairIntrinsicGISel<"SADDLV", AArch64saddlp>; // Patterns for uaddlv(uaddlp(x)) ==> uaddlv def : Pat<(i64 (int_aarch64_neon_uaddlv (v4i32 (AArch64uaddlp (v8i16 V128:$op))))), (i64 (EXTRACT_SUBREG (v4i32 (SUBREG_TO_REG (i64 0), (UADDLVv8i16v V128:$op), ssub)), dsub))>; def : Pat<(i32 (int_aarch64_neon_uaddlv (v8i16 (AArch64uaddlp (v16i8 V128:$op))))), (i32 (EXTRACT_SUBREG (v8i16 (SUBREG_TO_REG (i64 0), (UADDLVv16i8v V128:$op), hsub)), ssub))>; def : Pat<(v2i64 (AArch64uaddlv (v4i32 (AArch64uaddlp (v8i16 V128:$op))))), (v2i64 (SUBREG_TO_REG (i64 0), (UADDLVv8i16v V128:$op), ssub))>; def : Pat<(v4i32 (AArch64uaddlv (v8i16 (AArch64uaddlp (v16i8 V128:$op))))), (v4i32 (SUBREG_TO_REG (i64 0), (UADDLVv16i8v V128:$op), hsub))>; def : Pat<(v4i32 (AArch64uaddlv (v4i16 (AArch64uaddlp (v8i8 V64:$op))))), (v4i32 (SUBREG_TO_REG (i64 0), (UADDLVv8i8v V64:$op), hsub))>; multiclass SIMDAcrossLaneLongReductionIntrinsic { def : Pat<(v4i32 (addlv (v8i8 V64:$Rn))), (v4i32 (SUBREG_TO_REG (i64 0), (!cast(Opc#"v8i8v") V64:$Rn), hsub))>; def : Pat<(v4i32 (addlv (v4i16 V64:$Rn))), (v4i32 (SUBREG_TO_REG (i64 0), (!cast(Opc#"v4i16v") V64:$Rn), ssub))>; def : Pat<(v4i32 (addlv (v16i8 V128:$Rn))), (v4i32 (SUBREG_TO_REG (i64 0), (!cast(Opc#"v16i8v") V128:$Rn), hsub))>; def : Pat<(v4i32 (addlv (v8i16 V128:$Rn))), (v4i32 (SUBREG_TO_REG (i64 0), (!cast(Opc#"v8i16v") V128:$Rn), ssub))>; def : Pat<(v2i64 (addlv (v4i32 V128:$Rn))), (v2i64 (SUBREG_TO_REG (i64 0), (!cast(Opc#"v4i32v") V128:$Rn), dsub))>; } defm : SIMDAcrossLaneLongReductionIntrinsic<"UADDLV", AArch64uaddlv>; defm : SIMDAcrossLaneLongReductionIntrinsic<"SADDLV", AArch64saddlv>; // Patterns for across-vector intrinsics, that have a node equivalent, that // returns a vector (with only the low lane defined) instead of a scalar. // In effect, opNode is the same as (scalar_to_vector (IntNode)). multiclass SIMDAcrossLanesIntrinsic { // If a lane instruction caught the vector_extract around opNode, we can // directly match the latter to the instruction. 
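// For example (illustrative): with baseOpc = "SMAXV" and opNode = AArch64smaxv,
// (v8i16 (AArch64smaxv (v8i16 V128:$Rn))) is matched straight to SMAXVv8i16v,
// with the scalar result landing in the low lane of an otherwise undefined
// vector.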
def : Pat<(v8i8 (opNode V64:$Rn)), (INSERT_SUBREG (v8i8 (IMPLICIT_DEF)), (!cast(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub)>; def : Pat<(v16i8 (opNode V128:$Rn)), (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), (!cast(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub)>; def : Pat<(v4i16 (opNode V64:$Rn)), (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)), (!cast(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub)>; def : Pat<(v8i16 (opNode V128:$Rn)), (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), (!cast(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub)>; def : Pat<(v4i32 (opNode V128:$Rn)), (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), (!cast(!strconcat(baseOpc, "v4i32v")) V128:$Rn), ssub)>; // If none did, fallback to the explicit patterns, consuming the vector_extract. def : Pat<(i32 (vector_extract (insert_subvector undef, (v8i8 (opNode V64:$Rn)), (i64 0)), (i64 0))), (EXTRACT_SUBREG (INSERT_SUBREG (v8i8 (IMPLICIT_DEF)), (!cast(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub), ssub)>; def : Pat<(i32 (vector_extract (v16i8 (opNode V128:$Rn)), (i64 0))), (EXTRACT_SUBREG (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), (!cast(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub), ssub)>; def : Pat<(i32 (vector_extract (insert_subvector undef, (v4i16 (opNode V64:$Rn)), (i64 0)), (i64 0))), (EXTRACT_SUBREG (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)), (!cast(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub), ssub)>; def : Pat<(i32 (vector_extract (v8i16 (opNode V128:$Rn)), (i64 0))), (EXTRACT_SUBREG (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), (!cast(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub), ssub)>; def : Pat<(i32 (vector_extract (v4i32 (opNode V128:$Rn)), (i64 0))), (EXTRACT_SUBREG (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), (!cast(!strconcat(baseOpc, "v4i32v")) V128:$Rn), ssub), ssub)>; } multiclass SIMDAcrossLanesSignedIntrinsic : SIMDAcrossLanesIntrinsic { // If there is a sign extension after this intrinsic, consume it as smov already // performed it def : Pat<(i32 (sext_inreg (i32 (vector_extract (insert_subvector undef, (opNode (v8i8 V64:$Rn)), (i64 0)), (i64 0))), i8)), (i32 (SMOVvi8to32 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), (!cast(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub), (i64 0)))>; def : Pat<(i32 (sext_inreg (i32 (vector_extract (opNode (v16i8 V128:$Rn)), (i64 0))), i8)), (i32 (SMOVvi8to32 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), (!cast(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub), (i64 0)))>; def : Pat<(i32 (sext_inreg (i32 (vector_extract (insert_subvector undef, (opNode (v4i16 V64:$Rn)), (i64 0)), (i64 0))), i16)), (i32 (SMOVvi16to32 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), (!cast(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub), (i64 0)))>; def : Pat<(i32 (sext_inreg (i32 (vector_extract (opNode (v8i16 V128:$Rn)), (i64 0))), i16)), (i32 (SMOVvi16to32 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), (!cast(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub), (i64 0)))>; } multiclass SIMDAcrossLanesUnsignedIntrinsic : SIMDAcrossLanesIntrinsic { // If there is a masking operation keeping only what has been actually // generated, consume it. 
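// For example (illustrative): in
//   (and (vector_extract (AArch64umaxv (v16i8 V128:$Rn)), (i64 0)), 0xff)
// the UMAXV result already has every bit above bit 7 clear, so the mask is a
// no-op and the whole expression selects to a single UMAXVv16i8v plus a
// subregister copy.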
def : Pat<(i32 (and (i32 (vector_extract (insert_subvector undef, (opNode (v8i8 V64:$Rn)), (i64 0)), (i64 0))), maski8_or_more)), (i32 (EXTRACT_SUBREG (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), (!cast(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub), ssub))>; def : Pat<(i32 (and (i32 (vector_extract (opNode (v16i8 V128:$Rn)), (i64 0))), maski8_or_more)), (i32 (EXTRACT_SUBREG (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), (!cast(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub), ssub))>; def : Pat<(i32 (and (i32 (vector_extract (insert_subvector undef, (opNode (v4i16 V64:$Rn)), (i64 0)), (i64 0))), maski16_or_more)), (i32 (EXTRACT_SUBREG (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), (!cast(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub), ssub))>; def : Pat<(i32 (and (i32 (vector_extract (opNode (v8i16 V128:$Rn)), (i64 0))), maski16_or_more)), (i32 (EXTRACT_SUBREG (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), (!cast(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub), ssub))>; } // For vecreduce_add, used by GlobalISel not SDAG def : Pat<(i8 (vecreduce_add (v8i8 V64:$Rn))), (i8 (ADDVv8i8v V64:$Rn))>; def : Pat<(i8 (vecreduce_add (v16i8 V128:$Rn))), (i8 (ADDVv16i8v V128:$Rn))>; def : Pat<(i16 (vecreduce_add (v4i16 V64:$Rn))), (i16 (ADDVv4i16v V64:$Rn))>; def : Pat<(i16 (vecreduce_add (v8i16 V128:$Rn))), (i16 (ADDVv8i16v V128:$Rn))>; def : Pat<(i32 (vecreduce_add (v2i32 V64:$Rn))), (i32 (EXTRACT_SUBREG (ADDPv2i32 V64:$Rn, V64:$Rn), ssub))>; def : Pat<(i32 (vecreduce_add (v4i32 V128:$Rn))), (i32 (ADDVv4i32v V128:$Rn))>; def : Pat<(i64 (vecreduce_add (v2i64 V128:$Rn))), (i64 (ADDPv2i64p V128:$Rn))>; defm : SIMDAcrossLanesSignedIntrinsic<"ADDV", AArch64saddv>; // vaddv_[su]32 is special; -> ADDP Vd.2S,Vn.2S,Vm.2S; return Vd.s[0];Vn==Vm def : Pat<(v2i32 (AArch64saddv (v2i32 V64:$Rn))), (ADDPv2i32 V64:$Rn, V64:$Rn)>; defm : SIMDAcrossLanesUnsignedIntrinsic<"ADDV", AArch64uaddv>; // vaddv_[su]32 is special; -> ADDP Vd.2S,Vn.2S,Vm.2S; return Vd.s[0];Vn==Vm def : Pat<(v2i32 (AArch64uaddv (v2i32 V64:$Rn))), (ADDPv2i32 V64:$Rn, V64:$Rn)>; defm : SIMDAcrossLanesSignedIntrinsic<"SMAXV", AArch64smaxv>; def : Pat<(v2i32 (AArch64smaxv (v2i32 V64:$Rn))), (SMAXPv2i32 V64:$Rn, V64:$Rn)>; defm : SIMDAcrossLanesSignedIntrinsic<"SMINV", AArch64sminv>; def : Pat<(v2i32 (AArch64sminv (v2i32 V64:$Rn))), (SMINPv2i32 V64:$Rn, V64:$Rn)>; defm : SIMDAcrossLanesUnsignedIntrinsic<"UMAXV", AArch64umaxv>; def : Pat<(v2i32 (AArch64umaxv (v2i32 V64:$Rn))), (UMAXPv2i32 V64:$Rn, V64:$Rn)>; defm : SIMDAcrossLanesUnsignedIntrinsic<"UMINV", AArch64uminv>; def : Pat<(v2i32 (AArch64uminv (v2i32 V64:$Rn))), (UMINPv2i32 V64:$Rn, V64:$Rn)>; // For vecreduce_{opc} used by GlobalISel, not SDAG at the moment // because GlobalISel allows us to specify the return register to be a FPR multiclass SIMDAcrossLanesVecReductionIntrinsic { def : Pat<(i8 (opNode (v8i8 FPR64:$Rn))), (!cast(!strconcat(baseOpc, "v8i8v")) FPR64:$Rn)>; def : Pat<(i8 (opNode (v16i8 FPR128:$Rn))), (!cast(!strconcat(baseOpc, "v16i8v")) FPR128:$Rn)>; def : Pat<(i16 (opNode (v4i16 FPR64:$Rn))), (!cast(!strconcat(baseOpc, "v4i16v")) FPR64:$Rn)>; def : Pat<(i16 (opNode (v8i16 FPR128:$Rn))), (!cast(!strconcat(baseOpc, "v8i16v")) FPR128:$Rn)>; def : Pat<(i32 (opNode (v4i32 V128:$Rn))), (!cast(!strconcat(baseOpc, "v4i32v")) V128:$Rn)>; } // For v2i32 source type, the pairwise instruction can be used instead defm : SIMDAcrossLanesVecReductionIntrinsic<"UMINV", vecreduce_umin>; def : Pat<(i32 (vecreduce_umin (v2i32 V64:$Rn))), (i32 (EXTRACT_SUBREG (UMINPv2i32 V64:$Rn, V64:$Rn), ssub))>; defm : 
SIMDAcrossLanesVecReductionIntrinsic<"UMAXV", vecreduce_umax>; def : Pat<(i32 (vecreduce_umax (v2i32 V64:$Rn))), (i32 (EXTRACT_SUBREG (UMAXPv2i32 V64:$Rn, V64:$Rn), ssub))>; defm : SIMDAcrossLanesVecReductionIntrinsic<"SMINV", vecreduce_smin>; def : Pat<(i32 (vecreduce_smin (v2i32 V64:$Rn))), (i32 (EXTRACT_SUBREG (SMINPv2i32 V64:$Rn, V64:$Rn), ssub))>; defm : SIMDAcrossLanesVecReductionIntrinsic<"SMAXV", vecreduce_smax>; def : Pat<(i32 (vecreduce_smax (v2i32 V64:$Rn))), (i32 (EXTRACT_SUBREG (SMAXPv2i32 V64:$Rn, V64:$Rn), ssub))>; multiclass SIMDAcrossLanesSignedLongIntrinsic { def : Pat<(i32 (intOp (v8i8 V64:$Rn))), (i32 (SMOVvi16to32 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), (!cast(!strconcat(baseOpc, "v8i8v")) V64:$Rn), hsub), (i64 0)))>; def : Pat<(i32 (intOp (v16i8 V128:$Rn))), (i32 (SMOVvi16to32 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), (!cast(!strconcat(baseOpc, "v16i8v")) V128:$Rn), hsub), (i64 0)))>; def : Pat<(i32 (intOp (v4i16 V64:$Rn))), (i32 (EXTRACT_SUBREG (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), (!cast(!strconcat(baseOpc, "v4i16v")) V64:$Rn), ssub), ssub))>; def : Pat<(i32 (intOp (v8i16 V128:$Rn))), (i32 (EXTRACT_SUBREG (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), (!cast(!strconcat(baseOpc, "v8i16v")) V128:$Rn), ssub), ssub))>; def : Pat<(i64 (intOp (v4i32 V128:$Rn))), (i64 (EXTRACT_SUBREG (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), (!cast(!strconcat(baseOpc, "v4i32v")) V128:$Rn), dsub), dsub))>; } multiclass SIMDAcrossLanesUnsignedLongIntrinsic { def : Pat<(i32 (intOp (v8i8 V64:$Rn))), (i32 (EXTRACT_SUBREG (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), (!cast(!strconcat(baseOpc, "v8i8v")) V64:$Rn), hsub), ssub))>; def : Pat<(i32 (intOp (v16i8 V128:$Rn))), (i32 (EXTRACT_SUBREG (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), (!cast(!strconcat(baseOpc, "v16i8v")) V128:$Rn), hsub), ssub))>; def : Pat<(i32 (intOp (v4i16 V64:$Rn))), (i32 (EXTRACT_SUBREG (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), (!cast(!strconcat(baseOpc, "v4i16v")) V64:$Rn), ssub), ssub))>; def : Pat<(i32 (intOp (v8i16 V128:$Rn))), (i32 (EXTRACT_SUBREG (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), (!cast(!strconcat(baseOpc, "v8i16v")) V128:$Rn), ssub), ssub))>; def : Pat<(i64 (intOp (v4i32 V128:$Rn))), (i64 (EXTRACT_SUBREG (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), (!cast(!strconcat(baseOpc, "v4i32v")) V128:$Rn), dsub), dsub))>; } defm : SIMDAcrossLanesSignedLongIntrinsic<"SADDLV", int_aarch64_neon_saddlv>; defm : SIMDAcrossLanesUnsignedLongIntrinsic<"UADDLV", int_aarch64_neon_uaddlv>; // The vaddlv_s32 intrinsic gets mapped to SADDLP. def : Pat<(i64 (int_aarch64_neon_saddlv (v2i32 V64:$Rn))), (i64 (EXTRACT_SUBREG (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), (SADDLPv2i32_v1i64 V64:$Rn), dsub), dsub))>; // The vaddlv_u32 intrinsic gets mapped to UADDLP. 
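// (With only two source lanes a single pairwise long add already is the full
// reduction, so no across-lanes instruction is needed: as an illustration,
// vaddlv_u32(v) == (uint64_t)v[0] + (uint64_t)v[1], which is exactly what
// uaddlp computes for the .2s -> .1d case.)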
def : Pat<(i64 (int_aarch64_neon_uaddlv (v2i32 V64:$Rn))), (i64 (EXTRACT_SUBREG (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), (UADDLPv2i32_v1i64 V64:$Rn), dsub), dsub))>; //------------------------------------------------------------------------------ // AdvSIMD modified immediate instructions //------------------------------------------------------------------------------ // AdvSIMD BIC defm BIC : SIMDModifiedImmVectorShiftTied<1, 0b11, 0b01, "bic", AArch64bici>; // AdvSIMD ORR defm ORR : SIMDModifiedImmVectorShiftTied<0, 0b11, 0b01, "orr", AArch64orri>; let Predicates = [HasNEON] in { def : InstAlias<"bic $Vd.4h, $imm", (BICv4i16 V64:$Vd, imm0_255:$imm, 0)>; def : InstAlias<"bic $Vd.8h, $imm", (BICv8i16 V128:$Vd, imm0_255:$imm, 0)>; def : InstAlias<"bic $Vd.2s, $imm", (BICv2i32 V64:$Vd, imm0_255:$imm, 0)>; def : InstAlias<"bic $Vd.4s, $imm", (BICv4i32 V128:$Vd, imm0_255:$imm, 0)>; def : InstAlias<"bic.4h $Vd, $imm", (BICv4i16 V64:$Vd, imm0_255:$imm, 0)>; def : InstAlias<"bic.8h $Vd, $imm", (BICv8i16 V128:$Vd, imm0_255:$imm, 0)>; def : InstAlias<"bic.2s $Vd, $imm", (BICv2i32 V64:$Vd, imm0_255:$imm, 0)>; def : InstAlias<"bic.4s $Vd, $imm", (BICv4i32 V128:$Vd, imm0_255:$imm, 0)>; def : InstAlias<"orr $Vd.4h, $imm", (ORRv4i16 V64:$Vd, imm0_255:$imm, 0)>; def : InstAlias<"orr $Vd.8h, $imm", (ORRv8i16 V128:$Vd, imm0_255:$imm, 0)>; def : InstAlias<"orr $Vd.2s, $imm", (ORRv2i32 V64:$Vd, imm0_255:$imm, 0)>; def : InstAlias<"orr $Vd.4s, $imm", (ORRv4i32 V128:$Vd, imm0_255:$imm, 0)>; def : InstAlias<"orr.4h $Vd, $imm", (ORRv4i16 V64:$Vd, imm0_255:$imm, 0)>; def : InstAlias<"orr.8h $Vd, $imm", (ORRv8i16 V128:$Vd, imm0_255:$imm, 0)>; def : InstAlias<"orr.2s $Vd, $imm", (ORRv2i32 V64:$Vd, imm0_255:$imm, 0)>; def : InstAlias<"orr.4s $Vd, $imm", (ORRv4i32 V128:$Vd, imm0_255:$imm, 0)>; } // AdvSIMD FMOV def FMOVv2f64_ns : SIMDModifiedImmVectorNoShift<1, 1, 0, 0b1111, V128, fpimm8, "fmov", ".2d", [(set (v2f64 V128:$Rd), (AArch64fmov imm0_255:$imm8))]>; def FMOVv2f32_ns : SIMDModifiedImmVectorNoShift<0, 0, 0, 0b1111, V64, fpimm8, "fmov", ".2s", [(set (v2f32 V64:$Rd), (AArch64fmov imm0_255:$imm8))]>; def FMOVv4f32_ns : SIMDModifiedImmVectorNoShift<1, 0, 0, 0b1111, V128, fpimm8, "fmov", ".4s", [(set (v4f32 V128:$Rd), (AArch64fmov imm0_255:$imm8))]>; let Predicates = [HasNEON, HasFullFP16] in { def FMOVv4f16_ns : SIMDModifiedImmVectorNoShift<0, 0, 1, 0b1111, V64, fpimm8, "fmov", ".4h", [(set (v4f16 V64:$Rd), (AArch64fmov imm0_255:$imm8))]>; def FMOVv8f16_ns : SIMDModifiedImmVectorNoShift<1, 0, 1, 0b1111, V128, fpimm8, "fmov", ".8h", [(set (v8f16 V128:$Rd), (AArch64fmov imm0_255:$imm8))]>; } // Predicates = [HasNEON, HasFullFP16] // AdvSIMD MOVI // EDIT byte mask: scalar let isReMaterializable = 1, isAsCheapAsAMove = 1 in def MOVID : SIMDModifiedImmScalarNoShift<0, 1, 0b1110, "movi", [(set FPR64:$Rd, simdimmtype10:$imm8)]>; // The movi_edit node has the immediate value already encoded, so we use // a plain imm0_255 here. 
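// (In the byte-mask encoding each bit of the 8-bit immediate is replicated
// into a full byte of the result, so an immediate of 0 yields all-zeros and
// 255 yields all-ones; the immAllZerosV/immAllOnesV patterns below rely on
// exactly that.)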
def : Pat<(f64 (AArch64movi_edit imm0_255:$shift)), (MOVID imm0_255:$shift)>; // EDIT byte mask: 2d // The movi_edit node has the immediate value already encoded, so we use // a plain imm0_255 in the pattern let isReMaterializable = 1, isAsCheapAsAMove = 1 in def MOVIv2d_ns : SIMDModifiedImmVectorNoShift<1, 1, 0, 0b1110, V128, simdimmtype10, "movi", ".2d", [(set (v2i64 V128:$Rd), (AArch64movi_edit imm0_255:$imm8))]>; def : Pat<(v2i64 immAllZerosV), (MOVIv2d_ns (i32 0))>; def : Pat<(v4i32 immAllZerosV), (MOVIv2d_ns (i32 0))>; def : Pat<(v8i16 immAllZerosV), (MOVIv2d_ns (i32 0))>; def : Pat<(v16i8 immAllZerosV), (MOVIv2d_ns (i32 0))>; def : Pat<(v2f64 immAllZerosV), (MOVIv2d_ns (i32 0))>; def : Pat<(v4f32 immAllZerosV), (MOVIv2d_ns (i32 0))>; def : Pat<(v8f16 immAllZerosV), (MOVIv2d_ns (i32 0))>; def : Pat<(v8bf16 immAllZerosV), (MOVIv2d_ns (i32 0))>; def : Pat<(v2i64 immAllOnesV), (MOVIv2d_ns (i32 255))>; def : Pat<(v4i32 immAllOnesV), (MOVIv2d_ns (i32 255))>; def : Pat<(v8i16 immAllOnesV), (MOVIv2d_ns (i32 255))>; def : Pat<(v16i8 immAllOnesV), (MOVIv2d_ns (i32 255))>; // Set 64-bit vectors to all 0/1 by extracting from a 128-bit register as the // extract is free and this gives better MachineCSE results. def : Pat<(v1i64 immAllZerosV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 0)), dsub)>; def : Pat<(v2i32 immAllZerosV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 0)), dsub)>; def : Pat<(v4i16 immAllZerosV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 0)), dsub)>; def : Pat<(v8i8 immAllZerosV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 0)), dsub)>; def : Pat<(v1f64 immAllZerosV), (MOVID (i32 0))>; def : Pat<(v2f32 immAllZerosV), (MOVID (i32 0))>; def : Pat<(v4f16 immAllZerosV), (MOVID (i32 0))>; def : Pat<(v4bf16 immAllZerosV), (MOVID (i32 0))>; def : Pat<(v1i64 immAllOnesV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 255)), dsub)>; def : Pat<(v2i32 immAllOnesV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 255)), dsub)>; def : Pat<(v4i16 immAllOnesV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 255)), dsub)>; def : Pat<(v8i8 immAllOnesV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 255)), dsub)>; // EDIT per word & halfword: 2s, 4h, 4s, & 8h let isReMaterializable = 1, isAsCheapAsAMove = 1 in defm MOVI : SIMDModifiedImmVectorShift<0, 0b10, 0b00, "movi">; let Predicates = [HasNEON] in { // Using the MOVI to materialize fp constants. 
def : Pat<(f32 fpimm32SIMDModImmType4:$in), (EXTRACT_SUBREG (MOVIv2i32 (fpimm32SIMDModImmType4XForm f32:$in), (i32 24)), ssub)>; } let Predicates = [HasNEON] in { def : InstAlias<"movi $Vd.4h, $imm", (MOVIv4i16 V64:$Vd, imm0_255:$imm, 0), 0>; def : InstAlias<"movi $Vd.8h, $imm", (MOVIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>; def : InstAlias<"movi $Vd.2s, $imm", (MOVIv2i32 V64:$Vd, imm0_255:$imm, 0), 0>; def : InstAlias<"movi $Vd.4s, $imm", (MOVIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>; def : InstAlias<"movi.4h $Vd, $imm", (MOVIv4i16 V64:$Vd, imm0_255:$imm, 0), 0>; def : InstAlias<"movi.8h $Vd, $imm", (MOVIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>; def : InstAlias<"movi.2s $Vd, $imm", (MOVIv2i32 V64:$Vd, imm0_255:$imm, 0), 0>; def : InstAlias<"movi.4s $Vd, $imm", (MOVIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>; } def : Pat<(v2i32 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))), (MOVIv2i32 imm0_255:$imm8, imm:$shift)>; def : Pat<(v4i32 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))), (MOVIv4i32 imm0_255:$imm8, imm:$shift)>; def : Pat<(v4i16 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))), (MOVIv4i16 imm0_255:$imm8, imm:$shift)>; def : Pat<(v8i16 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))), (MOVIv8i16 imm0_255:$imm8, imm:$shift)>; let isReMaterializable = 1, isAsCheapAsAMove = 1 in { // EDIT per word: 2s & 4s with MSL shifter def MOVIv2s_msl : SIMDModifiedImmMoveMSL<0, 0, {1,1,0,?}, V64, "movi", ".2s", [(set (v2i32 V64:$Rd), (AArch64movi_msl imm0_255:$imm8, (i32 imm:$shift)))]>; def MOVIv4s_msl : SIMDModifiedImmMoveMSL<1, 0, {1,1,0,?}, V128, "movi", ".4s", [(set (v4i32 V128:$Rd), (AArch64movi_msl imm0_255:$imm8, (i32 imm:$shift)))]>; // Per byte: 8b & 16b def MOVIv8b_ns : SIMDModifiedImmVectorNoShift<0, 0, 0, 0b1110, V64, imm0_255, "movi", ".8b", [(set (v8i8 V64:$Rd), (AArch64movi imm0_255:$imm8))]>; def MOVIv16b_ns : SIMDModifiedImmVectorNoShift<1, 0, 0, 0b1110, V128, imm0_255, "movi", ".16b", [(set (v16i8 V128:$Rd), (AArch64movi imm0_255:$imm8))]>; } // AdvSIMD MVNI // EDIT per word & halfword: 2s, 4h, 4s, & 8h let isReMaterializable = 1, isAsCheapAsAMove = 1 in defm MVNI : SIMDModifiedImmVectorShift<1, 0b10, 0b00, "mvni">; let Predicates = [HasNEON] in { def : InstAlias<"mvni $Vd.4h, $imm", (MVNIv4i16 V64:$Vd, imm0_255:$imm, 0), 0>; def : InstAlias<"mvni $Vd.8h, $imm", (MVNIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>; def : InstAlias<"mvni $Vd.2s, $imm", (MVNIv2i32 V64:$Vd, imm0_255:$imm, 0), 0>; def : InstAlias<"mvni $Vd.4s, $imm", (MVNIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>; def : InstAlias<"mvni.4h $Vd, $imm", (MVNIv4i16 V64:$Vd, imm0_255:$imm, 0), 0>; def : InstAlias<"mvni.8h $Vd, $imm", (MVNIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>; def : InstAlias<"mvni.2s $Vd, $imm", (MVNIv2i32 V64:$Vd, imm0_255:$imm, 0), 0>; def : InstAlias<"mvni.4s $Vd, $imm", (MVNIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>; } def : Pat<(v2i32 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))), (MVNIv2i32 imm0_255:$imm8, imm:$shift)>; def : Pat<(v4i32 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))), (MVNIv4i32 imm0_255:$imm8, imm:$shift)>; def : Pat<(v4i16 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))), (MVNIv4i16 imm0_255:$imm8, imm:$shift)>; def : Pat<(v8i16 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))), (MVNIv8i16 imm0_255:$imm8, imm:$shift)>; // EDIT per word: 2s & 4s with MSL shifter let isReMaterializable = 1, isAsCheapAsAMove = 1 in { def MVNIv2s_msl : SIMDModifiedImmMoveMSL<0, 1, {1,1,0,?}, V64, "mvni", ".2s", [(set (v2i32 V64:$Rd), (AArch64mvni_msl imm0_255:$imm8, (i32 
imm:$shift)))]>; def MVNIv4s_msl : SIMDModifiedImmMoveMSL<1, 1, {1,1,0,?}, V128, "mvni", ".4s", [(set (v4i32 V128:$Rd), (AArch64mvni_msl imm0_255:$imm8, (i32 imm:$shift)))]>; } //---------------------------------------------------------------------------- // AdvSIMD indexed element //---------------------------------------------------------------------------- let hasSideEffects = 0 in { defm FMLA : SIMDFPIndexedTied<0, 0b0001, "fmla">; defm FMLS : SIMDFPIndexedTied<0, 0b0101, "fmls">; } // NOTE: Operands are reordered in the FMLA/FMLS PatFrags because the // instruction expects the addend first, while the intrinsic expects it last. // On the other hand, there are quite a few valid combinatorial options due to // the commutativity of multiplication and the fact that (-x) * y = x * (-y). defm : SIMDFPIndexedTiedPatterns<"FMLA", TriOpFrag<(any_fma node:$RHS, node:$MHS, node:$LHS)>>; defm : SIMDFPIndexedTiedPatterns<"FMLA", TriOpFrag<(any_fma node:$MHS, node:$RHS, node:$LHS)>>; defm : SIMDFPIndexedTiedPatterns<"FMLS", TriOpFrag<(any_fma node:$MHS, (fneg node:$RHS), node:$LHS)> >; defm : SIMDFPIndexedTiedPatterns<"FMLS", TriOpFrag<(any_fma node:$RHS, (fneg node:$MHS), node:$LHS)> >; defm : SIMDFPIndexedTiedPatterns<"FMLS", TriOpFrag<(any_fma (fneg node:$RHS), node:$MHS, node:$LHS)> >; defm : SIMDFPIndexedTiedPatterns<"FMLS", TriOpFrag<(any_fma (fneg node:$MHS), node:$RHS, node:$LHS)> >; multiclass FMLSIndexedAfterNegPatterns { // 3 variants for the .2s version: DUPLANE from 128-bit, DUPLANE from 64-bit // and DUP scalar. def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn), (AArch64duplane32 (v4f32 (fneg V128:$Rm)), VectorIndexS:$idx))), (FMLSv2i32_indexed V64:$Rd, V64:$Rn, V128:$Rm, VectorIndexS:$idx)>; def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn), (v2f32 (AArch64duplane32 (v4f32 (insert_subvector undef, (v2f32 (fneg V64:$Rm)), (i64 0))), VectorIndexS:$idx)))), (FMLSv2i32_indexed V64:$Rd, V64:$Rn, (SUBREG_TO_REG (i32 0), V64:$Rm, dsub), VectorIndexS:$idx)>; def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn), (AArch64dup (f32 (fneg FPR32Op:$Rm))))), (FMLSv2i32_indexed V64:$Rd, V64:$Rn, (SUBREG_TO_REG (i32 0), FPR32Op:$Rm, ssub), (i64 0))>; // 3 variants for the .4s version: DUPLANE from 128-bit, DUPLANE from 64-bit // and DUP scalar. def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn), (AArch64duplane32 (v4f32 (fneg V128:$Rm)), VectorIndexS:$idx))), (FMLSv4i32_indexed V128:$Rd, V128:$Rn, V128:$Rm, VectorIndexS:$idx)>; def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn), (v4f32 (AArch64duplane32 (v4f32 (insert_subvector undef, (v2f32 (fneg V64:$Rm)), (i64 0))), VectorIndexS:$idx)))), (FMLSv4i32_indexed V128:$Rd, V128:$Rn, (SUBREG_TO_REG (i32 0), V64:$Rm, dsub), VectorIndexS:$idx)>; def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn), (AArch64dup (f32 (fneg FPR32Op:$Rm))))), (FMLSv4i32_indexed V128:$Rd, V128:$Rn, (SUBREG_TO_REG (i32 0), FPR32Op:$Rm, ssub), (i64 0))>; // 2 variants for the .2d version: DUPLANE from 128-bit, and DUP scalar // (DUPLANE from 64-bit would be trivial). 
def : Pat<(v2f64 (OpNode (v2f64 V128:$Rd), (v2f64 V128:$Rn), (AArch64duplane64 (v2f64 (fneg V128:$Rm)), VectorIndexD:$idx))), (FMLSv2i64_indexed V128:$Rd, V128:$Rn, V128:$Rm, VectorIndexS:$idx)>; def : Pat<(v2f64 (OpNode (v2f64 V128:$Rd), (v2f64 V128:$Rn), (AArch64dup (f64 (fneg FPR64Op:$Rm))))), (FMLSv2i64_indexed V128:$Rd, V128:$Rn, (SUBREG_TO_REG (i32 0), FPR64Op:$Rm, dsub), (i64 0))>; // 2 variants for 32-bit scalar version: extract from .2s or from .4s def : Pat<(f32 (OpNode (f32 FPR32:$Rd), (f32 FPR32:$Rn), (vector_extract (v4f32 (fneg V128:$Rm)), VectorIndexS:$idx))), (FMLSv1i32_indexed FPR32:$Rd, FPR32:$Rn, V128:$Rm, VectorIndexS:$idx)>; def : Pat<(f32 (OpNode (f32 FPR32:$Rd), (f32 FPR32:$Rn), (vector_extract (v4f32 (insert_subvector undef, (v2f32 (fneg V64:$Rm)), (i64 0))), VectorIndexS:$idx))), (FMLSv1i32_indexed FPR32:$Rd, FPR32:$Rn, (SUBREG_TO_REG (i32 0), V64:$Rm, dsub), VectorIndexS:$idx)>; // 1 variant for 64-bit scalar version: extract from .1d or from .2d def : Pat<(f64 (OpNode (f64 FPR64:$Rd), (f64 FPR64:$Rn), (vector_extract (v2f64 (fneg V128:$Rm)), VectorIndexS:$idx))), (FMLSv1i64_indexed FPR64:$Rd, FPR64:$Rn, V128:$Rm, VectorIndexS:$idx)>; } defm : FMLSIndexedAfterNegPatterns< TriOpFrag<(any_fma node:$RHS, node:$MHS, node:$LHS)> >; defm : FMLSIndexedAfterNegPatterns< TriOpFrag<(any_fma node:$MHS, node:$RHS, node:$LHS)> >; defm FMULX : SIMDFPIndexed<1, 0b1001, "fmulx", int_aarch64_neon_fmulx>; defm FMUL : SIMDFPIndexed<0, 0b1001, "fmul", any_fmul>; def : Pat<(v2f32 (any_fmul V64:$Rn, (AArch64dup (f32 FPR32:$Rm)))), (FMULv2i32_indexed V64:$Rn, (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rm, ssub), (i64 0))>; def : Pat<(v4f32 (any_fmul V128:$Rn, (AArch64dup (f32 FPR32:$Rm)))), (FMULv4i32_indexed V128:$Rn, (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rm, ssub), (i64 0))>; def : Pat<(v2f64 (any_fmul V128:$Rn, (AArch64dup (f64 FPR64:$Rm)))), (FMULv2i64_indexed V128:$Rn, (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR64:$Rm, dsub), (i64 0))>; defm SQDMULH : SIMDIndexedHS<0, 0b1100, "sqdmulh", int_aarch64_neon_sqdmulh>; defm SQRDMULH : SIMDIndexedHS<0, 0b1101, "sqrdmulh", int_aarch64_neon_sqrdmulh>; defm SQDMULH : SIMDIndexedHSPatterns; defm SQRDMULH : SIMDIndexedHSPatterns; // Generated by MachineCombine defm MLA : SIMDVectorIndexedHSTied<1, 0b0000, "mla", null_frag>; defm MLS : SIMDVectorIndexedHSTied<1, 0b0100, "mls", null_frag>; defm MUL : SIMDVectorIndexedHS<0, 0b1000, "mul", mul>; defm SMLAL : SIMDVectorIndexedLongSDTied<0, 0b0010, "smlal", TriOpFrag<(add node:$LHS, (AArch64smull node:$MHS, node:$RHS))>>; defm SMLSL : SIMDVectorIndexedLongSDTied<0, 0b0110, "smlsl", TriOpFrag<(sub node:$LHS, (AArch64smull node:$MHS, node:$RHS))>>; defm SMULL : SIMDVectorIndexedLongSD<0, 0b1010, "smull", AArch64smull>; defm SQDMLAL : SIMDIndexedLongSQDMLXSDTied<0, 0b0011, "sqdmlal", int_aarch64_neon_sqadd>; defm SQDMLSL : SIMDIndexedLongSQDMLXSDTied<0, 0b0111, "sqdmlsl", int_aarch64_neon_sqsub>; defm SQRDMLAH : SIMDIndexedSQRDMLxHSDTied<1, 0b1101, "sqrdmlah", int_aarch64_neon_sqrdmlah>; defm SQRDMLSH : SIMDIndexedSQRDMLxHSDTied<1, 0b1111, "sqrdmlsh", int_aarch64_neon_sqrdmlsh>; defm SQDMULL : SIMDIndexedLongSD<0, 0b1011, "sqdmull", int_aarch64_neon_sqdmull>; defm UMLAL : SIMDVectorIndexedLongSDTied<1, 0b0010, "umlal", TriOpFrag<(add node:$LHS, (AArch64umull node:$MHS, node:$RHS))>>; defm UMLSL : SIMDVectorIndexedLongSDTied<1, 0b0110, "umlsl", TriOpFrag<(sub node:$LHS, (AArch64umull node:$MHS, node:$RHS))>>; defm UMULL : SIMDVectorIndexedLongSD<1, 0b1010, "umull", AArch64umull>; // A scalar 
sqdmull with the second operand being a vector lane can be // handled directly with the indexed instruction encoding. def : Pat<(int_aarch64_neon_sqdmulls_scalar (i32 FPR32:$Rn), (vector_extract (v4i32 V128:$Vm), VectorIndexS:$idx)), (SQDMULLv1i64_indexed FPR32:$Rn, V128:$Vm, VectorIndexS:$idx)>; //---------------------------------------------------------------------------- // AdvSIMD scalar shift instructions //---------------------------------------------------------------------------- defm FCVTZS : SIMDFPScalarRShift<0, 0b11111, "fcvtzs">; defm FCVTZU : SIMDFPScalarRShift<1, 0b11111, "fcvtzu">; defm SCVTF : SIMDFPScalarRShift<0, 0b11100, "scvtf">; defm UCVTF : SIMDFPScalarRShift<1, 0b11100, "ucvtf">; // Codegen patterns for the above. We don't put these directly on the // instructions because TableGen's type inference can't handle the truth. // Having the same base pattern for fp <--> int totally freaks it out. def : Pat<(int_aarch64_neon_vcvtfp2fxs FPR32:$Rn, vecshiftR32:$imm), (FCVTZSs FPR32:$Rn, vecshiftR32:$imm)>; def : Pat<(int_aarch64_neon_vcvtfp2fxu FPR32:$Rn, vecshiftR32:$imm), (FCVTZUs FPR32:$Rn, vecshiftR32:$imm)>; def : Pat<(i64 (int_aarch64_neon_vcvtfp2fxs (f64 FPR64:$Rn), vecshiftR64:$imm)), (FCVTZSd FPR64:$Rn, vecshiftR64:$imm)>; def : Pat<(i64 (int_aarch64_neon_vcvtfp2fxu (f64 FPR64:$Rn), vecshiftR64:$imm)), (FCVTZUd FPR64:$Rn, vecshiftR64:$imm)>; def : Pat<(v1i64 (int_aarch64_neon_vcvtfp2fxs (v1f64 FPR64:$Rn), vecshiftR64:$imm)), (FCVTZSd FPR64:$Rn, vecshiftR64:$imm)>; def : Pat<(v1i64 (int_aarch64_neon_vcvtfp2fxu (v1f64 FPR64:$Rn), vecshiftR64:$imm)), (FCVTZUd FPR64:$Rn, vecshiftR64:$imm)>; def : Pat<(int_aarch64_neon_vcvtfxu2fp FPR32:$Rn, vecshiftR32:$imm), (UCVTFs FPR32:$Rn, vecshiftR32:$imm)>; def : Pat<(f64 (int_aarch64_neon_vcvtfxu2fp (i64 FPR64:$Rn), vecshiftR64:$imm)), (UCVTFd FPR64:$Rn, vecshiftR64:$imm)>; def : Pat<(v1f64 (int_aarch64_neon_vcvtfxs2fp (v1i64 FPR64:$Rn), vecshiftR64:$imm)), (SCVTFd FPR64:$Rn, vecshiftR64:$imm)>; def : Pat<(f64 (int_aarch64_neon_vcvtfxs2fp (i64 FPR64:$Rn), vecshiftR64:$imm)), (SCVTFd FPR64:$Rn, vecshiftR64:$imm)>; def : Pat<(v1f64 (int_aarch64_neon_vcvtfxu2fp (v1i64 FPR64:$Rn), vecshiftR64:$imm)), (UCVTFd FPR64:$Rn, vecshiftR64:$imm)>; def : Pat<(int_aarch64_neon_vcvtfxs2fp FPR32:$Rn, vecshiftR32:$imm), (SCVTFs FPR32:$Rn, vecshiftR32:$imm)>; // Patterns for FP16 Intrinsics - requires reg copy to/from as i16s not supported. 
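// (i16 is not a legal scalar type, so the fixed-point integer arrives in the
// low bits of a wider FPR; the patterns below take the hsub subregister of
// that value and feed it straight to the half-precision scalar convert. For
// example, the first pattern turns a sign-extended-from-i16 operand into a
// plain SCVTFh on the low 16 bits.)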
def : Pat<(f16 (int_aarch64_neon_vcvtfxs2fp (i32 (sext_inreg FPR32:$Rn, i16)), vecshiftR16:$imm)), (SCVTFh (f16 (EXTRACT_SUBREG FPR32:$Rn, hsub)), vecshiftR16:$imm)>; def : Pat<(f16 (int_aarch64_neon_vcvtfxs2fp (i32 FPR32:$Rn), vecshiftR16:$imm)), (SCVTFh (f16 (EXTRACT_SUBREG FPR32:$Rn, hsub)), vecshiftR16:$imm)>; def : Pat<(f16 (int_aarch64_neon_vcvtfxs2fp (i64 FPR64:$Rn), vecshiftR16:$imm)), (SCVTFh (f16 (EXTRACT_SUBREG FPR64:$Rn, hsub)), vecshiftR16:$imm)>; def : Pat<(f16 (int_aarch64_neon_vcvtfxu2fp (and FPR32:$Rn, (i32 65535)), vecshiftR16:$imm)), (UCVTFh (f16 (EXTRACT_SUBREG FPR32:$Rn, hsub)), vecshiftR16:$imm)>; def : Pat<(f16 (int_aarch64_neon_vcvtfxu2fp FPR32:$Rn, vecshiftR16:$imm)), (UCVTFh (f16 (EXTRACT_SUBREG FPR32:$Rn, hsub)), vecshiftR16:$imm)>; def : Pat<(f16 (int_aarch64_neon_vcvtfxu2fp (i64 FPR64:$Rn), vecshiftR16:$imm)), (UCVTFh (f16 (EXTRACT_SUBREG FPR64:$Rn, hsub)), vecshiftR16:$imm)>; def : Pat<(i32 (int_aarch64_neon_vcvtfp2fxs (f16 FPR16:$Rn), vecshiftR32:$imm)), (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), (FCVTZSh FPR16:$Rn, vecshiftR32:$imm), hsub))>; def : Pat<(i64 (int_aarch64_neon_vcvtfp2fxs (f16 FPR16:$Rn), vecshiftR64:$imm)), (i64 (INSERT_SUBREG (i64 (IMPLICIT_DEF)), (FCVTZSh FPR16:$Rn, vecshiftR64:$imm), hsub))>; def : Pat<(i32 (int_aarch64_neon_vcvtfp2fxu (f16 FPR16:$Rn), vecshiftR32:$imm)), (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), (FCVTZUh FPR16:$Rn, vecshiftR32:$imm), hsub))>; def : Pat<(i64 (int_aarch64_neon_vcvtfp2fxu (f16 FPR16:$Rn), vecshiftR64:$imm)), (i64 (INSERT_SUBREG (i64 (IMPLICIT_DEF)), (FCVTZUh FPR16:$Rn, vecshiftR64:$imm), hsub))>; def : Pat<(i32 (int_aarch64_neon_facge (f16 FPR16:$Rn), (f16 FPR16:$Rm))), (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), (FACGE16 FPR16:$Rn, FPR16:$Rm), hsub))>; def : Pat<(i32 (int_aarch64_neon_facgt (f16 FPR16:$Rn), (f16 FPR16:$Rm))), (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), (FACGT16 FPR16:$Rn, FPR16:$Rm), hsub))>; defm SHL : SIMDScalarLShiftD< 0, 0b01010, "shl", AArch64vshl>; defm SLI : SIMDScalarLShiftDTied<1, 0b01010, "sli">; defm SQRSHRN : SIMDScalarRShiftBHS< 0, 0b10011, "sqrshrn", int_aarch64_neon_sqrshrn>; defm SQRSHRUN : SIMDScalarRShiftBHS< 1, 0b10001, "sqrshrun", int_aarch64_neon_sqrshrun>; defm SQSHLU : SIMDScalarLShiftBHSD<1, 0b01100, "sqshlu", AArch64sqshlui>; defm SQSHL : SIMDScalarLShiftBHSD<0, 0b01110, "sqshl", AArch64sqshli>; defm SQSHRN : SIMDScalarRShiftBHS< 0, 0b10010, "sqshrn", int_aarch64_neon_sqshrn>; defm SQSHRUN : SIMDScalarRShiftBHS< 1, 0b10000, "sqshrun", int_aarch64_neon_sqshrun>; defm SRI : SIMDScalarRShiftDTied< 1, 0b01000, "sri">; defm SRSHR : SIMDScalarRShiftD< 0, 0b00100, "srshr", AArch64srshri>; defm SRSRA : SIMDScalarRShiftDTied< 0, 0b00110, "srsra", TriOpFrag<(add node:$LHS, (AArch64srshri node:$MHS, node:$RHS))>>; defm SSHR : SIMDScalarRShiftD< 0, 0b00000, "sshr", AArch64vashr>; defm SSRA : SIMDScalarRShiftDTied< 0, 0b00010, "ssra", TriOpFrag<(add_and_or_is_add node:$LHS, (AArch64vashr node:$MHS, node:$RHS))>>; defm UQRSHRN : SIMDScalarRShiftBHS< 1, 0b10011, "uqrshrn", int_aarch64_neon_uqrshrn>; defm UQSHL : SIMDScalarLShiftBHSD<1, 0b01110, "uqshl", AArch64uqshli>; defm UQSHRN : SIMDScalarRShiftBHS< 1, 0b10010, "uqshrn", int_aarch64_neon_uqshrn>; defm URSHR : SIMDScalarRShiftD< 1, 0b00100, "urshr", AArch64urshri>; defm URSRA : SIMDScalarRShiftDTied< 1, 0b00110, "ursra", TriOpFrag<(add node:$LHS, (AArch64urshri node:$MHS, node:$RHS))>>; defm USHR : SIMDScalarRShiftD< 1, 0b00000, "ushr", AArch64vlshr>; defm USRA : SIMDScalarRShiftDTied< 1, 0b00010, "usra", TriOpFrag<(add_and_or_is_add 
node:$LHS, (AArch64vlshr node:$MHS, node:$RHS))>>; //---------------------------------------------------------------------------- // AdvSIMD vector shift instructions //---------------------------------------------------------------------------- defm FCVTZS:SIMDVectorRShiftSD<0, 0b11111, "fcvtzs", int_aarch64_neon_vcvtfp2fxs>; defm FCVTZU:SIMDVectorRShiftSD<1, 0b11111, "fcvtzu", int_aarch64_neon_vcvtfp2fxu>; defm SCVTF: SIMDVectorRShiftToFP<0, 0b11100, "scvtf", int_aarch64_neon_vcvtfxs2fp>; defm RSHRN : SIMDVectorRShiftNarrowBHS<0, 0b10001, "rshrn", AArch64rshrn>; defm SHL : SIMDVectorLShiftBHSD<0, 0b01010, "shl", AArch64vshl>; let Predicates = [HasNEON] in { def : Pat<(v2f32 (sint_to_fp (v2i32 (AArch64vashr_exact v2i32:$Vn, i32:$shift)))), (SCVTFv2i32_shift $Vn, vecshiftR32:$shift)>; def : Pat<(v4f32 (sint_to_fp (v4i32 (AArch64vashr_exact v4i32:$Vn, i32:$shift)))), (SCVTFv4i32_shift $Vn, vecshiftR32:$shift)>; def : Pat<(v2f64 (sint_to_fp (v2i64 (AArch64vashr_exact v2i64:$Vn, i32:$shift)))), (SCVTFv2i64_shift $Vn, vecshiftR64:$shift)>; } let Predicates = [HasNEON, HasFullFP16] in { def : Pat<(v4f16 (sint_to_fp (v4i16 (AArch64vashr_exact v4i16:$Vn, i32:$shift)))), (SCVTFv4i16_shift $Vn, vecshiftR16:$shift)>; def : Pat<(v8f16 (sint_to_fp (v8i16 (AArch64vashr_exact v8i16:$Vn, i32:$shift)))), (SCVTFv8i16_shift $Vn, vecshiftR16:$shift)>; } // X << 1 ==> X + X class SHLToADDPat : Pat<(ty (AArch64vshl (ty regtype:$Rn), (i32 1))), (!cast("ADD"#ty) regtype:$Rn, regtype:$Rn)>; def : SHLToADDPat; def : SHLToADDPat; def : SHLToADDPat; def : SHLToADDPat; def : SHLToADDPat; def : SHLToADDPat; def : SHLToADDPat; defm SHRN : SIMDVectorRShiftNarrowBHS<0, 0b10000, "shrn", BinOpFrag<(trunc (AArch64vashr node:$LHS, node:$RHS))>>; defm SLI : SIMDVectorLShiftBHSDTied<1, 0b01010, "sli", AArch64vsli>; def : Pat<(v1i64 (AArch64vsli (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn), (i32 vecshiftL64:$imm))), (SLId FPR64:$Rd, FPR64:$Rn, vecshiftL64:$imm)>; defm SQRSHRN : SIMDVectorRShiftNarrowBHS<0, 0b10011, "sqrshrn", int_aarch64_neon_sqrshrn>; defm SQRSHRUN: SIMDVectorRShiftNarrowBHS<1, 0b10001, "sqrshrun", int_aarch64_neon_sqrshrun>; defm SQSHLU : SIMDVectorLShiftBHSD<1, 0b01100, "sqshlu", AArch64sqshlui>; defm SQSHL : SIMDVectorLShiftBHSD<0, 0b01110, "sqshl", AArch64sqshli>; defm SQSHRN : SIMDVectorRShiftNarrowBHS<0, 0b10010, "sqshrn", int_aarch64_neon_sqshrn>; defm SQSHRUN : SIMDVectorRShiftNarrowBHS<1, 0b10000, "sqshrun", int_aarch64_neon_sqshrun>; defm SRI : SIMDVectorRShiftBHSDTied<1, 0b01000, "sri", AArch64vsri>; def : Pat<(v1i64 (AArch64vsri (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn), (i32 vecshiftR64:$imm))), (SRId FPR64:$Rd, FPR64:$Rn, vecshiftR64:$imm)>; defm SRSHR : SIMDVectorRShiftBHSD<0, 0b00100, "srshr", AArch64srshri>; defm SRSRA : SIMDVectorRShiftBHSDTied<0, 0b00110, "srsra", TriOpFrag<(add node:$LHS, (AArch64srshri node:$MHS, node:$RHS))> >; defm SSHLL : SIMDVectorLShiftLongBHSD<0, 0b10100, "sshll", BinOpFrag<(AArch64vshl (sext node:$LHS), node:$RHS)>>; defm SSHR : SIMDVectorRShiftBHSD<0, 0b00000, "sshr", AArch64vashr>; defm SSRA : SIMDVectorRShiftBHSDTied<0, 0b00010, "ssra", TriOpFrag<(add_and_or_is_add node:$LHS, (AArch64vashr node:$MHS, node:$RHS))>>; defm UCVTF : SIMDVectorRShiftToFP<1, 0b11100, "ucvtf", int_aarch64_neon_vcvtfxu2fp>; defm UQRSHRN : SIMDVectorRShiftNarrowBHS<1, 0b10011, "uqrshrn", int_aarch64_neon_uqrshrn>; defm UQSHL : SIMDVectorLShiftBHSD<1, 0b01110, "uqshl", AArch64uqshli>; defm UQSHRN : SIMDVectorRShiftNarrowBHS<1, 0b10010, "uqshrn", int_aarch64_neon_uqshrn>; defm URSHR : 
SIMDVectorRShiftBHSD<1, 0b00100, "urshr", AArch64urshri>; defm URSRA : SIMDVectorRShiftBHSDTied<1, 0b00110, "ursra", TriOpFrag<(add node:$LHS, (AArch64urshri node:$MHS, node:$RHS))> >; defm USHLL : SIMDVectorLShiftLongBHSD<1, 0b10100, "ushll", BinOpFrag<(AArch64vshl (zext node:$LHS), node:$RHS)>>; defm USHR : SIMDVectorRShiftBHSD<1, 0b00000, "ushr", AArch64vlshr>; defm USRA : SIMDVectorRShiftBHSDTied<1, 0b00010, "usra", TriOpFrag<(add_and_or_is_add node:$LHS, (AArch64vlshr node:$MHS, node:$RHS))> >; def VImm0080: PatLeaf<(AArch64movi_shift (i32 128), (i32 0))>; def VImm00008000: PatLeaf<(AArch64movi_shift (i32 128), (i32 8))>; def VImm0000000080000000: PatLeaf<(AArch64NvCast (v2f64 (fneg (AArch64NvCast (v4i32 (AArch64movi_shift (i32 128), (i32 24)))))))>; // RADDHN patterns for when RSHRN shifts by half the size of the vector element def : Pat<(v8i8 (trunc (AArch64vlshr (add (v8i16 V128:$Vn), VImm0080), (i32 8)))), (RADDHNv8i16_v8i8 V128:$Vn, (v8i16 (MOVIv2d_ns (i32 0))))>; def : Pat<(v4i16 (trunc (AArch64vlshr (add (v4i32 V128:$Vn), VImm00008000), (i32 16)))), (RADDHNv4i32_v4i16 V128:$Vn, (v4i32 (MOVIv2d_ns (i32 0))))>; let AddedComplexity = 5 in def : Pat<(v2i32 (trunc (AArch64vlshr (add (v2i64 V128:$Vn), VImm0000000080000000), (i32 32)))), (RADDHNv2i64_v2i32 V128:$Vn, (v2i64 (MOVIv2d_ns (i32 0))))>; def : Pat<(v8i8 (int_aarch64_neon_rshrn (v8i16 V128:$Vn), (i32 8))), (RADDHNv8i16_v8i8 V128:$Vn, (v8i16 (MOVIv2d_ns (i32 0))))>; def : Pat<(v4i16 (int_aarch64_neon_rshrn (v4i32 V128:$Vn), (i32 16))), (RADDHNv4i32_v4i16 V128:$Vn, (v4i32 (MOVIv2d_ns (i32 0))))>; def : Pat<(v2i32 (int_aarch64_neon_rshrn (v2i64 V128:$Vn), (i32 32))), (RADDHNv2i64_v2i32 V128:$Vn, (v2i64 (MOVIv2d_ns (i32 0))))>; // RADDHN2 patterns for when RSHRN shifts by half the size of the vector element def : Pat<(v16i8 (concat_vectors (v8i8 V64:$Vd), (v8i8 (trunc (AArch64vlshr (add (v8i16 V128:$Vn), VImm0080), (i32 8)))))), (RADDHNv8i16_v16i8 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn, (v8i16 (MOVIv2d_ns (i32 0))))>; def : Pat<(v8i16 (concat_vectors (v4i16 V64:$Vd), (v4i16 (trunc (AArch64vlshr (add (v4i32 V128:$Vn), VImm00008000), (i32 16)))))), (RADDHNv4i32_v8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn, (v4i32 (MOVIv2d_ns (i32 0))))>; let AddedComplexity = 5 in def : Pat<(v4i32 (concat_vectors (v2i32 V64:$Vd), (v2i32 (trunc (AArch64vlshr (add (v2i64 V128:$Vn), VImm0000000080000000), (i32 32)))))), (RADDHNv2i64_v4i32 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn, (v2i64 (MOVIv2d_ns (i32 0))))>; def : Pat<(v16i8 (concat_vectors (v8i8 V64:$Vd), (v8i8 (int_aarch64_neon_rshrn (v8i16 V128:$Vn), (i32 8))))), (RADDHNv8i16_v16i8 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn, (v8i16 (MOVIv2d_ns (i32 0))))>; def : Pat<(v8i16 (concat_vectors (v4i16 V64:$Vd), (v4i16 (int_aarch64_neon_rshrn (v4i32 V128:$Vn), (i32 16))))), (RADDHNv4i32_v8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn, (v4i32 (MOVIv2d_ns (i32 0))))>; def : Pat<(v4i32 (concat_vectors (v2i32 V64:$Vd), (v2i32 (int_aarch64_neon_rshrn (v2i64 V128:$Vn), (i32 32))))), (RADDHNv2i64_v4i32 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn, (v2i64 (MOVIv2d_ns (i32 0))))>; // SHRN patterns for when a logical right shift was used instead of arithmetic // (the immediate guarantees no sign bits actually end up in the result so it // doesn't matter). 
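// (For example, the v8i16 -> v8i8 patterns below narrow with a shift amount s
// in [1,8]; the byte that survives is bits [s+7:s] of the original element,
// and a logical and an arithmetic shift only differ in the bits shifted in at
// the top, which the truncate discards anyway.)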
def : Pat<(v8i8 (trunc (AArch64vlshr (v8i16 V128:$Rn), vecshiftR16Narrow:$imm))), (SHRNv8i8_shift V128:$Rn, vecshiftR16Narrow:$imm)>; def : Pat<(v4i16 (trunc (AArch64vlshr (v4i32 V128:$Rn), vecshiftR32Narrow:$imm))), (SHRNv4i16_shift V128:$Rn, vecshiftR32Narrow:$imm)>; def : Pat<(v2i32 (trunc (AArch64vlshr (v2i64 V128:$Rn), vecshiftR64Narrow:$imm))), (SHRNv2i32_shift V128:$Rn, vecshiftR64Narrow:$imm)>; def : Pat<(v16i8 (concat_vectors (v8i8 V64:$Rd), (trunc (AArch64vlshr (v8i16 V128:$Rn), vecshiftR16Narrow:$imm)))), (SHRNv16i8_shift (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn, vecshiftR16Narrow:$imm)>; def : Pat<(v8i16 (concat_vectors (v4i16 V64:$Rd), (trunc (AArch64vlshr (v4i32 V128:$Rn), vecshiftR32Narrow:$imm)))), (SHRNv8i16_shift (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn, vecshiftR32Narrow:$imm)>; def : Pat<(v4i32 (concat_vectors (v2i32 V64:$Rd), (trunc (AArch64vlshr (v2i64 V128:$Rn), vecshiftR64Narrow:$imm)))), (SHRNv4i32_shift (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn, vecshiftR32Narrow:$imm)>; // Vector sign and zero extensions are implemented with SSHLL and USSHLL. // Anyexts are implemented as zexts. def : Pat<(v8i16 (sext (v8i8 V64:$Rn))), (SSHLLv8i8_shift V64:$Rn, (i32 0))>; def : Pat<(v8i16 (zext (v8i8 V64:$Rn))), (USHLLv8i8_shift V64:$Rn, (i32 0))>; def : Pat<(v8i16 (anyext (v8i8 V64:$Rn))), (USHLLv8i8_shift V64:$Rn, (i32 0))>; def : Pat<(v4i32 (sext (v4i16 V64:$Rn))), (SSHLLv4i16_shift V64:$Rn, (i32 0))>; def : Pat<(v4i32 (zext (v4i16 V64:$Rn))), (USHLLv4i16_shift V64:$Rn, (i32 0))>; def : Pat<(v4i32 (anyext (v4i16 V64:$Rn))), (USHLLv4i16_shift V64:$Rn, (i32 0))>; def : Pat<(v2i64 (sext (v2i32 V64:$Rn))), (SSHLLv2i32_shift V64:$Rn, (i32 0))>; def : Pat<(v2i64 (zext (v2i32 V64:$Rn))), (USHLLv2i32_shift V64:$Rn, (i32 0))>; def : Pat<(v2i64 (anyext (v2i32 V64:$Rn))), (USHLLv2i32_shift V64:$Rn, (i32 0))>; // Vector bf16 -> fp32 is implemented morally as a zext + shift. def : Pat<(v4f32 (any_fpextend (v4bf16 V64:$Rn))), (SHLLv4i16 V64:$Rn)>; // Also match an extend from the upper half of a 128 bit source register. 
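// For example (illustrative), (v8i16 (zext (extract_high_v16i8 Vn))) selects
// to the second-half form of the instruction, ushll2, so no separate move of
// the high half is needed; the patterns below cover sext, zext and anyext.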
def : Pat<(v8i16 (anyext (v8i8 (extract_high_v16i8 (v16i8 V128:$Rn)) ))), (USHLLv16i8_shift V128:$Rn, (i32 0))>; def : Pat<(v8i16 (zext (v8i8 (extract_high_v16i8 (v16i8 V128:$Rn)) ))), (USHLLv16i8_shift V128:$Rn, (i32 0))>; def : Pat<(v8i16 (sext (v8i8 (extract_high_v16i8 (v16i8 V128:$Rn)) ))), (SSHLLv16i8_shift V128:$Rn, (i32 0))>; def : Pat<(v4i32 (anyext (v4i16 (extract_high_v8i16 (v8i16 V128:$Rn)) ))), (USHLLv8i16_shift V128:$Rn, (i32 0))>; def : Pat<(v4i32 (zext (v4i16 (extract_high_v8i16 (v8i16 V128:$Rn)) ))), (USHLLv8i16_shift V128:$Rn, (i32 0))>; def : Pat<(v4i32 (sext (v4i16 (extract_high_v8i16 (v8i16 V128:$Rn)) ))), (SSHLLv8i16_shift V128:$Rn, (i32 0))>; def : Pat<(v2i64 (anyext (v2i32 (extract_high_v4i32 (v4i32 V128:$Rn)) ))), (USHLLv4i32_shift V128:$Rn, (i32 0))>; def : Pat<(v2i64 (zext (v2i32 (extract_high_v4i32 (v4i32 V128:$Rn)) ))), (USHLLv4i32_shift V128:$Rn, (i32 0))>; def : Pat<(v2i64 (sext (v2i32 (extract_high_v4i32 (v4i32 V128:$Rn)) ))), (SSHLLv4i32_shift V128:$Rn, (i32 0))>; let Predicates = [HasNEON] in { // Vector shift sxtl aliases def : InstAlias<"sxtl.8h $dst, $src1", (SSHLLv8i8_shift V128:$dst, V64:$src1, 0)>; def : InstAlias<"sxtl $dst.8h, $src1.8b", (SSHLLv8i8_shift V128:$dst, V64:$src1, 0)>; def : InstAlias<"sxtl.4s $dst, $src1", (SSHLLv4i16_shift V128:$dst, V64:$src1, 0)>; def : InstAlias<"sxtl $dst.4s, $src1.4h", (SSHLLv4i16_shift V128:$dst, V64:$src1, 0)>; def : InstAlias<"sxtl.2d $dst, $src1", (SSHLLv2i32_shift V128:$dst, V64:$src1, 0)>; def : InstAlias<"sxtl $dst.2d, $src1.2s", (SSHLLv2i32_shift V128:$dst, V64:$src1, 0)>; // Vector shift sxtl2 aliases def : InstAlias<"sxtl2.8h $dst, $src1", (SSHLLv16i8_shift V128:$dst, V128:$src1, 0)>; def : InstAlias<"sxtl2 $dst.8h, $src1.16b", (SSHLLv16i8_shift V128:$dst, V128:$src1, 0)>; def : InstAlias<"sxtl2.4s $dst, $src1", (SSHLLv8i16_shift V128:$dst, V128:$src1, 0)>; def : InstAlias<"sxtl2 $dst.4s, $src1.8h", (SSHLLv8i16_shift V128:$dst, V128:$src1, 0)>; def : InstAlias<"sxtl2.2d $dst, $src1", (SSHLLv4i32_shift V128:$dst, V128:$src1, 0)>; def : InstAlias<"sxtl2 $dst.2d, $src1.4s", (SSHLLv4i32_shift V128:$dst, V128:$src1, 0)>; // Vector shift uxtl aliases def : InstAlias<"uxtl.8h $dst, $src1", (USHLLv8i8_shift V128:$dst, V64:$src1, 0)>; def : InstAlias<"uxtl $dst.8h, $src1.8b", (USHLLv8i8_shift V128:$dst, V64:$src1, 0)>; def : InstAlias<"uxtl.4s $dst, $src1", (USHLLv4i16_shift V128:$dst, V64:$src1, 0)>; def : InstAlias<"uxtl $dst.4s, $src1.4h", (USHLLv4i16_shift V128:$dst, V64:$src1, 0)>; def : InstAlias<"uxtl.2d $dst, $src1", (USHLLv2i32_shift V128:$dst, V64:$src1, 0)>; def : InstAlias<"uxtl $dst.2d, $src1.2s", (USHLLv2i32_shift V128:$dst, V64:$src1, 0)>; // Vector shift uxtl2 aliases def : InstAlias<"uxtl2.8h $dst, $src1", (USHLLv16i8_shift V128:$dst, V128:$src1, 0)>; def : InstAlias<"uxtl2 $dst.8h, $src1.16b", (USHLLv16i8_shift V128:$dst, V128:$src1, 0)>; def : InstAlias<"uxtl2.4s $dst, $src1", (USHLLv8i16_shift V128:$dst, V128:$src1, 0)>; def : InstAlias<"uxtl2 $dst.4s, $src1.8h", (USHLLv8i16_shift V128:$dst, V128:$src1, 0)>; def : InstAlias<"uxtl2.2d $dst, $src1", (USHLLv4i32_shift V128:$dst, V128:$src1, 0)>; def : InstAlias<"uxtl2 $dst.2d, $src1.4s", (USHLLv4i32_shift V128:$dst, V128:$src1, 0)>; } def abs_f16 : OutPatFrag<(ops node:$Rn), (EXTRACT_SUBREG (f32 (COPY_TO_REGCLASS (i32 (ANDWri (i32 (COPY_TO_REGCLASS (INSERT_SUBREG (f32 (IMPLICIT_DEF)), node:$Rn, hsub), GPR32)), (i32 (logical_imm32_XFORM(i32 0x7fff))))), FPR32)), hsub)>; def : Pat<(f16 (fabs (f16 FPR16:$Rn))), (f16 (abs_f16 (f16 FPR16:$Rn)))>; def 
: Pat<(bf16 (fabs (bf16 FPR16:$Rn))), (bf16 (abs_f16 (bf16 FPR16:$Rn)))>; def neg_f16 : OutPatFrag<(ops node:$Rn), (EXTRACT_SUBREG (f32 (COPY_TO_REGCLASS (i32 (EORWri (i32 (COPY_TO_REGCLASS (INSERT_SUBREG (f32 (IMPLICIT_DEF)), node:$Rn, hsub), GPR32)), (i32 (logical_imm32_XFORM(i32 0x8000))))), FPR32)), hsub)>; def : Pat<(f16 (fneg (f16 FPR16:$Rn))), (f16 (neg_f16 (f16 FPR16:$Rn)))>; def : Pat<(bf16 (fneg (bf16 FPR16:$Rn))), (bf16 (neg_f16 (bf16 FPR16:$Rn)))>; let Predicates = [HasNEON] in { def : Pat<(v4f16 (fabs (v4f16 V64:$Rn))), (v4f16 (BICv4i16 (v4f16 V64:$Rn), (i32 128), (i32 8)))>; def : Pat<(v4bf16 (fabs (v4bf16 V64:$Rn))), (v4bf16 (BICv4i16 (v4bf16 V64:$Rn), (i32 128), (i32 8)))>; def : Pat<(v8f16 (fabs (v8f16 V128:$Rn))), (v8f16 (BICv8i16 (v8f16 V128:$Rn), (i32 128), (i32 8)))>; def : Pat<(v8bf16 (fabs (v8bf16 V128:$Rn))), (v8bf16 (BICv8i16 (v8bf16 V128:$Rn), (i32 128), (i32 8)))>; def : Pat<(v4f16 (fneg (v4f16 V64:$Rn))), (v4f16 (EORv8i8 (v4f16 V64:$Rn), (MOVIv4i16 (i32 128), (i32 8))))>; def : Pat<(v4bf16 (fneg (v4bf16 V64:$Rn))), (v4bf16 (EORv8i8 (v4bf16 V64:$Rn), (v4i16 (MOVIv4i16 (i32 0x80), (i32 8)))))>; def : Pat<(v8f16 (fneg (v8f16 V128:$Rn))), (v8f16 (EORv16i8 (v8f16 V128:$Rn), (MOVIv8i16 (i32 128), (i32 8))))>; def : Pat<(v8bf16 (fneg (v8bf16 V128:$Rn))), (v8bf16 (EORv16i8 (v8bf16 V128:$Rn), (v8i16 (MOVIv8i16 (i32 0x80), (i32 8)))))>; } // If an integer is about to be converted to a floating point value, // just load it on the floating point unit. // These patterns are more complex because floating point loads do not // support sign extension. // The sign extension has to be explicitly added and is only supported for // one step: byte-to-half, half-to-word, word-to-doubleword. // SCVTF GPR -> FPR is 9 cycles. // SCVTF FPR -> FPR is 4 cyclces. // (sign extension with lengthen) SXTL FPR -> FPR is 2 cycles. // Therefore, we can do 2 sign extensions and one SCVTF FPR -> FPR // and still being faster. // However, this is not good for code size. // 8-bits -> float. 2 sizes step-up. class SExtLoadi8CVTf32Pat : Pat<(f32 (sint_to_fp (i32 (sextloadi8 addrmode)))), (SCVTFv1i32 (f32 (EXTRACT_SUBREG (SSHLLv4i16_shift (f64 (EXTRACT_SUBREG (SSHLLv8i8_shift (INSERT_SUBREG (f64 (IMPLICIT_DEF)), INST, bsub), 0), dsub)), 0), ssub)))>, Requires<[NotForCodeSize, UseAlternateSExtLoadCVTF32, HasNEON]>; def : SExtLoadi8CVTf32Pat<(ro8.Wpat GPR64sp:$Rn, GPR32:$Rm, ro8.Wext:$ext), (LDRBroW GPR64sp:$Rn, GPR32:$Rm, ro8.Wext:$ext)>; def : SExtLoadi8CVTf32Pat<(ro8.Xpat GPR64sp:$Rn, GPR64:$Rm, ro8.Xext:$ext), (LDRBroX GPR64sp:$Rn, GPR64:$Rm, ro8.Xext:$ext)>; def : SExtLoadi8CVTf32Pat<(am_indexed8 GPR64sp:$Rn, uimm12s1:$offset), (LDRBui GPR64sp:$Rn, uimm12s1:$offset)>; def : SExtLoadi8CVTf32Pat<(am_unscaled8 GPR64sp:$Rn, simm9:$offset), (LDURBi GPR64sp:$Rn, simm9:$offset)>; // 16-bits -> float. 1 size step-up. 
class SExtLoadi16CVTf32Pat : Pat<(f32 (sint_to_fp (i32 (sextloadi16 addrmode)))), (SCVTFv1i32 (f32 (EXTRACT_SUBREG (SSHLLv4i16_shift (INSERT_SUBREG (f64 (IMPLICIT_DEF)), INST, hsub), 0), ssub)))>, Requires<[NotForCodeSize, UseAlternateSExtLoadCVTF32, HasNEON]>; def : SExtLoadi16CVTf32Pat<(ro16.Wpat GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext), (LDRHroW GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext)>; def : SExtLoadi16CVTf32Pat<(ro16.Xpat GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext), (LDRHroX GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext)>; def : SExtLoadi16CVTf32Pat<(am_indexed16 GPR64sp:$Rn, uimm12s2:$offset), (LDRHui GPR64sp:$Rn, uimm12s2:$offset)>; def : SExtLoadi16CVTf32Pat<(am_unscaled16 GPR64sp:$Rn, simm9:$offset), (LDURHi GPR64sp:$Rn, simm9:$offset)>; // 32-bits to 32-bits are handled in target specific dag combine: // performIntToFpCombine. // 64-bits integer to 32-bits floating point, not possible with // SCVTF on floating point registers (both source and destination // must have the same size). // Here are the patterns for 8, 16, 32, and 64-bits to double. // 8-bits -> double. 3 size step-up: give up. // 16-bits -> double. 2 size step. class SExtLoadi16CVTf64Pat : Pat <(f64 (sint_to_fp (i32 (sextloadi16 addrmode)))), (SCVTFv1i64 (f64 (EXTRACT_SUBREG (SSHLLv2i32_shift (f64 (EXTRACT_SUBREG (SSHLLv4i16_shift (INSERT_SUBREG (f64 (IMPLICIT_DEF)), INST, hsub), 0), dsub)), 0), dsub)))>, Requires<[NotForCodeSize, UseAlternateSExtLoadCVTF32, HasNEON]>; def : SExtLoadi16CVTf64Pat<(ro16.Wpat GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext), (LDRHroW GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext)>; def : SExtLoadi16CVTf64Pat<(ro16.Xpat GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext), (LDRHroX GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext)>; def : SExtLoadi16CVTf64Pat<(am_indexed16 GPR64sp:$Rn, uimm12s2:$offset), (LDRHui GPR64sp:$Rn, uimm12s2:$offset)>; def : SExtLoadi16CVTf64Pat<(am_unscaled16 GPR64sp:$Rn, simm9:$offset), (LDURHi GPR64sp:$Rn, simm9:$offset)>; // 32-bits -> double. 1 size step-up. class SExtLoadi32CVTf64Pat : Pat <(f64 (sint_to_fp (i32 (load addrmode)))), (SCVTFv1i64 (f64 (EXTRACT_SUBREG (SSHLLv2i32_shift (INSERT_SUBREG (f64 (IMPLICIT_DEF)), INST, ssub), 0), dsub)))>, Requires<[NotForCodeSize, UseAlternateSExtLoadCVTF32, HasNEON]>; def : SExtLoadi32CVTf64Pat<(ro32.Wpat GPR64sp:$Rn, GPR32:$Rm, ro32.Wext:$ext), (LDRSroW GPR64sp:$Rn, GPR32:$Rm, ro32.Wext:$ext)>; def : SExtLoadi32CVTf64Pat<(ro32.Xpat GPR64sp:$Rn, GPR64:$Rm, ro32.Xext:$ext), (LDRSroX GPR64sp:$Rn, GPR64:$Rm, ro32.Xext:$ext)>; def : SExtLoadi32CVTf64Pat<(am_indexed32 GPR64sp:$Rn, uimm12s4:$offset), (LDRSui GPR64sp:$Rn, uimm12s4:$offset)>; def : SExtLoadi32CVTf64Pat<(am_unscaled32 GPR64sp:$Rn, simm9:$offset), (LDURSi GPR64sp:$Rn, simm9:$offset)>; // 64-bits -> double are handled in target specific dag combine: // performIntToFpCombine. 
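// Worked example of the cost figures quoted above: the FPR-only route for an
// 8-bit load costs two SSHLL lengthening steps plus one FPR -> FPR SCVTF,
// roughly 2 + 2 + 4 = 8 cycles, versus about 9 cycles for a single
// GPR -> FPR SCVTF, which is why these patterns are only used under
// NotForCodeSize.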
//---------------------------------------------------------------------------- // AdvSIMD Load-Store Structure //---------------------------------------------------------------------------- defm LD1 : SIMDLd1Multiple<"ld1">; defm LD2 : SIMDLd2Multiple<"ld2">; defm LD3 : SIMDLd3Multiple<"ld3">; defm LD4 : SIMDLd4Multiple<"ld4">; defm ST1 : SIMDSt1Multiple<"st1">; defm ST2 : SIMDSt2Multiple<"st2">; defm ST3 : SIMDSt3Multiple<"st3">; defm ST4 : SIMDSt4Multiple<"st4">; class Ld1Pat : Pat<(ty (load GPR64sp:$Rn)), (INST GPR64sp:$Rn)>; def : Ld1Pat; def : Ld1Pat; def : Ld1Pat; def : Ld1Pat; def : Ld1Pat; def : Ld1Pat; def : Ld1Pat; def : Ld1Pat; class St1Pat : Pat<(store ty:$Vt, GPR64sp:$Rn), (INST ty:$Vt, GPR64sp:$Rn)>; def : St1Pat; def : St1Pat; def : St1Pat; def : St1Pat; def : St1Pat; def : St1Pat; def : St1Pat; def : St1Pat; //--- // Single-element //--- defm LD1R : SIMDLdR<0, 0b110, 0, "ld1r", "One", 1, 2, 4, 8>; defm LD2R : SIMDLdR<1, 0b110, 0, "ld2r", "Two", 2, 4, 8, 16>; defm LD3R : SIMDLdR<0, 0b111, 0, "ld3r", "Three", 3, 6, 12, 24>; defm LD4R : SIMDLdR<1, 0b111, 0, "ld4r", "Four", 4, 8, 16, 32>; let mayLoad = 1, hasSideEffects = 0 in { defm LD1 : SIMDLdSingleBTied<0, 0b000, "ld1", VecListOneb, GPR64pi1>; defm LD1 : SIMDLdSingleHTied<0, 0b010, 0, "ld1", VecListOneh, GPR64pi2>; defm LD1 : SIMDLdSingleSTied<0, 0b100, 0b00, "ld1", VecListOnes, GPR64pi4>; defm LD1 : SIMDLdSingleDTied<0, 0b100, 0b01, "ld1", VecListOned, GPR64pi8>; defm LD2 : SIMDLdSingleBTied<1, 0b000, "ld2", VecListTwob, GPR64pi2>; defm LD2 : SIMDLdSingleHTied<1, 0b010, 0, "ld2", VecListTwoh, GPR64pi4>; defm LD2 : SIMDLdSingleSTied<1, 0b100, 0b00, "ld2", VecListTwos, GPR64pi8>; defm LD2 : SIMDLdSingleDTied<1, 0b100, 0b01, "ld2", VecListTwod, GPR64pi16>; defm LD3 : SIMDLdSingleBTied<0, 0b001, "ld3", VecListThreeb, GPR64pi3>; defm LD3 : SIMDLdSingleHTied<0, 0b011, 0, "ld3", VecListThreeh, GPR64pi6>; defm LD3 : SIMDLdSingleSTied<0, 0b101, 0b00, "ld3", VecListThrees, GPR64pi12>; defm LD3 : SIMDLdSingleDTied<0, 0b101, 0b01, "ld3", VecListThreed, GPR64pi24>; defm LD4 : SIMDLdSingleBTied<1, 0b001, "ld4", VecListFourb, GPR64pi4>; defm LD4 : SIMDLdSingleHTied<1, 0b011, 0, "ld4", VecListFourh, GPR64pi8>; defm LD4 : SIMDLdSingleSTied<1, 0b101, 0b00, "ld4", VecListFours, GPR64pi16>; defm LD4 : SIMDLdSingleDTied<1, 0b101, 0b01, "ld4", VecListFourd, GPR64pi32>; } def : Pat<(v8i8 (AArch64dup (i32 (extloadi8 GPR64sp:$Rn)))), (LD1Rv8b GPR64sp:$Rn)>; def : Pat<(v16i8 (AArch64dup (i32 (extloadi8 GPR64sp:$Rn)))), (LD1Rv16b GPR64sp:$Rn)>; def : Pat<(v4i16 (AArch64dup (i32 (extloadi16 GPR64sp:$Rn)))), (LD1Rv4h GPR64sp:$Rn)>; def : Pat<(v8i16 (AArch64dup (i32 (extloadi16 GPR64sp:$Rn)))), (LD1Rv8h GPR64sp:$Rn)>; def : Pat<(v2i32 (AArch64dup (i32 (load GPR64sp:$Rn)))), (LD1Rv2s GPR64sp:$Rn)>; def : Pat<(v4i32 (AArch64dup (i32 (load GPR64sp:$Rn)))), (LD1Rv4s GPR64sp:$Rn)>; def : Pat<(v2i64 (AArch64dup (i64 (load GPR64sp:$Rn)))), (LD1Rv2d GPR64sp:$Rn)>; def : Pat<(v1i64 (AArch64dup (i64 (load GPR64sp:$Rn)))), (LD1Rv1d GPR64sp:$Rn)>; def : Pat<(v8i8 (AArch64duplane8 (v16i8 (insert_subvector undef, (v8i8 (load GPR64sp:$Rn)), (i64 0))), (i64 0))), (LD1Rv8b GPR64sp:$Rn)>; def : Pat<(v16i8 (AArch64duplane8 (v16i8 (load GPR64sp:$Rn)), (i64 0))), (LD1Rv16b GPR64sp:$Rn)>; def : Pat<(v4i16 (AArch64duplane16 (v8i16 (insert_subvector undef, (v4i16 (load GPR64sp:$Rn)), (i64 0))), (i64 0))), (LD1Rv4h GPR64sp:$Rn)>; def : Pat<(v8i16 (AArch64duplane16 (v8i16 (load GPR64sp:$Rn)), (i64 0))), (LD1Rv8h GPR64sp:$Rn)>; def : Pat<(v2i32 (AArch64duplane32 (v4i32 
(insert_subvector undef, (v2i32 (load GPR64sp:$Rn)), (i64 0))), (i64 0))), (LD1Rv2s GPR64sp:$Rn)>; def : Pat<(v4i32 (AArch64duplane32 (v4i32 (load GPR64sp:$Rn)), (i64 0))), (LD1Rv4s GPR64sp:$Rn)>; def : Pat<(v2i64 (AArch64duplane64 (v2i64 (load GPR64sp:$Rn)), (i64 0))), (LD1Rv2d GPR64sp:$Rn)>; // Grab the floating point version too def : Pat<(v2f32 (AArch64dup (f32 (load GPR64sp:$Rn)))), (LD1Rv2s GPR64sp:$Rn)>; def : Pat<(v4f32 (AArch64dup (f32 (load GPR64sp:$Rn)))), (LD1Rv4s GPR64sp:$Rn)>; def : Pat<(v2f64 (AArch64dup (f64 (load GPR64sp:$Rn)))), (LD1Rv2d GPR64sp:$Rn)>; def : Pat<(v1f64 (AArch64dup (f64 (load GPR64sp:$Rn)))), (LD1Rv1d GPR64sp:$Rn)>; def : Pat<(v4f16 (AArch64dup (f16 (load GPR64sp:$Rn)))), (LD1Rv4h GPR64sp:$Rn)>; def : Pat<(v8f16 (AArch64dup (f16 (load GPR64sp:$Rn)))), (LD1Rv8h GPR64sp:$Rn)>; def : Pat<(v4bf16 (AArch64dup (bf16 (load GPR64sp:$Rn)))), (LD1Rv4h GPR64sp:$Rn)>; def : Pat<(v8bf16 (AArch64dup (bf16 (load GPR64sp:$Rn)))), (LD1Rv8h GPR64sp:$Rn)>; class Ld1Lane128Pat : Pat<(vector_insert (VTy VecListOne128:$Rd), (STy (scalar_load GPR64sp:$Rn)), (i64 VecIndex:$idx)), (LD1 VecListOne128:$Rd, VecIndex:$idx, GPR64sp:$Rn)>; def : Ld1Lane128Pat; def : Ld1Lane128Pat; def : Ld1Lane128Pat; def : Ld1Lane128Pat; def : Ld1Lane128Pat; def : Ld1Lane128Pat; def : Ld1Lane128Pat; def : Ld1Lane128Pat; // Generate LD1 for extload if memory type does not match the // destination type, for example: // // (v4i32 (insert_vector_elt (load anyext from i8) idx)) // // In this case, the index must be adjusted to match LD1 type. // class Ld1Lane128IdxOpPat : Pat<(vector_insert (VTy VecListOne128:$Rd), (STy (scalar_load GPR64sp:$Rn)), (i64 VecIndex:$idx)), (LD1 VecListOne128:$Rd, (IdxOp VecIndex:$idx), GPR64sp:$Rn)>; class Ld1Lane64IdxOpPat : Pat<(vector_insert (VTy VecListOne64:$Rd), (STy (scalar_load GPR64sp:$Rn)), (i64 VecIndex:$idx)), (EXTRACT_SUBREG (LD1 (SUBREG_TO_REG (i32 0), VecListOne64:$Rd, dsub), (IdxOp VecIndex:$idx), GPR64sp:$Rn), dsub)>; def VectorIndexStoH : SDNodeXFormgetTargetConstant(N->getZExtValue() * 2, SDLoc(N), MVT::i64); }]>; def VectorIndexStoB : SDNodeXFormgetTargetConstant(N->getZExtValue() * 4, SDLoc(N), MVT::i64); }]>; def VectorIndexHtoB : SDNodeXFormgetTargetConstant(N->getZExtValue() * 2, SDLoc(N), MVT::i64); }]>; def : Ld1Lane128IdxOpPat; def : Ld1Lane128IdxOpPat; def : Ld1Lane128IdxOpPat; def : Ld1Lane64IdxOpPat; def : Ld1Lane64IdxOpPat; def : Ld1Lane64IdxOpPat; // Same as above, but the first element is populated using // scalar_to_vector + insert_subvector instead of insert_vector_elt. 
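// A hedged intrinsics-level illustration of the lane-insert loads matched by
// the Ld1Lane* patterns in this region; the "expect" note is an assumption
// about typical -O2 selection, not a guarantee. The first-element variants
// mentioned above follow below.
//
//   #include <arm_neon.h>
//   // Load one 32-bit element from memory directly into lane 2 of a vector.
//   // Expect a single "ld1 {v0.s}[2], [x0]" rather than a GPR load plus INS.
//   int32x4_t load_lane(int32x4_t v, const int32_t *p) {
//     return vld1q_lane_s32(p, v, 2);
//   }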
let Predicates = [HasNEON] in { class Ld1Lane128FirstElm : Pat<(ResultTy (scalar_to_vector (i32 (ExtLoad GPR64sp:$Rn)))), (ResultTy (EXTRACT_SUBREG (LD1 (VecTy (IMPLICIT_DEF)), 0, GPR64sp:$Rn), dsub))>; def : Ld1Lane128FirstElm; def : Ld1Lane128FirstElm; def : Ld1Lane128FirstElm; } class Ld1Lane64Pat : Pat<(vector_insert (VTy VecListOne64:$Rd), (STy (scalar_load GPR64sp:$Rn)), (i64 VecIndex:$idx)), (EXTRACT_SUBREG (LD1 (SUBREG_TO_REG (i32 0), VecListOne64:$Rd, dsub), VecIndex:$idx, GPR64sp:$Rn), dsub)>; def : Ld1Lane64Pat; def : Ld1Lane64Pat; def : Ld1Lane64Pat; def : Ld1Lane64Pat; def : Ld1Lane64Pat; def : Ld1Lane64Pat; defm LD1 : SIMDLdSt1SingleAliases<"ld1">; defm LD2 : SIMDLdSt2SingleAliases<"ld2">; defm LD3 : SIMDLdSt3SingleAliases<"ld3">; defm LD4 : SIMDLdSt4SingleAliases<"ld4">; // Stores defm ST1 : SIMDStSingleB<0, 0b000, "st1", VecListOneb, GPR64pi1>; defm ST1 : SIMDStSingleH<0, 0b010, 0, "st1", VecListOneh, GPR64pi2>; defm ST1 : SIMDStSingleS<0, 0b100, 0b00, "st1", VecListOnes, GPR64pi4>; defm ST1 : SIMDStSingleD<0, 0b100, 0b01, "st1", VecListOned, GPR64pi8>; let AddedComplexity = 19 in class St1Lane128Pat : Pat<(scalar_store (STy (vector_extract (VTy VecListOne128:$Vt), VecIndex:$idx)), GPR64sp:$Rn), (ST1 VecListOne128:$Vt, VecIndex:$idx, GPR64sp:$Rn)>; def : St1Lane128Pat; def : St1Lane128Pat; def : St1Lane128Pat; def : St1Lane128Pat; def : St1Lane128Pat; def : St1Lane128Pat; def : St1Lane128Pat; def : St1Lane128Pat; let AddedComplexity = 19 in class St1Lane64Pat : Pat<(scalar_store (STy (vector_extract (VTy VecListOne64:$Vt), VecIndex:$idx)), GPR64sp:$Rn), (ST1 (SUBREG_TO_REG (i32 0), VecListOne64:$Vt, dsub), VecIndex:$idx, GPR64sp:$Rn)>; def : St1Lane64Pat; def : St1Lane64Pat; def : St1Lane64Pat; def : St1Lane64Pat; def : St1Lane64Pat; def : St1Lane64Pat; multiclass St1LanePost64Pat { def : Pat<(scalar_store (STy (vector_extract (VTy VecListOne64:$Vt), VecIndex:$idx)), GPR64sp:$Rn, offset), (ST1 (SUBREG_TO_REG (i32 0), VecListOne64:$Vt, dsub), VecIndex:$idx, GPR64sp:$Rn, XZR)>; def : Pat<(scalar_store (STy (vector_extract (VTy VecListOne64:$Vt), VecIndex:$idx)), GPR64sp:$Rn, GPR64:$Rm), (ST1 (SUBREG_TO_REG (i32 0), VecListOne64:$Vt, dsub), VecIndex:$idx, GPR64sp:$Rn, $Rm)>; } defm : St1LanePost64Pat; defm : St1LanePost64Pat; defm : St1LanePost64Pat; defm : St1LanePost64Pat; defm : St1LanePost64Pat; defm : St1LanePost64Pat; defm : St1LanePost64Pat; defm : St1LanePost64Pat; multiclass St1LanePost128Pat { def : Pat<(scalar_store (STy (vector_extract (VTy VecListOne128:$Vt), VecIndex:$idx)), GPR64sp:$Rn, offset), (ST1 VecListOne128:$Vt, VecIndex:$idx, GPR64sp:$Rn, XZR)>; def : Pat<(scalar_store (STy (vector_extract (VTy VecListOne128:$Vt), VecIndex:$idx)), GPR64sp:$Rn, GPR64:$Rm), (ST1 VecListOne128:$Vt, VecIndex:$idx, GPR64sp:$Rn, $Rm)>; } defm : St1LanePost128Pat; defm : St1LanePost128Pat; defm : St1LanePost128Pat; defm : St1LanePost128Pat; defm : St1LanePost128Pat; defm : St1LanePost128Pat; defm : St1LanePost128Pat; defm : St1LanePost128Pat; let mayStore = 1, hasSideEffects = 0 in { defm ST2 : SIMDStSingleB<1, 0b000, "st2", VecListTwob, GPR64pi2>; defm ST2 : SIMDStSingleH<1, 0b010, 0, "st2", VecListTwoh, GPR64pi4>; defm ST2 : SIMDStSingleS<1, 0b100, 0b00, "st2", VecListTwos, GPR64pi8>; defm ST2 : SIMDStSingleD<1, 0b100, 0b01, "st2", VecListTwod, GPR64pi16>; defm ST3 : SIMDStSingleB<0, 0b001, "st3", VecListThreeb, GPR64pi3>; defm ST3 : SIMDStSingleH<0, 0b011, 0, "st3", VecListThreeh, GPR64pi6>; defm ST3 : SIMDStSingleS<0, 0b101, 0b00, "st3", VecListThrees, GPR64pi12>; defm ST3 
: SIMDStSingleD<0, 0b101, 0b01, "st3", VecListThreed, GPR64pi24>; defm ST4 : SIMDStSingleB<1, 0b001, "st4", VecListFourb, GPR64pi4>; defm ST4 : SIMDStSingleH<1, 0b011, 0, "st4", VecListFourh, GPR64pi8>; defm ST4 : SIMDStSingleS<1, 0b101, 0b00, "st4", VecListFours, GPR64pi16>; defm ST4 : SIMDStSingleD<1, 0b101, 0b01, "st4", VecListFourd, GPR64pi32>; } defm ST1 : SIMDLdSt1SingleAliases<"st1">; defm ST2 : SIMDLdSt2SingleAliases<"st2">; defm ST3 : SIMDLdSt3SingleAliases<"st3">; defm ST4 : SIMDLdSt4SingleAliases<"st4">; //---------------------------------------------------------------------------- // Crypto extensions //---------------------------------------------------------------------------- let Predicates = [HasAES] in { let isCommutable = 1 in { def AESErr : AESTiedInst<0b0100, "aese", int_aarch64_crypto_aese>; def AESDrr : AESTiedInst<0b0101, "aesd", int_aarch64_crypto_aesd>; } def AESMCrr : AESInst< 0b0110, "aesmc", int_aarch64_crypto_aesmc>; def AESIMCrr : AESInst< 0b0111, "aesimc", int_aarch64_crypto_aesimc>; } // Pseudo instructions for AESMCrr/AESIMCrr with a register constraint required // for AES fusion on some CPUs. let hasSideEffects = 0, mayStore = 0, mayLoad = 0, Predicates = [HasAES] in { def AESMCrrTied: Pseudo<(outs V128:$Rd), (ins V128:$Rn), [], "$Rn = $Rd">, Sched<[WriteVq]>; def AESIMCrrTied: Pseudo<(outs V128:$Rd), (ins V128:$Rn), [], "$Rn = $Rd">, Sched<[WriteVq]>; } // Only use constrained versions of AES(I)MC instructions if they are paired with // AESE/AESD. def : Pat<(v16i8 (int_aarch64_crypto_aesmc (v16i8 (int_aarch64_crypto_aese (v16i8 V128:$src1), (v16i8 V128:$src2))))), (v16i8 (AESMCrrTied (v16i8 (AESErr (v16i8 V128:$src1), (v16i8 V128:$src2)))))>, Requires<[HasFuseAES]>; def : Pat<(v16i8 (int_aarch64_crypto_aesimc (v16i8 (int_aarch64_crypto_aesd (v16i8 V128:$src1), (v16i8 V128:$src2))))), (v16i8 (AESIMCrrTied (v16i8 (AESDrr (v16i8 V128:$src1), (v16i8 V128:$src2)))))>, Requires<[HasFuseAES]>; let Predicates = [HasSHA2] in { def SHA1Crrr : SHATiedInstQSV<0b000, "sha1c", int_aarch64_crypto_sha1c>; def SHA1Prrr : SHATiedInstQSV<0b001, "sha1p", int_aarch64_crypto_sha1p>; def SHA1Mrrr : SHATiedInstQSV<0b010, "sha1m", int_aarch64_crypto_sha1m>; def SHA1SU0rrr : SHATiedInstVVV<0b011, "sha1su0", int_aarch64_crypto_sha1su0>; def SHA256Hrrr : SHATiedInstQQV<0b100, "sha256h", int_aarch64_crypto_sha256h>; def SHA256H2rrr : SHATiedInstQQV<0b101, "sha256h2",int_aarch64_crypto_sha256h2>; def SHA256SU1rrr :SHATiedInstVVV<0b110, "sha256su1",int_aarch64_crypto_sha256su1>; def SHA1Hrr : SHAInstSS< 0b0000, "sha1h", int_aarch64_crypto_sha1h>; def SHA1SU1rr : SHATiedInstVV<0b0001, "sha1su1", int_aarch64_crypto_sha1su1>; def SHA256SU0rr : SHATiedInstVV<0b0010, "sha256su0",int_aarch64_crypto_sha256su0>; } //---------------------------------------------------------------------------- // Compiler-pseudos //---------------------------------------------------------------------------- // FIXME: Like for X86, these should go in their own separate .td file. // For an anyext, we don't care what the high bits are, so we can perform an // INSERT_SUBREF into an IMPLICIT_DEF. def : Pat<(i64 (anyext GPR32:$src)), (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$src, sub_32)>; // When we need to explicitly zero-extend, we use a 32-bit MOV instruction and // then assert the extension has happened. def : Pat<(i64 (zext GPR32:$src)), (SUBREG_TO_REG (i32 0), (ORRWrs WZR, GPR32:$src, 0), sub_32)>; // To sign extend, we use a signed bitfield move instruction (SBFM) on the // containing super-reg. 
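// A hedged C-level illustration of the extension patterns described above and
// defined below; the expected instructions in the comments are assumptions
// about typical -O2 output, needed because the upper 32 bits of a w-register
// argument are unspecified by the AAPCS and must be defined explicitly.
//
//   long          sext_i32(int x)      { return x; }  // sxtw x0, w0  (SBFM on the super-reg)
//   unsigned long zext_i32(unsigned x) { return x; }  // mov  w0, w0  (ORRWrs + SUBREG_TO_REG)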
def : Pat<(i64 (sext GPR32:$src)), (SBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$src, sub_32), 0, 31)>; def : Pat<(i64 (sext_inreg GPR64:$src, i32)), (SBFMXri GPR64:$src, 0, 31)>; def : Pat<(i64 (sext_inreg GPR64:$src, i16)), (SBFMXri GPR64:$src, 0, 15)>; def : Pat<(i64 (sext_inreg GPR64:$src, i8)), (SBFMXri GPR64:$src, 0, 7)>; def : Pat<(i64 (sext_inreg GPR64:$src, i1)), (SBFMXri GPR64:$src, 0, 0)>; def : Pat<(i32 (sext_inreg GPR32:$src, i16)), (SBFMWri GPR32:$src, 0, 15)>; def : Pat<(i32 (sext_inreg GPR32:$src, i8)), (SBFMWri GPR32:$src, 0, 7)>; def : Pat<(i32 (sext_inreg GPR32:$src, i1)), (SBFMWri GPR32:$src, 0, 0)>; def : Pat<(shl (sext_inreg GPR32:$Rn, i8), (i64 imm0_31:$imm)), (SBFMWri GPR32:$Rn, (i64 (i32shift_a imm0_31:$imm)), (i64 (i32shift_sext_i8 imm0_31:$imm)))>; def : Pat<(shl (sext_inreg GPR64:$Rn, i8), (i64 imm0_63:$imm)), (SBFMXri GPR64:$Rn, (i64 (i64shift_a imm0_63:$imm)), (i64 (i64shift_sext_i8 imm0_63:$imm)))>; def : Pat<(shl (sext_inreg GPR32:$Rn, i16), (i64 imm0_31:$imm)), (SBFMWri GPR32:$Rn, (i64 (i32shift_a imm0_31:$imm)), (i64 (i32shift_sext_i16 imm0_31:$imm)))>; def : Pat<(shl (sext_inreg GPR64:$Rn, i16), (i64 imm0_63:$imm)), (SBFMXri GPR64:$Rn, (i64 (i64shift_a imm0_63:$imm)), (i64 (i64shift_sext_i16 imm0_63:$imm)))>; def : Pat<(shl (i64 (sext GPR32:$Rn)), (i64 imm0_63:$imm)), (SBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$Rn, sub_32), (i64 (i64shift_a imm0_63:$imm)), (i64 (i64shift_sext_i32 imm0_63:$imm)))>; def : Pat<(shl (i64 (zext GPR32:$Rn)), (i64 imm0_63:$imm)), (UBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$Rn, sub_32), (i64 (i64shift_a imm0_63:$imm)), (i64 (i64shift_sext_i32 imm0_63:$imm)))>; // sra patterns have an AddedComplexity of 10, so make sure we have a higher // AddedComplexity for the following patterns since we want to match sext + sra // patterns before we attempt to match a single sra node. let AddedComplexity = 20 in { // We support all sext + sra combinations which preserve at least one bit of the // original value which is to be sign extended. E.g. we support shifts up to // bitwidth-1 bits. def : Pat<(sra (sext_inreg GPR32:$Rn, i8), (i64 imm0_7:$imm)), (SBFMWri GPR32:$Rn, (i64 imm0_7:$imm), 7)>; def : Pat<(sra (sext_inreg GPR64:$Rn, i8), (i64 imm0_7:$imm)), (SBFMXri GPR64:$Rn, (i64 imm0_7:$imm), 7)>; def : Pat<(sra (sext_inreg GPR32:$Rn, i16), (i64 imm0_15:$imm)), (SBFMWri GPR32:$Rn, (i64 imm0_15:$imm), 15)>; def : Pat<(sra (sext_inreg GPR64:$Rn, i16), (i64 imm0_15:$imm)), (SBFMXri GPR64:$Rn, (i64 imm0_15:$imm), 15)>; def : Pat<(sra (i64 (sext GPR32:$Rn)), (i64 imm0_31:$imm)), (SBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$Rn, sub_32), (i64 imm0_31:$imm), 31)>; } // AddedComplexity = 20 // To truncate, we can simply extract from a subregister. def : Pat<(i32 (trunc GPR64sp:$src)), (i32 (EXTRACT_SUBREG GPR64sp:$src, sub_32))>; // __builtin_trap() uses the BRK instruction on AArch64. def : Pat<(trap), (BRK 1)>; def : Pat<(debugtrap), (BRK 0xF000)>; def ubsan_trap_xform : SDNodeXFormgetTargetConstant(N->getZExtValue() | ('U' << 8), SDLoc(N), MVT::i32); }]>; def gi_ubsan_trap_xform : GICustomOperandRenderer<"renderUbsanTrap">, GISDNodeXFormEquiv; def ubsan_trap_imm : TImmLeaf(Imm); }], ubsan_trap_xform>; def : Pat<(ubsantrap ubsan_trap_imm:$kind), (BRK ubsan_trap_imm:$kind)>; // Multiply high patterns which multiply the lower subvector using smull/umull // and the upper subvector with smull2/umull2. Then shuffle the high the high // part of both results together. 
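// A hedged C-level illustration of a loop that, once vectorized, produces the
// vector mulhs node matched below; the smull/smull2 + uzp2 expansion described
// above is the expected selection, but treat that as an assumption that
// depends on the vectorizer actually choosing a v8i16 shape.
//
//   void mulh_s16(short *r, const short *a, const short *b, int n) {
//     for (int i = 0; i < n; ++i)
//       r[i] = (short)(((int)a[i] * (int)b[i]) >> 16);  // per-lane signed high half
//   }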
def : Pat<(v16i8 (mulhs V128:$Rn, V128:$Rm)), (UZP2v16i8 (SMULLv8i8_v8i16 (EXTRACT_SUBREG V128:$Rn, dsub), (EXTRACT_SUBREG V128:$Rm, dsub)), (SMULLv16i8_v8i16 V128:$Rn, V128:$Rm))>; def : Pat<(v8i16 (mulhs V128:$Rn, V128:$Rm)), (UZP2v8i16 (SMULLv4i16_v4i32 (EXTRACT_SUBREG V128:$Rn, dsub), (EXTRACT_SUBREG V128:$Rm, dsub)), (SMULLv8i16_v4i32 V128:$Rn, V128:$Rm))>; def : Pat<(v4i32 (mulhs V128:$Rn, V128:$Rm)), (UZP2v4i32 (SMULLv2i32_v2i64 (EXTRACT_SUBREG V128:$Rn, dsub), (EXTRACT_SUBREG V128:$Rm, dsub)), (SMULLv4i32_v2i64 V128:$Rn, V128:$Rm))>; def : Pat<(v16i8 (mulhu V128:$Rn, V128:$Rm)), (UZP2v16i8 (UMULLv8i8_v8i16 (EXTRACT_SUBREG V128:$Rn, dsub), (EXTRACT_SUBREG V128:$Rm, dsub)), (UMULLv16i8_v8i16 V128:$Rn, V128:$Rm))>; def : Pat<(v8i16 (mulhu V128:$Rn, V128:$Rm)), (UZP2v8i16 (UMULLv4i16_v4i32 (EXTRACT_SUBREG V128:$Rn, dsub), (EXTRACT_SUBREG V128:$Rm, dsub)), (UMULLv8i16_v4i32 V128:$Rn, V128:$Rm))>; def : Pat<(v4i32 (mulhu V128:$Rn, V128:$Rm)), (UZP2v4i32 (UMULLv2i32_v2i64 (EXTRACT_SUBREG V128:$Rn, dsub), (EXTRACT_SUBREG V128:$Rm, dsub)), (UMULLv4i32_v2i64 V128:$Rn, V128:$Rm))>; // Conversions within AdvSIMD types in the same register size are free. // But because we need a consistent lane ordering, in big endian many // conversions require one or more REV instructions. // // Consider a simple memory load followed by a bitconvert then a store. // v0 = load v2i32 // v1 = BITCAST v2i32 v0 to v4i16 // store v4i16 v2 // // In big endian mode every memory access has an implicit byte swap. LDR and // STR do a 64-bit byte swap, whereas LD1/ST1 do a byte swap per lane - that // is, they treat the vector as a sequence of elements to be byte-swapped. // The two pairs of instructions are fundamentally incompatible. We've decided // to use LD1/ST1 only to simplify compiler implementation. // // LD1/ST1 perform the equivalent of a sequence of LDR/STR + REV. This makes // the original code sequence: // v0 = load v2i32 // v1 = REV v2i32 (implicit) // v2 = BITCAST v2i32 v1 to v4i16 // v3 = REV v4i16 v2 (implicit) // store v4i16 v3 // // But this is now broken - the value stored is different to the value loaded // due to lane reordering. To fix this, on every BITCAST we must perform two // other REVs: // v0 = load v2i32 // v1 = REV v2i32 (implicit) // v2 = REV v2i32 // v3 = BITCAST v2i32 v2 to v4i16 // v4 = REV v4i16 // v5 = REV v4i16 v4 (implicit) // store v4i16 v5 // // This means an extra two instructions, but actually in most cases the two REV // instructions can be combined into one. For example: // (REV64_2s (REV64_4h X)) === (REV32_4h X) // // There is also no 128-bit REV instruction. This must be synthesized with an // EXT instruction. // // Most bitconverts require some sort of conversion. 
The only exceptions are: // a) Identity conversions - vNfX <-> vNiX // b) Single-lane-to-scalar - v1fX <-> fX or v1iX <-> iX // // Natural vector casts (64 bit) foreach VT = [ v8i8, v4i16, v4f16, v4bf16, v2i32, v2f32, v1i64, v1f64, f64 ] in foreach VT2 = [ v8i8, v4i16, v4f16, v4bf16, v2i32, v2f32, v1i64, v1f64, f64 ] in def : Pat<(VT (AArch64NvCast (VT2 FPR64:$src))), (VT FPR64:$src)>; // Natural vector casts (128 bit) foreach VT = [ v16i8, v8i16, v8f16, v8bf16, v4i32, v4f32, v2i64, v2f64 ] in foreach VT2 = [ v16i8, v8i16, v8f16, v8bf16, v4i32, v4f32, v2i64, v2f64 ] in def : Pat<(VT (AArch64NvCast (VT2 FPR128:$src))), (VT FPR128:$src)>; let Predicates = [IsLE] in { def : Pat<(v8i8 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; def : Pat<(v4i16 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; def : Pat<(v2i32 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; def : Pat<(v4f16 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; def : Pat<(v4bf16 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; def : Pat<(v2f32 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; def : Pat<(i64 (bitconvert (v8i8 V64:$Vn))), (COPY_TO_REGCLASS V64:$Vn, GPR64)>; def : Pat<(i64 (bitconvert (v4i16 V64:$Vn))), (COPY_TO_REGCLASS V64:$Vn, GPR64)>; def : Pat<(i64 (bitconvert (v2i32 V64:$Vn))), (COPY_TO_REGCLASS V64:$Vn, GPR64)>; def : Pat<(i64 (bitconvert (v4f16 V64:$Vn))), (COPY_TO_REGCLASS V64:$Vn, GPR64)>; def : Pat<(i64 (bitconvert (v4bf16 V64:$Vn))), (COPY_TO_REGCLASS V64:$Vn, GPR64)>; def : Pat<(i64 (bitconvert (v2f32 V64:$Vn))), (COPY_TO_REGCLASS V64:$Vn, GPR64)>; def : Pat<(i64 (bitconvert (v1f64 V64:$Vn))), (COPY_TO_REGCLASS V64:$Vn, GPR64)>; } let Predicates = [IsBE] in { def : Pat<(v8i8 (bitconvert GPR64:$Xn)), (REV64v8i8 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>; def : Pat<(v4i16 (bitconvert GPR64:$Xn)), (REV64v4i16 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>; def : Pat<(v2i32 (bitconvert GPR64:$Xn)), (REV64v2i32 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>; def : Pat<(v4f16 (bitconvert GPR64:$Xn)), (REV64v4i16 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>; def : Pat<(v4bf16 (bitconvert GPR64:$Xn)), (REV64v4i16 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>; def : Pat<(v2f32 (bitconvert GPR64:$Xn)), (REV64v2i32 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>; def : Pat<(i64 (bitconvert (v8i8 V64:$Vn))), (REV64v8i8 (COPY_TO_REGCLASS V64:$Vn, GPR64))>; def : Pat<(i64 (bitconvert (v4i16 V64:$Vn))), (REV64v4i16 (COPY_TO_REGCLASS V64:$Vn, GPR64))>; def : Pat<(i64 (bitconvert (v2i32 V64:$Vn))), (REV64v2i32 (COPY_TO_REGCLASS V64:$Vn, GPR64))>; def : Pat<(i64 (bitconvert (v4f16 V64:$Vn))), (REV64v4i16 (COPY_TO_REGCLASS V64:$Vn, GPR64))>; def : Pat<(i64 (bitconvert (v4bf16 V64:$Vn))), (REV64v4i16 (COPY_TO_REGCLASS V64:$Vn, GPR64))>; def : Pat<(i64 (bitconvert (v2f32 V64:$Vn))), (REV64v2i32 (COPY_TO_REGCLASS V64:$Vn, GPR64))>; } def : Pat<(v1i64 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; def : Pat<(v1f64 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; def : Pat<(i64 (bitconvert (v1i64 V64:$Vn))), (COPY_TO_REGCLASS V64:$Vn, GPR64)>; def : Pat<(v1i64 (scalar_to_vector GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; def : Pat<(v1f64 (scalar_to_vector GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; def : Pat<(v1f64 (scalar_to_vector (f64 FPR64:$Xn))), (v1f64 FPR64:$Xn)>; def : Pat<(f32 (bitconvert (i32 GPR32:$Xn))), (COPY_TO_REGCLASS GPR32:$Xn, FPR32)>; def : Pat<(i32 (bitconvert (f32 FPR32:$Xn))), (COPY_TO_REGCLASS FPR32:$Xn, GPR32)>; def : 
Pat<(f64 (bitconvert (i64 GPR64:$Xn))), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; def : Pat<(i64 (bitconvert (f64 FPR64:$Xn))), (COPY_TO_REGCLASS FPR64:$Xn, GPR64)>; def : Pat<(i64 (bitconvert (v1f64 V64:$Vn))), (COPY_TO_REGCLASS V64:$Vn, GPR64)>; def : Pat<(f16 (bitconvert (bf16 FPR16:$src))), (f16 FPR16:$src)>; def : Pat<(bf16 (bitconvert (f16 FPR16:$src))), (bf16 FPR16:$src)>; let Predicates = [IsLE] in { def : Pat<(v1i64 (bitconvert (v2i32 FPR64:$src))), (v1i64 FPR64:$src)>; def : Pat<(v1i64 (bitconvert (v4i16 FPR64:$src))), (v1i64 FPR64:$src)>; def : Pat<(v1i64 (bitconvert (v8i8 FPR64:$src))), (v1i64 FPR64:$src)>; def : Pat<(v1i64 (bitconvert (v4f16 FPR64:$src))), (v1i64 FPR64:$src)>; def : Pat<(v1i64 (bitconvert (v4bf16 FPR64:$src))), (v1i64 FPR64:$src)>; def : Pat<(v1i64 (bitconvert (v2f32 FPR64:$src))), (v1i64 FPR64:$src)>; } let Predicates = [IsBE] in { def : Pat<(v1i64 (bitconvert (v2i32 FPR64:$src))), (v1i64 (REV64v2i32 FPR64:$src))>; def : Pat<(v1i64 (bitconvert (v4i16 FPR64:$src))), (v1i64 (REV64v4i16 FPR64:$src))>; def : Pat<(v1i64 (bitconvert (v8i8 FPR64:$src))), (v1i64 (REV64v8i8 FPR64:$src))>; def : Pat<(v1i64 (bitconvert (v4f16 FPR64:$src))), (v1i64 (REV64v4i16 FPR64:$src))>; def : Pat<(v1i64 (bitconvert (v4bf16 FPR64:$src))), (v1i64 (REV64v4i16 FPR64:$src))>; def : Pat<(v1i64 (bitconvert (v2f32 FPR64:$src))), (v1i64 (REV64v2i32 FPR64:$src))>; } def : Pat<(v1i64 (bitconvert (v1f64 FPR64:$src))), (v1i64 FPR64:$src)>; def : Pat<(v1i64 (bitconvert (f64 FPR64:$src))), (v1i64 FPR64:$src)>; let Predicates = [IsLE] in { def : Pat<(v2i32 (bitconvert (v1i64 FPR64:$src))), (v2i32 FPR64:$src)>; def : Pat<(v2i32 (bitconvert (v4i16 FPR64:$src))), (v2i32 FPR64:$src)>; def : Pat<(v2i32 (bitconvert (v8i8 FPR64:$src))), (v2i32 FPR64:$src)>; def : Pat<(v2i32 (bitconvert (f64 FPR64:$src))), (v2i32 FPR64:$src)>; def : Pat<(v2i32 (bitconvert (v1f64 FPR64:$src))), (v2i32 FPR64:$src)>; def : Pat<(v2i32 (bitconvert (v4f16 FPR64:$src))), (v2i32 FPR64:$src)>; def : Pat<(v2i32 (bitconvert (v4bf16 FPR64:$src))), (v2i32 FPR64:$src)>; } let Predicates = [IsBE] in { def : Pat<(v2i32 (bitconvert (v1i64 FPR64:$src))), (v2i32 (REV64v2i32 FPR64:$src))>; def : Pat<(v2i32 (bitconvert (v4i16 FPR64:$src))), (v2i32 (REV32v4i16 FPR64:$src))>; def : Pat<(v2i32 (bitconvert (v8i8 FPR64:$src))), (v2i32 (REV32v8i8 FPR64:$src))>; def : Pat<(v2i32 (bitconvert (f64 FPR64:$src))), (v2i32 (REV64v2i32 FPR64:$src))>; def : Pat<(v2i32 (bitconvert (v1f64 FPR64:$src))), (v2i32 (REV64v2i32 FPR64:$src))>; def : Pat<(v2i32 (bitconvert (v4f16 FPR64:$src))), (v2i32 (REV32v4i16 FPR64:$src))>; def : Pat<(v2i32 (bitconvert (v4bf16 FPR64:$src))), (v2i32 (REV32v4i16 FPR64:$src))>; } def : Pat<(v2i32 (bitconvert (v2f32 FPR64:$src))), (v2i32 FPR64:$src)>; let Predicates = [IsLE] in { def : Pat<(v4i16 (bitconvert (v1i64 FPR64:$src))), (v4i16 FPR64:$src)>; def : Pat<(v4i16 (bitconvert (v2i32 FPR64:$src))), (v4i16 FPR64:$src)>; def : Pat<(v4i16 (bitconvert (v8i8 FPR64:$src))), (v4i16 FPR64:$src)>; def : Pat<(v4i16 (bitconvert (f64 FPR64:$src))), (v4i16 FPR64:$src)>; def : Pat<(v4i16 (bitconvert (v2f32 FPR64:$src))), (v4i16 FPR64:$src)>; def : Pat<(v4i16 (bitconvert (v1f64 FPR64:$src))), (v4i16 FPR64:$src)>; } let Predicates = [IsBE] in { def : Pat<(v4i16 (bitconvert (v1i64 FPR64:$src))), (v4i16 (REV64v4i16 FPR64:$src))>; def : Pat<(v4i16 (bitconvert (v2i32 FPR64:$src))), (v4i16 (REV32v4i16 FPR64:$src))>; def : Pat<(v4i16 (bitconvert (v8i8 FPR64:$src))), (v4i16 (REV16v8i8 FPR64:$src))>; def : Pat<(v4i16 (bitconvert (f64 FPR64:$src))), (v4i16 
(REV64v4i16 FPR64:$src))>; def : Pat<(v4i16 (bitconvert (v2f32 FPR64:$src))), (v4i16 (REV32v4i16 FPR64:$src))>; def : Pat<(v4i16 (bitconvert (v1f64 FPR64:$src))), (v4i16 (REV64v4i16 FPR64:$src))>; } def : Pat<(v4i16 (bitconvert (v4f16 FPR64:$src))), (v4i16 FPR64:$src)>; def : Pat<(v4i16 (bitconvert (v4bf16 FPR64:$src))), (v4i16 FPR64:$src)>; let Predicates = [IsLE] in { def : Pat<(v4f16 (bitconvert (v1i64 FPR64:$src))), (v4f16 FPR64:$src)>; def : Pat<(v4f16 (bitconvert (v2i32 FPR64:$src))), (v4f16 FPR64:$src)>; def : Pat<(v4f16 (bitconvert (v8i8 FPR64:$src))), (v4f16 FPR64:$src)>; def : Pat<(v4f16 (bitconvert (f64 FPR64:$src))), (v4f16 FPR64:$src)>; def : Pat<(v4f16 (bitconvert (v2f32 FPR64:$src))), (v4f16 FPR64:$src)>; def : Pat<(v4f16 (bitconvert (v1f64 FPR64:$src))), (v4f16 FPR64:$src)>; def : Pat<(v4bf16 (bitconvert (v1i64 FPR64:$src))), (v4bf16 FPR64:$src)>; def : Pat<(v4bf16 (bitconvert (v2i32 FPR64:$src))), (v4bf16 FPR64:$src)>; def : Pat<(v4bf16 (bitconvert (v8i8 FPR64:$src))), (v4bf16 FPR64:$src)>; def : Pat<(v4bf16 (bitconvert (f64 FPR64:$src))), (v4bf16 FPR64:$src)>; def : Pat<(v4bf16 (bitconvert (v2f32 FPR64:$src))), (v4bf16 FPR64:$src)>; def : Pat<(v4bf16 (bitconvert (v1f64 FPR64:$src))), (v4bf16 FPR64:$src)>; } let Predicates = [IsBE] in { def : Pat<(v4f16 (bitconvert (v1i64 FPR64:$src))), (v4f16 (REV64v4i16 FPR64:$src))>; def : Pat<(v4f16 (bitconvert (v2i32 FPR64:$src))), (v4f16 (REV32v4i16 FPR64:$src))>; def : Pat<(v4f16 (bitconvert (v8i8 FPR64:$src))), (v4f16 (REV16v8i8 FPR64:$src))>; def : Pat<(v4f16 (bitconvert (f64 FPR64:$src))), (v4f16 (REV64v4i16 FPR64:$src))>; def : Pat<(v4f16 (bitconvert (v2f32 FPR64:$src))), (v4f16 (REV32v4i16 FPR64:$src))>; def : Pat<(v4f16 (bitconvert (v1f64 FPR64:$src))), (v4f16 (REV64v4i16 FPR64:$src))>; def : Pat<(v4bf16 (bitconvert (v1i64 FPR64:$src))), (v4bf16 (REV64v4i16 FPR64:$src))>; def : Pat<(v4bf16 (bitconvert (v2i32 FPR64:$src))), (v4bf16 (REV32v4i16 FPR64:$src))>; def : Pat<(v4bf16 (bitconvert (v8i8 FPR64:$src))), (v4bf16 (REV16v8i8 FPR64:$src))>; def : Pat<(v4bf16 (bitconvert (f64 FPR64:$src))), (v4bf16 (REV64v4i16 FPR64:$src))>; def : Pat<(v4bf16 (bitconvert (v2f32 FPR64:$src))), (v4bf16 (REV32v4i16 FPR64:$src))>; def : Pat<(v4bf16 (bitconvert (v1f64 FPR64:$src))), (v4bf16 (REV64v4i16 FPR64:$src))>; } def : Pat<(v4f16 (bitconvert (v4i16 FPR64:$src))), (v4f16 FPR64:$src)>; def : Pat<(v4bf16 (bitconvert (v4i16 FPR64:$src))), (v4bf16 FPR64:$src)>; let Predicates = [IsLE] in { def : Pat<(v8i8 (bitconvert (v1i64 FPR64:$src))), (v8i8 FPR64:$src)>; def : Pat<(v8i8 (bitconvert (v2i32 FPR64:$src))), (v8i8 FPR64:$src)>; def : Pat<(v8i8 (bitconvert (v4i16 FPR64:$src))), (v8i8 FPR64:$src)>; def : Pat<(v8i8 (bitconvert (f64 FPR64:$src))), (v8i8 FPR64:$src)>; def : Pat<(v8i8 (bitconvert (v2f32 FPR64:$src))), (v8i8 FPR64:$src)>; def : Pat<(v8i8 (bitconvert (v1f64 FPR64:$src))), (v8i8 FPR64:$src)>; def : Pat<(v8i8 (bitconvert (v4f16 FPR64:$src))), (v8i8 FPR64:$src)>; def : Pat<(v8i8 (bitconvert (v4bf16 FPR64:$src))), (v8i8 FPR64:$src)>; } let Predicates = [IsBE] in { def : Pat<(v8i8 (bitconvert (v1i64 FPR64:$src))), (v8i8 (REV64v8i8 FPR64:$src))>; def : Pat<(v8i8 (bitconvert (v2i32 FPR64:$src))), (v8i8 (REV32v8i8 FPR64:$src))>; def : Pat<(v8i8 (bitconvert (v4i16 FPR64:$src))), (v8i8 (REV16v8i8 FPR64:$src))>; def : Pat<(v8i8 (bitconvert (f64 FPR64:$src))), (v8i8 (REV64v8i8 FPR64:$src))>; def : Pat<(v8i8 (bitconvert (v2f32 FPR64:$src))), (v8i8 (REV32v8i8 FPR64:$src))>; def : Pat<(v8i8 (bitconvert (v1f64 FPR64:$src))), (v8i8 (REV64v8i8 FPR64:$src))>; 
def : Pat<(v8i8 (bitconvert (v4f16 FPR64:$src))), (v8i8 (REV16v8i8 FPR64:$src))>; def : Pat<(v8i8 (bitconvert (v4bf16 FPR64:$src))), (v8i8 (REV16v8i8 FPR64:$src))>; } let Predicates = [IsLE] in { def : Pat<(f64 (bitconvert (v2i32 FPR64:$src))), (f64 FPR64:$src)>; def : Pat<(f64 (bitconvert (v4i16 FPR64:$src))), (f64 FPR64:$src)>; def : Pat<(f64 (bitconvert (v2f32 FPR64:$src))), (f64 FPR64:$src)>; def : Pat<(f64 (bitconvert (v8i8 FPR64:$src))), (f64 FPR64:$src)>; def : Pat<(f64 (bitconvert (v4f16 FPR64:$src))), (f64 FPR64:$src)>; def : Pat<(f64 (bitconvert (v4bf16 FPR64:$src))), (f64 FPR64:$src)>; } let Predicates = [IsBE] in { def : Pat<(f64 (bitconvert (v2i32 FPR64:$src))), (f64 (REV64v2i32 FPR64:$src))>; def : Pat<(f64 (bitconvert (v4i16 FPR64:$src))), (f64 (REV64v4i16 FPR64:$src))>; def : Pat<(f64 (bitconvert (v2f32 FPR64:$src))), (f64 (REV64v2i32 FPR64:$src))>; def : Pat<(f64 (bitconvert (v8i8 FPR64:$src))), (f64 (REV64v8i8 FPR64:$src))>; def : Pat<(f64 (bitconvert (v4f16 FPR64:$src))), (f64 (REV64v4i16 FPR64:$src))>; def : Pat<(f64 (bitconvert (v4bf16 FPR64:$src))), (f64 (REV64v4i16 FPR64:$src))>; } def : Pat<(f64 (bitconvert (v1i64 FPR64:$src))), (f64 FPR64:$src)>; def : Pat<(f64 (bitconvert (v1f64 FPR64:$src))), (f64 FPR64:$src)>; let Predicates = [IsLE] in { def : Pat<(v1f64 (bitconvert (v2i32 FPR64:$src))), (v1f64 FPR64:$src)>; def : Pat<(v1f64 (bitconvert (v4i16 FPR64:$src))), (v1f64 FPR64:$src)>; def : Pat<(v1f64 (bitconvert (v8i8 FPR64:$src))), (v1f64 FPR64:$src)>; def : Pat<(v1f64 (bitconvert (v2f32 FPR64:$src))), (v1f64 FPR64:$src)>; def : Pat<(v1f64 (bitconvert (v4f16 FPR64:$src))), (v1f64 FPR64:$src)>; def : Pat<(v1f64 (bitconvert (v4bf16 FPR64:$src))), (v1f64 FPR64:$src)>; } let Predicates = [IsBE] in { def : Pat<(v1f64 (bitconvert (v2i32 FPR64:$src))), (v1f64 (REV64v2i32 FPR64:$src))>; def : Pat<(v1f64 (bitconvert (v4i16 FPR64:$src))), (v1f64 (REV64v4i16 FPR64:$src))>; def : Pat<(v1f64 (bitconvert (v8i8 FPR64:$src))), (v1f64 (REV64v8i8 FPR64:$src))>; def : Pat<(v1f64 (bitconvert (v2f32 FPR64:$src))), (v1f64 (REV64v2i32 FPR64:$src))>; def : Pat<(v1f64 (bitconvert (v4f16 FPR64:$src))), (v1f64 (REV64v4i16 FPR64:$src))>; def : Pat<(v1f64 (bitconvert (v4bf16 FPR64:$src))), (v1f64 (REV64v4i16 FPR64:$src))>; } def : Pat<(v1f64 (bitconvert (v1i64 FPR64:$src))), (v1f64 FPR64:$src)>; def : Pat<(v1f64 (bitconvert (f64 FPR64:$src))), (v1f64 FPR64:$src)>; let Predicates = [IsLE] in { def : Pat<(v2f32 (bitconvert (v1i64 FPR64:$src))), (v2f32 FPR64:$src)>; def : Pat<(v2f32 (bitconvert (v4i16 FPR64:$src))), (v2f32 FPR64:$src)>; def : Pat<(v2f32 (bitconvert (v8i8 FPR64:$src))), (v2f32 FPR64:$src)>; def : Pat<(v2f32 (bitconvert (v1f64 FPR64:$src))), (v2f32 FPR64:$src)>; def : Pat<(v2f32 (bitconvert (f64 FPR64:$src))), (v2f32 FPR64:$src)>; def : Pat<(v2f32 (bitconvert (v4f16 FPR64:$src))), (v2f32 FPR64:$src)>; def : Pat<(v2f32 (bitconvert (v4bf16 FPR64:$src))), (v2f32 FPR64:$src)>; } let Predicates = [IsBE] in { def : Pat<(v2f32 (bitconvert (v1i64 FPR64:$src))), (v2f32 (REV64v2i32 FPR64:$src))>; def : Pat<(v2f32 (bitconvert (v4i16 FPR64:$src))), (v2f32 (REV32v4i16 FPR64:$src))>; def : Pat<(v2f32 (bitconvert (v8i8 FPR64:$src))), (v2f32 (REV32v8i8 FPR64:$src))>; def : Pat<(v2f32 (bitconvert (v1f64 FPR64:$src))), (v2f32 (REV64v2i32 FPR64:$src))>; def : Pat<(v2f32 (bitconvert (f64 FPR64:$src))), (v2f32 (REV64v2i32 FPR64:$src))>; def : Pat<(v2f32 (bitconvert (v4f16 FPR64:$src))), (v2f32 (REV32v4i16 FPR64:$src))>; def : Pat<(v2f32 (bitconvert (v4bf16 FPR64:$src))), (v2f32 (REV32v4i16 
FPR64:$src))>; } def : Pat<(v2f32 (bitconvert (v2i32 FPR64:$src))), (v2f32 FPR64:$src)>; let Predicates = [IsLE] in { def : Pat<(f128 (bitconvert (v2i64 FPR128:$src))), (f128 FPR128:$src)>; def : Pat<(f128 (bitconvert (v4i32 FPR128:$src))), (f128 FPR128:$src)>; def : Pat<(f128 (bitconvert (v8i16 FPR128:$src))), (f128 FPR128:$src)>; def : Pat<(f128 (bitconvert (v2f64 FPR128:$src))), (f128 FPR128:$src)>; def : Pat<(f128 (bitconvert (v4f32 FPR128:$src))), (f128 FPR128:$src)>; def : Pat<(f128 (bitconvert (v8f16 FPR128:$src))), (f128 FPR128:$src)>; def : Pat<(f128 (bitconvert (v8bf16 FPR128:$src))), (f128 FPR128:$src)>; def : Pat<(f128 (bitconvert (v16i8 FPR128:$src))), (f128 FPR128:$src)>; } let Predicates = [IsBE] in { def : Pat<(f128 (bitconvert (v2i64 FPR128:$src))), (f128 (EXTv16i8 FPR128:$src, FPR128:$src, (i32 8)))>; def : Pat<(f128 (bitconvert (v4i32 FPR128:$src))), (f128 (EXTv16i8 (REV64v4i32 FPR128:$src), (REV64v4i32 FPR128:$src), (i32 8)))>; def : Pat<(f128 (bitconvert (v8i16 FPR128:$src))), (f128 (EXTv16i8 (REV64v8i16 FPR128:$src), (REV64v8i16 FPR128:$src), (i32 8)))>; def : Pat<(f128 (bitconvert (v8f16 FPR128:$src))), (f128 (EXTv16i8 (REV64v8i16 FPR128:$src), (REV64v8i16 FPR128:$src), (i32 8)))>; def : Pat<(f128 (bitconvert (v8bf16 FPR128:$src))), (f128 (EXTv16i8 (REV64v8i16 FPR128:$src), (REV64v8i16 FPR128:$src), (i32 8)))>; def : Pat<(f128 (bitconvert (v2f64 FPR128:$src))), (f128 (EXTv16i8 FPR128:$src, FPR128:$src, (i32 8)))>; def : Pat<(f128 (bitconvert (v4f32 FPR128:$src))), (f128 (EXTv16i8 (REV64v4i32 FPR128:$src), (REV64v4i32 FPR128:$src), (i32 8)))>; def : Pat<(f128 (bitconvert (v16i8 FPR128:$src))), (f128 (EXTv16i8 (REV64v16i8 FPR128:$src), (REV64v16i8 FPR128:$src), (i32 8)))>; } let Predicates = [IsLE] in { def : Pat<(v2f64 (bitconvert (f128 FPR128:$src))), (v2f64 FPR128:$src)>; def : Pat<(v2f64 (bitconvert (v4i32 FPR128:$src))), (v2f64 FPR128:$src)>; def : Pat<(v2f64 (bitconvert (v8i16 FPR128:$src))), (v2f64 FPR128:$src)>; def : Pat<(v2f64 (bitconvert (v8f16 FPR128:$src))), (v2f64 FPR128:$src)>; def : Pat<(v2f64 (bitconvert (v8bf16 FPR128:$src))), (v2f64 FPR128:$src)>; def : Pat<(v2f64 (bitconvert (v16i8 FPR128:$src))), (v2f64 FPR128:$src)>; def : Pat<(v2f64 (bitconvert (v4f32 FPR128:$src))), (v2f64 FPR128:$src)>; } let Predicates = [IsBE] in { def : Pat<(v2f64 (bitconvert (f128 FPR128:$src))), (v2f64 (EXTv16i8 FPR128:$src, FPR128:$src, (i32 8)))>; def : Pat<(v2f64 (bitconvert (v4i32 FPR128:$src))), (v2f64 (REV64v4i32 FPR128:$src))>; def : Pat<(v2f64 (bitconvert (v8i16 FPR128:$src))), (v2f64 (REV64v8i16 FPR128:$src))>; def : Pat<(v2f64 (bitconvert (v8f16 FPR128:$src))), (v2f64 (REV64v8i16 FPR128:$src))>; def : Pat<(v2f64 (bitconvert (v8bf16 FPR128:$src))), (v2f64 (REV64v8i16 FPR128:$src))>; def : Pat<(v2f64 (bitconvert (v16i8 FPR128:$src))), (v2f64 (REV64v16i8 FPR128:$src))>; def : Pat<(v2f64 (bitconvert (v4f32 FPR128:$src))), (v2f64 (REV64v4i32 FPR128:$src))>; } def : Pat<(v2f64 (bitconvert (v2i64 FPR128:$src))), (v2f64 FPR128:$src)>; let Predicates = [IsLE] in { def : Pat<(v4f32 (bitconvert (f128 FPR128:$src))), (v4f32 FPR128:$src)>; def : Pat<(v4f32 (bitconvert (v8i16 FPR128:$src))), (v4f32 FPR128:$src)>; def : Pat<(v4f32 (bitconvert (v8f16 FPR128:$src))), (v4f32 FPR128:$src)>; def : Pat<(v4f32 (bitconvert (v8bf16 FPR128:$src))), (v4f32 FPR128:$src)>; def : Pat<(v4f32 (bitconvert (v16i8 FPR128:$src))), (v4f32 FPR128:$src)>; def : Pat<(v4f32 (bitconvert (v2i64 FPR128:$src))), (v4f32 FPR128:$src)>; def : Pat<(v4f32 (bitconvert (v2f64 FPR128:$src))), (v4f32 FPR128:$src)>; 
} let Predicates = [IsBE] in { def : Pat<(v4f32 (bitconvert (f128 FPR128:$src))), (v4f32 (EXTv16i8 (REV64v4i32 FPR128:$src), (REV64v4i32 FPR128:$src), (i32 8)))>; def : Pat<(v4f32 (bitconvert (v8i16 FPR128:$src))), (v4f32 (REV32v8i16 FPR128:$src))>; def : Pat<(v4f32 (bitconvert (v8f16 FPR128:$src))), (v4f32 (REV32v8i16 FPR128:$src))>; def : Pat<(v4f32 (bitconvert (v8bf16 FPR128:$src))), (v4f32 (REV32v8i16 FPR128:$src))>; def : Pat<(v4f32 (bitconvert (v16i8 FPR128:$src))), (v4f32 (REV32v16i8 FPR128:$src))>; def : Pat<(v4f32 (bitconvert (v2i64 FPR128:$src))), (v4f32 (REV64v4i32 FPR128:$src))>; def : Pat<(v4f32 (bitconvert (v2f64 FPR128:$src))), (v4f32 (REV64v4i32 FPR128:$src))>; } def : Pat<(v4f32 (bitconvert (v4i32 FPR128:$src))), (v4f32 FPR128:$src)>; let Predicates = [IsLE] in { def : Pat<(v2i64 (bitconvert (f128 FPR128:$src))), (v2i64 FPR128:$src)>; def : Pat<(v2i64 (bitconvert (v4i32 FPR128:$src))), (v2i64 FPR128:$src)>; def : Pat<(v2i64 (bitconvert (v8i16 FPR128:$src))), (v2i64 FPR128:$src)>; def : Pat<(v2i64 (bitconvert (v16i8 FPR128:$src))), (v2i64 FPR128:$src)>; def : Pat<(v2i64 (bitconvert (v4f32 FPR128:$src))), (v2i64 FPR128:$src)>; def : Pat<(v2i64 (bitconvert (v8f16 FPR128:$src))), (v2i64 FPR128:$src)>; def : Pat<(v2i64 (bitconvert (v8bf16 FPR128:$src))), (v2i64 FPR128:$src)>; } let Predicates = [IsBE] in { def : Pat<(v2i64 (bitconvert (f128 FPR128:$src))), (v2i64 (EXTv16i8 FPR128:$src, FPR128:$src, (i32 8)))>; def : Pat<(v2i64 (bitconvert (v4i32 FPR128:$src))), (v2i64 (REV64v4i32 FPR128:$src))>; def : Pat<(v2i64 (bitconvert (v8i16 FPR128:$src))), (v2i64 (REV64v8i16 FPR128:$src))>; def : Pat<(v2i64 (bitconvert (v16i8 FPR128:$src))), (v2i64 (REV64v16i8 FPR128:$src))>; def : Pat<(v2i64 (bitconvert (v4f32 FPR128:$src))), (v2i64 (REV64v4i32 FPR128:$src))>; def : Pat<(v2i64 (bitconvert (v8f16 FPR128:$src))), (v2i64 (REV64v8i16 FPR128:$src))>; def : Pat<(v2i64 (bitconvert (v8bf16 FPR128:$src))), (v2i64 (REV64v8i16 FPR128:$src))>; } def : Pat<(v2i64 (bitconvert (v2f64 FPR128:$src))), (v2i64 FPR128:$src)>; let Predicates = [IsLE] in { def : Pat<(v4i32 (bitconvert (f128 FPR128:$src))), (v4i32 FPR128:$src)>; def : Pat<(v4i32 (bitconvert (v2i64 FPR128:$src))), (v4i32 FPR128:$src)>; def : Pat<(v4i32 (bitconvert (v8i16 FPR128:$src))), (v4i32 FPR128:$src)>; def : Pat<(v4i32 (bitconvert (v16i8 FPR128:$src))), (v4i32 FPR128:$src)>; def : Pat<(v4i32 (bitconvert (v2f64 FPR128:$src))), (v4i32 FPR128:$src)>; def : Pat<(v4i32 (bitconvert (v8f16 FPR128:$src))), (v4i32 FPR128:$src)>; def : Pat<(v4i32 (bitconvert (v8bf16 FPR128:$src))), (v4i32 FPR128:$src)>; } let Predicates = [IsBE] in { def : Pat<(v4i32 (bitconvert (f128 FPR128:$src))), (v4i32 (EXTv16i8 (REV64v4i32 FPR128:$src), (REV64v4i32 FPR128:$src), (i32 8)))>; def : Pat<(v4i32 (bitconvert (v2i64 FPR128:$src))), (v4i32 (REV64v4i32 FPR128:$src))>; def : Pat<(v4i32 (bitconvert (v8i16 FPR128:$src))), (v4i32 (REV32v8i16 FPR128:$src))>; def : Pat<(v4i32 (bitconvert (v16i8 FPR128:$src))), (v4i32 (REV32v16i8 FPR128:$src))>; def : Pat<(v4i32 (bitconvert (v2f64 FPR128:$src))), (v4i32 (REV64v4i32 FPR128:$src))>; def : Pat<(v4i32 (bitconvert (v8f16 FPR128:$src))), (v4i32 (REV32v8i16 FPR128:$src))>; def : Pat<(v4i32 (bitconvert (v8bf16 FPR128:$src))), (v4i32 (REV32v8i16 FPR128:$src))>; } def : Pat<(v4i32 (bitconvert (v4f32 FPR128:$src))), (v4i32 FPR128:$src)>; let Predicates = [IsLE] in { def : Pat<(v8i16 (bitconvert (f128 FPR128:$src))), (v8i16 FPR128:$src)>; def : Pat<(v8i16 (bitconvert (v2i64 FPR128:$src))), (v8i16 FPR128:$src)>; def : Pat<(v8i16 
(bitconvert (v4i32 FPR128:$src))), (v8i16 FPR128:$src)>; def : Pat<(v8i16 (bitconvert (v16i8 FPR128:$src))), (v8i16 FPR128:$src)>; def : Pat<(v8i16 (bitconvert (v2f64 FPR128:$src))), (v8i16 FPR128:$src)>; def : Pat<(v8i16 (bitconvert (v4f32 FPR128:$src))), (v8i16 FPR128:$src)>; } let Predicates = [IsBE] in { def : Pat<(v8i16 (bitconvert (f128 FPR128:$src))), (v8i16 (EXTv16i8 (REV64v8i16 FPR128:$src), (REV64v8i16 FPR128:$src), (i32 8)))>; def : Pat<(v8i16 (bitconvert (v2i64 FPR128:$src))), (v8i16 (REV64v8i16 FPR128:$src))>; def : Pat<(v8i16 (bitconvert (v4i32 FPR128:$src))), (v8i16 (REV32v8i16 FPR128:$src))>; def : Pat<(v8i16 (bitconvert (v16i8 FPR128:$src))), (v8i16 (REV16v16i8 FPR128:$src))>; def : Pat<(v8i16 (bitconvert (v2f64 FPR128:$src))), (v8i16 (REV64v8i16 FPR128:$src))>; def : Pat<(v8i16 (bitconvert (v4f32 FPR128:$src))), (v8i16 (REV32v8i16 FPR128:$src))>; } def : Pat<(v8i16 (bitconvert (v8f16 FPR128:$src))), (v8i16 FPR128:$src)>; def : Pat<(v8i16 (bitconvert (v8bf16 FPR128:$src))), (v8i16 FPR128:$src)>; let Predicates = [IsLE] in { def : Pat<(v8f16 (bitconvert (f128 FPR128:$src))), (v8f16 FPR128:$src)>; def : Pat<(v8f16 (bitconvert (v2i64 FPR128:$src))), (v8f16 FPR128:$src)>; def : Pat<(v8f16 (bitconvert (v4i32 FPR128:$src))), (v8f16 FPR128:$src)>; def : Pat<(v8f16 (bitconvert (v16i8 FPR128:$src))), (v8f16 FPR128:$src)>; def : Pat<(v8f16 (bitconvert (v2f64 FPR128:$src))), (v8f16 FPR128:$src)>; def : Pat<(v8f16 (bitconvert (v4f32 FPR128:$src))), (v8f16 FPR128:$src)>; def : Pat<(v8bf16 (bitconvert (f128 FPR128:$src))), (v8bf16 FPR128:$src)>; def : Pat<(v8bf16 (bitconvert (v2i64 FPR128:$src))), (v8bf16 FPR128:$src)>; def : Pat<(v8bf16 (bitconvert (v4i32 FPR128:$src))), (v8bf16 FPR128:$src)>; def : Pat<(v8bf16 (bitconvert (v16i8 FPR128:$src))), (v8bf16 FPR128:$src)>; def : Pat<(v8bf16 (bitconvert (v2f64 FPR128:$src))), (v8bf16 FPR128:$src)>; def : Pat<(v8bf16 (bitconvert (v4f32 FPR128:$src))), (v8bf16 FPR128:$src)>; } let Predicates = [IsBE] in { def : Pat<(v8f16 (bitconvert (f128 FPR128:$src))), (v8f16 (EXTv16i8 (REV64v8i16 FPR128:$src), (REV64v8i16 FPR128:$src), (i32 8)))>; def : Pat<(v8f16 (bitconvert (v2i64 FPR128:$src))), (v8f16 (REV64v8i16 FPR128:$src))>; def : Pat<(v8f16 (bitconvert (v4i32 FPR128:$src))), (v8f16 (REV32v8i16 FPR128:$src))>; def : Pat<(v8f16 (bitconvert (v16i8 FPR128:$src))), (v8f16 (REV16v16i8 FPR128:$src))>; def : Pat<(v8f16 (bitconvert (v2f64 FPR128:$src))), (v8f16 (REV64v8i16 FPR128:$src))>; def : Pat<(v8f16 (bitconvert (v4f32 FPR128:$src))), (v8f16 (REV32v8i16 FPR128:$src))>; def : Pat<(v8bf16 (bitconvert (f128 FPR128:$src))), (v8bf16 (EXTv16i8 (REV64v8i16 FPR128:$src), (REV64v8i16 FPR128:$src), (i32 8)))>; def : Pat<(v8bf16 (bitconvert (v2i64 FPR128:$src))), (v8bf16 (REV64v8i16 FPR128:$src))>; def : Pat<(v8bf16 (bitconvert (v4i32 FPR128:$src))), (v8bf16 (REV32v8i16 FPR128:$src))>; def : Pat<(v8bf16 (bitconvert (v16i8 FPR128:$src))), (v8bf16 (REV16v16i8 FPR128:$src))>; def : Pat<(v8bf16 (bitconvert (v2f64 FPR128:$src))), (v8bf16 (REV64v8i16 FPR128:$src))>; def : Pat<(v8bf16 (bitconvert (v4f32 FPR128:$src))), (v8bf16 (REV32v8i16 FPR128:$src))>; } def : Pat<(v8f16 (bitconvert (v8i16 FPR128:$src))), (v8f16 FPR128:$src)>; def : Pat<(v8bf16 (bitconvert (v8i16 FPR128:$src))), (v8bf16 FPR128:$src)>; let Predicates = [IsLE] in { def : Pat<(v16i8 (bitconvert (f128 FPR128:$src))), (v16i8 FPR128:$src)>; def : Pat<(v16i8 (bitconvert (v2i64 FPR128:$src))), (v16i8 FPR128:$src)>; def : Pat<(v16i8 (bitconvert (v4i32 FPR128:$src))), (v16i8 FPR128:$src)>; def : Pat<(v16i8 
(bitconvert (v8i16 FPR128:$src))), (v16i8 FPR128:$src)>; def : Pat<(v16i8 (bitconvert (v2f64 FPR128:$src))), (v16i8 FPR128:$src)>; def : Pat<(v16i8 (bitconvert (v4f32 FPR128:$src))), (v16i8 FPR128:$src)>; def : Pat<(v16i8 (bitconvert (v8f16 FPR128:$src))), (v16i8 FPR128:$src)>; def : Pat<(v16i8 (bitconvert (v8bf16 FPR128:$src))), (v16i8 FPR128:$src)>; } let Predicates = [IsBE] in { def : Pat<(v16i8 (bitconvert (f128 FPR128:$src))), (v16i8 (EXTv16i8 (REV64v16i8 FPR128:$src), (REV64v16i8 FPR128:$src), (i32 8)))>; def : Pat<(v16i8 (bitconvert (v2i64 FPR128:$src))), (v16i8 (REV64v16i8 FPR128:$src))>; def : Pat<(v16i8 (bitconvert (v4i32 FPR128:$src))), (v16i8 (REV32v16i8 FPR128:$src))>; def : Pat<(v16i8 (bitconvert (v8i16 FPR128:$src))), (v16i8 (REV16v16i8 FPR128:$src))>; def : Pat<(v16i8 (bitconvert (v2f64 FPR128:$src))), (v16i8 (REV64v16i8 FPR128:$src))>; def : Pat<(v16i8 (bitconvert (v4f32 FPR128:$src))), (v16i8 (REV32v16i8 FPR128:$src))>; def : Pat<(v16i8 (bitconvert (v8f16 FPR128:$src))), (v16i8 (REV16v16i8 FPR128:$src))>; def : Pat<(v16i8 (bitconvert (v8bf16 FPR128:$src))), (v16i8 (REV16v16i8 FPR128:$src))>; } def : Pat<(v4i16 (extract_subvector V128:$Rn, (i64 0))), (EXTRACT_SUBREG V128:$Rn, dsub)>; def : Pat<(v8i8 (extract_subvector V128:$Rn, (i64 0))), (EXTRACT_SUBREG V128:$Rn, dsub)>; def : Pat<(v2f32 (extract_subvector V128:$Rn, (i64 0))), (EXTRACT_SUBREG V128:$Rn, dsub)>; def : Pat<(v4f16 (extract_subvector V128:$Rn, (i64 0))), (EXTRACT_SUBREG V128:$Rn, dsub)>; def : Pat<(v4bf16 (extract_subvector V128:$Rn, (i64 0))), (EXTRACT_SUBREG V128:$Rn, dsub)>; def : Pat<(v2i32 (extract_subvector V128:$Rn, (i64 0))), (EXTRACT_SUBREG V128:$Rn, dsub)>; def : Pat<(v1i64 (extract_subvector V128:$Rn, (i64 0))), (EXTRACT_SUBREG V128:$Rn, dsub)>; def : Pat<(v1f64 (extract_subvector V128:$Rn, (i64 0))), (EXTRACT_SUBREG V128:$Rn, dsub)>; def : Pat<(v8i8 (extract_subvector (v16i8 FPR128:$Rn), (i64 1))), (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>; def : Pat<(v4i16 (extract_subvector (v8i16 FPR128:$Rn), (i64 1))), (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>; def : Pat<(v2i32 (extract_subvector (v4i32 FPR128:$Rn), (i64 1))), (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>; def : Pat<(v1i64 (extract_subvector (v2i64 FPR128:$Rn), (i64 1))), (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>; // A 64-bit subvector insert to the first 128-bit vector position // is a subregister copy that needs no instruction. 
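// A hedged intrinsics-level illustration of the subvector rules above; the
// expected code in the comments is an assumption about typical -O2 selection.
// Taking the low half of a Q register is free (an EXTRACT_SUBREG through
// dsub), whereas the high half needs a real instruction, matching the
// DUPv2i64lane patterns above.
//
//   #include <arm_neon.h>
//   int32x2_t low_half(int32x4_t v)  { return vget_low_s32(v);  }  // no instruction expected
//   int32x2_t high_half(int32x4_t v) { return vget_high_s32(v); }  // e.g. dup/ext of lane 1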
multiclass InsertSubvectorUndef { def : Pat<(insert_subvector undef, (v1i64 FPR64:$src), (Ty 0)), (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), FPR64:$src, dsub)>; def : Pat<(insert_subvector undef, (v1f64 FPR64:$src), (Ty 0)), (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FPR64:$src, dsub)>; def : Pat<(insert_subvector undef, (v2i32 FPR64:$src), (Ty 0)), (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR64:$src, dsub)>; def : Pat<(insert_subvector undef, (v2f32 FPR64:$src), (Ty 0)), (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR64:$src, dsub)>; def : Pat<(insert_subvector undef, (v4i16 FPR64:$src), (Ty 0)), (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR64:$src, dsub)>; def : Pat<(insert_subvector undef, (v4f16 FPR64:$src), (Ty 0)), (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR64:$src, dsub)>; def : Pat<(insert_subvector undef, (v4bf16 FPR64:$src), (Ty 0)), (INSERT_SUBREG (v8bf16 (IMPLICIT_DEF)), FPR64:$src, dsub)>; def : Pat<(insert_subvector undef, (v8i8 FPR64:$src), (Ty 0)), (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), FPR64:$src, dsub)>; } defm : InsertSubvectorUndef; defm : InsertSubvectorUndef; // Use pair-wise add instructions when summing up the lanes for v2f64, v2i64 // or v2f32. def : Pat<(i64 (add (vector_extract (v2i64 FPR128:$Rn), (i64 0)), (vector_extract (v2i64 FPR128:$Rn), (i64 1)))), (i64 (ADDPv2i64p (v2i64 FPR128:$Rn)))>; def : Pat<(f64 (any_fadd (vector_extract (v2f64 FPR128:$Rn), (i64 0)), (vector_extract (v2f64 FPR128:$Rn), (i64 1)))), (f64 (FADDPv2i64p (v2f64 FPR128:$Rn)))>; // vector_extract on 64-bit vectors gets promoted to a 128 bit vector, // so we match on v4f32 here, not v2f32. This will also catch adding // the low two lanes of a true v4f32 vector. def : Pat<(any_fadd (vector_extract (v4f32 FPR128:$Rn), (i64 0)), (vector_extract (v4f32 FPR128:$Rn), (i64 1))), (f32 (FADDPv2i32p (EXTRACT_SUBREG FPR128:$Rn, dsub)))>; def : Pat<(any_fadd (vector_extract (v8f16 FPR128:$Rn), (i64 0)), (vector_extract (v8f16 FPR128:$Rn), (i64 1))), (f16 (FADDPv2i16p (EXTRACT_SUBREG FPR128:$Rn, dsub)))>; // Prefer using the bottom lanes of addp Rn, Rn compared to // addp extractlow(Rn), extracthigh(Rn) def : Pat<(AArch64addp (v2i32 (extract_subvector (v4i32 FPR128:$Rn), (i64 0))), (v2i32 (extract_subvector (v4i32 FPR128:$Rn), (i64 2)))), (v2i32 (EXTRACT_SUBREG (ADDPv4i32 $Rn, $Rn), dsub))>; def : Pat<(AArch64addp (v4i16 (extract_subvector (v8i16 FPR128:$Rn), (i64 0))), (v4i16 (extract_subvector (v8i16 FPR128:$Rn), (i64 4)))), (v4i16 (EXTRACT_SUBREG (ADDPv8i16 $Rn, $Rn), dsub))>; def : Pat<(AArch64addp (v8i8 (extract_subvector (v16i8 FPR128:$Rn), (i64 0))), (v8i8 (extract_subvector (v16i8 FPR128:$Rn), (i64 8)))), (v8i8 (EXTRACT_SUBREG (ADDPv16i8 $Rn, $Rn), dsub))>; def : Pat<(AArch64faddp (v2f32 (extract_subvector (v4f32 FPR128:$Rn), (i64 0))), (v2f32 (extract_subvector (v4f32 FPR128:$Rn), (i64 2)))), (v2f32 (EXTRACT_SUBREG (FADDPv4f32 $Rn, $Rn), dsub))>; def : Pat<(AArch64faddp (v4f16 (extract_subvector (v8f16 FPR128:$Rn), (i64 0))), (v4f16 (extract_subvector (v8f16 FPR128:$Rn), (i64 4)))), (v4f16 (EXTRACT_SUBREG (FADDPv8f16 $Rn, $Rn), dsub))>; // add(uzp1(X, Y), uzp2(X, Y)) -> addp(X, Y) def : Pat<(v2i64 (add (AArch64zip1 (v2i64 FPR128:$Rn), (v2i64 FPR128:$Rm)), (AArch64zip2 (v2i64 FPR128:$Rn), (v2i64 FPR128:$Rm)))), (v2i64 (ADDPv2i64 $Rn, $Rm))>; def : Pat<(v4i32 (add (AArch64uzp1 (v4i32 FPR128:$Rn), (v4i32 FPR128:$Rm)), (AArch64uzp2 (v4i32 FPR128:$Rn), (v4i32 FPR128:$Rm)))), (v4i32 (ADDPv4i32 $Rn, $Rm))>; def : Pat<(v8i16 (add (AArch64uzp1 (v8i16 FPR128:$Rn), (v8i16 FPR128:$Rm)), (AArch64uzp2 (v8i16 FPR128:$Rn), (v8i16 
FPR128:$Rm)))), (v8i16 (ADDPv8i16 $Rn, $Rm))>; def : Pat<(v16i8 (add (AArch64uzp1 (v16i8 FPR128:$Rn), (v16i8 FPR128:$Rm)), (AArch64uzp2 (v16i8 FPR128:$Rn), (v16i8 FPR128:$Rm)))), (v16i8 (ADDPv16i8 $Rn, $Rm))>; def : Pat<(v2f64 (fadd (AArch64zip1 (v2f64 FPR128:$Rn), (v2f64 FPR128:$Rm)), (AArch64zip2 (v2f64 FPR128:$Rn), (v2f64 FPR128:$Rm)))), (v2f64 (FADDPv2f64 $Rn, $Rm))>; def : Pat<(v4f32 (fadd (AArch64uzp1 (v4f32 FPR128:$Rn), (v4f32 FPR128:$Rm)), (AArch64uzp2 (v4f32 FPR128:$Rn), (v4f32 FPR128:$Rm)))), (v4f32 (FADDPv4f32 $Rn, $Rm))>; let Predicates = [HasFullFP16] in def : Pat<(v8f16 (fadd (AArch64uzp1 (v8f16 FPR128:$Rn), (v8f16 FPR128:$Rm)), (AArch64uzp2 (v8f16 FPR128:$Rn), (v8f16 FPR128:$Rm)))), (v8f16 (FADDPv8f16 $Rn, $Rm))>; // Scalar 64-bit shifts in FPR64 registers. def : Pat<(i64 (int_aarch64_neon_sshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))), (SSHLv1i64 FPR64:$Rn, FPR64:$Rm)>; def : Pat<(i64 (int_aarch64_neon_ushl (i64 FPR64:$Rn), (i64 FPR64:$Rm))), (USHLv1i64 FPR64:$Rn, FPR64:$Rm)>; def : Pat<(i64 (int_aarch64_neon_srshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))), (SRSHLv1i64 FPR64:$Rn, FPR64:$Rm)>; def : Pat<(i64 (int_aarch64_neon_urshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))), (URSHLv1i64 FPR64:$Rn, FPR64:$Rm)>; // Patterns for nontemporal/no-allocate stores. // We have to resort to tricks to turn a single-input store into a store pair, // because there is no single-input nontemporal store, only STNP. let Predicates = [IsLE] in { let AddedComplexity = 15 in { class NTStore128Pat : Pat<(nontemporalstore (VT FPR128:$Rt), (am_indexed7s64 GPR64sp:$Rn, simm7s8:$offset)), (STNPDi (EXTRACT_SUBREG FPR128:$Rt, dsub), (DUPi64 FPR128:$Rt, (i64 1)), GPR64sp:$Rn, simm7s8:$offset)>; def : NTStore128Pat; def : NTStore128Pat; def : NTStore128Pat; def : NTStore128Pat; class NTStore64Pat : Pat<(nontemporalstore (VT FPR64:$Rt), (am_indexed7s32 GPR64sp:$Rn, simm7s4:$offset)), (STNPSi (EXTRACT_SUBREG FPR64:$Rt, ssub), (DUPi32 (SUBREG_TO_REG (i64 0), FPR64:$Rt, dsub), (i64 1)), GPR64sp:$Rn, simm7s4:$offset)>; // FIXME: Shouldn't v1f64 loads/stores be promoted to v1i64? def : NTStore64Pat; def : NTStore64Pat; def : NTStore64Pat; def : NTStore64Pat; def : NTStore64Pat; def : Pat<(nontemporalstore GPR64:$Rt, (am_indexed7s32 GPR64sp:$Rn, simm7s4:$offset)), (STNPWi (EXTRACT_SUBREG GPR64:$Rt, sub_32), (EXTRACT_SUBREG (UBFMXri GPR64:$Rt, 32, 63), sub_32), GPR64sp:$Rn, simm7s4:$offset)>; } // AddedComplexity=10 } // Predicates = [IsLE] // Tail call return handling. These are all compiler pseudo-instructions, // so no encoding information or anything like that. let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [SP] in { def TCRETURNdi : Pseudo<(outs), (ins i64imm:$dst, i32imm:$FPDiff), []>, Sched<[WriteBrReg]>; def TCRETURNri : Pseudo<(outs), (ins tcGPR64:$dst, i32imm:$FPDiff), []>, Sched<[WriteBrReg]>; // Indirect tail-call with any register allowed, used by MachineOutliner when // this is proven safe. // FIXME: If we have to add any more hacks like this, we should instead relax // some verifier checks for outlined functions. def TCRETURNriALL : Pseudo<(outs), (ins GPR64:$dst, i32imm:$FPDiff), []>, Sched<[WriteBrReg]>; // Indirect tail-calls with reduced register classes, needed for BTI and // PAuthLR. 
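// A hedged C-level illustration of the sibling calls these TCRETURN* pseudos
// implement; the expected branches are assumptions that hold when the call is
// proven eligible for tail-call optimization at -O2. With BTI or PAuthLR the
// branch register is restricted, which is what the reduced-register-class
// variants defined below are for.
//
//   int target(int);
//   int direct_tail(int x)                  { return target(x); }  // b  target  (TCRETURNdi)
//   int indirect_tail(int (*f)(int), int x) { return f(x); }       // br <reg>   (TCRETURNri*)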
def TCRETURNrix16x17 : Pseudo<(outs), (ins tcGPRx16x17:$dst, i32imm:$FPDiff), []>, Sched<[WriteBrReg]>; def TCRETURNrix17 : Pseudo<(outs), (ins tcGPRx17:$dst, i32imm:$FPDiff), []>, Sched<[WriteBrReg]>; def TCRETURNrinotx16 : Pseudo<(outs), (ins tcGPRnotx16:$dst, i32imm:$FPDiff), []>, Sched<[WriteBrReg]>; } def : Pat<(AArch64tcret tcGPR64:$dst, (i32 timm:$FPDiff)), (TCRETURNri tcGPR64:$dst, imm:$FPDiff)>, Requires<[TailCallAny]>; def : Pat<(AArch64tcret tcGPRx16x17:$dst, (i32 timm:$FPDiff)), (TCRETURNrix16x17 tcGPRx16x17:$dst, imm:$FPDiff)>, Requires<[TailCallX16X17]>; def : Pat<(AArch64tcret tcGPRx17:$dst, (i32 timm:$FPDiff)), (TCRETURNrix17 tcGPRx17:$dst, imm:$FPDiff)>, Requires<[TailCallX17]>; def : Pat<(AArch64tcret tcGPRnotx16:$dst, (i32 timm:$FPDiff)), (TCRETURNrinotx16 tcGPRnotx16:$dst, imm:$FPDiff)>, Requires<[TailCallNotX16]>; def : Pat<(AArch64tcret tglobaladdr:$dst, (i32 timm:$FPDiff)), (TCRETURNdi texternalsym:$dst, imm:$FPDiff)>; def : Pat<(AArch64tcret texternalsym:$dst, (i32 timm:$FPDiff)), (TCRETURNdi texternalsym:$dst, imm:$FPDiff)>; def MOVMCSym : Pseudo<(outs GPR64:$dst), (ins i64imm:$sym), []>, Sched<[]>; def : Pat<(i64 (AArch64LocalRecover mcsym:$sym)), (MOVMCSym mcsym:$sym)>; // Extracting lane zero is a special case where we can just use a plain // EXTRACT_SUBREG instruction, which will become FMOV. This is easier for the // rest of the compiler, especially the register allocator and copy propagation, // to reason about, so is preferred when it's possible to use it. let AddedComplexity = 10 in { def : Pat<(i64 (extractelt (v2i64 V128:$V), (i64 0))), (EXTRACT_SUBREG V128:$V, dsub)>; def : Pat<(i32 (extractelt (v4i32 V128:$V), (i64 0))), (EXTRACT_SUBREG V128:$V, ssub)>; def : Pat<(i32 (extractelt (v2i32 V64:$V), (i64 0))), (EXTRACT_SUBREG V64:$V, ssub)>; } // dot_v4i8 class mul_v4i8 : PatFrag<(ops node:$Rn, node:$Rm, node:$offset), (mul (ldop (add node:$Rn, node:$offset)), (ldop (add node:$Rm, node:$offset)))>; class mulz_v4i8 : PatFrag<(ops node:$Rn, node:$Rm), (mul (ldop node:$Rn), (ldop node:$Rm))>; def load_v4i8 : OutPatFrag<(ops node:$R), (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)), (i32 (COPY_TO_REGCLASS (LDRWui node:$R, (i64 0)), FPR32)), ssub)>; class dot_v4i8 : Pat<(i32 (add (mul_v4i8 GPR64sp:$Rn, GPR64sp:$Rm, (i64 3)), (add (mul_v4i8 GPR64sp:$Rn, GPR64sp:$Rm, (i64 2)), (add (mul_v4i8 GPR64sp:$Rn, GPR64sp:$Rm, (i64 1)), (mulz_v4i8 GPR64sp:$Rn, GPR64sp:$Rm))))), (EXTRACT_SUBREG (i64 (DOT (DUPv2i32gpr WZR), (load_v4i8 GPR64sp:$Rn), (load_v4i8 GPR64sp:$Rm))), sub_32)>, Requires<[HasDotProd]>; // dot_v8i8 class ee_v8i8 : PatFrag<(ops node:$V, node:$K), (v4i16 (extract_subvector (v8i16 (extend node:$V)), node:$K))>; class mul_v8i8 : PatFrag<(ops node:$M, node:$N, node:$K), (mulop (v4i16 (ee_v8i8 node:$M, node:$K)), (v4i16 (ee_v8i8 node:$N, node:$K)))>; class idot_v8i8 : PatFrag<(ops node:$M, node:$N), (i32 (extractelt (v4i32 (AArch64uaddv (add (mul_v8i8 node:$M, node:$N, (i64 0)), (mul_v8i8 node:$M, node:$N, (i64 4))))), (i64 0)))>; // vaddv_[su]32 is special; -> ADDP Vd.2S,Vn.2S,Vm.2S; return Vd.s[0];Vn==Vm def VADDV_32 : OutPatFrag<(ops node:$R), (ADDPv2i32 node:$R, node:$R)>; class odot_v8i8 : OutPatFrag<(ops node:$Vm, node:$Vn), (EXTRACT_SUBREG (VADDV_32 (i64 (DOT (DUPv2i32gpr WZR), (v8i8 node:$Vm), (v8i8 node:$Vn)))), sub_32)>; class dot_v8i8 : Pat<(idot_v8i8 V64:$Vm, V64:$Vn), (odot_v8i8 V64:$Vm, V64:$Vn)>, Requires<[HasDotProd]>; // dot_v16i8 class ee_v16i8 : PatFrag<(ops node:$V, node:$K1, node:$K2), (v4i16 (extract_subvector (v8i16 (extend (v8i8 
(extract_subvector node:$V, node:$K1)))), node:$K2))>; class mul_v16i8 : PatFrag<(ops node:$M, node:$N, node:$K1, node:$K2), (v4i32 (mulop (v4i16 (ee_v16i8 node:$M, node:$K1, node:$K2)), (v4i16 (ee_v16i8 node:$N, node:$K1, node:$K2))))>; class idot_v16i8 : PatFrag<(ops node:$M, node:$N), (i32 (extractelt (v4i32 (AArch64uaddv (add (add (mul_v16i8 node:$M, node:$N, (i64 0), (i64 0)), (mul_v16i8 node:$M, node:$N, (i64 8), (i64 0))), (add (mul_v16i8 node:$M, node:$N, (i64 0), (i64 4)), (mul_v16i8 node:$M, node:$N, (i64 8), (i64 4)))))), (i64 0)))>; class odot_v16i8 : OutPatFrag<(ops node:$Vm, node:$Vn), (i32 (ADDVv4i32v (DOT (DUPv4i32gpr WZR), node:$Vm, node:$Vn)))>; class dot_v16i8 : Pat<(idot_v16i8 V128:$Vm, V128:$Vn), (odot_v16i8 V128:$Vm, V128:$Vn)>, Requires<[HasDotProd]>; let AddedComplexity = 10 in { def : dot_v4i8; def : dot_v4i8; def : dot_v8i8; def : dot_v8i8; def : dot_v16i8; def : dot_v16i8; // FIXME: add patterns to generate vector by element dot product. // FIXME: add SVE dot-product patterns. } // Custom DAG nodes and isel rules to make a 64-byte block out of eight GPRs, // so that it can be used as input to inline asm, and vice versa. def LS64_BUILD : SDNode<"AArch64ISD::LS64_BUILD", SDTypeProfile<1, 8, []>>; def LS64_EXTRACT : SDNode<"AArch64ISD::LS64_EXTRACT", SDTypeProfile<1, 2, []>>; def : Pat<(i64x8 (LS64_BUILD GPR64:$x0, GPR64:$x1, GPR64:$x2, GPR64:$x3, GPR64:$x4, GPR64:$x5, GPR64:$x6, GPR64:$x7)), (REG_SEQUENCE GPR64x8Class, $x0, x8sub_0, $x1, x8sub_1, $x2, x8sub_2, $x3, x8sub_3, $x4, x8sub_4, $x5, x8sub_5, $x6, x8sub_6, $x7, x8sub_7)>; foreach i = 0-7 in { def : Pat<(i64 (LS64_EXTRACT (i64x8 GPR64x8:$val), (i32 i))), (EXTRACT_SUBREG $val, !cast("x8sub_"#i))>; } let Predicates = [HasLS64] in { def LD64B: LoadStore64B<0b101, "ld64b", (ins GPR64sp:$Rn), (outs GPR64x8:$Rt)>; def ST64B: LoadStore64B<0b001, "st64b", (ins GPR64x8:$Rt, GPR64sp:$Rn), (outs)>; def ST64BV: Store64BV<0b011, "st64bv">; def ST64BV0: Store64BV<0b010, "st64bv0">; class ST64BPattern : Pat<(intrinsic GPR64sp:$addr, GPR64:$x0, GPR64:$x1, GPR64:$x2, GPR64:$x3, GPR64:$x4, GPR64:$x5, GPR64:$x6, GPR64:$x7), (instruction (REG_SEQUENCE GPR64x8Class, $x0, x8sub_0, $x1, x8sub_1, $x2, x8sub_2, $x3, x8sub_3, $x4, x8sub_4, $x5, x8sub_5, $x6, x8sub_6, $x7, x8sub_7), $addr)>; def : ST64BPattern; def : ST64BPattern; def : ST64BPattern; } let Predicates = [HasMOPS] in { let Defs = [NZCV] in { defm CPYFP : MOPSMemoryCopyInsns<0b00, "cpyfp">; defm CPYP : MOPSMemoryMoveInsns<0b00, "cpyp">; defm SETP : MOPSMemorySetInsns<0b00, "setp">; } let Uses = [NZCV] in { defm CPYFM : MOPSMemoryCopyInsns<0b01, "cpyfm">; defm CPYFE : MOPSMemoryCopyInsns<0b10, "cpyfe">; defm CPYM : MOPSMemoryMoveInsns<0b01, "cpym">; defm CPYE : MOPSMemoryMoveInsns<0b10, "cpye">; defm SETM : MOPSMemorySetInsns<0b01, "setm">; defm SETE : MOPSMemorySetInsns<0b10, "sete">; } } let Predicates = [HasMOPS, HasMTE] in { let Defs = [NZCV] in { defm SETGP : MOPSMemorySetTaggingInsns<0b00, "setgp">; } let Uses = [NZCV] in { defm SETGM : MOPSMemorySetTaggingInsns<0b01, "setgm">; // Can't use SETGE because it's a reserved name in TargetSelectionDAG.td defm MOPSSETGE : MOPSMemorySetTaggingInsns<0b10, "setge">; } } // MOPS Node operands: 0: Dst, 1: Src or Value, 2: Size, 3: Chain // MOPS Node results: 0: Dst writeback, 1: Size writeback, 2: Chain def SDT_AArch64mops : SDTypeProfile<2, 3, [ SDTCisInt<0>, SDTCisInt<1>, SDTCisInt<2> ]>; def AArch64mops_memset : SDNode<"AArch64ISD::MOPS_MEMSET", SDT_AArch64mops>; def AArch64mops_memset_tagging : 
SDNode<"AArch64ISD::MOPS_MEMSET_TAGGING", SDT_AArch64mops>; def AArch64mops_memcopy : SDNode<"AArch64ISD::MOPS_MEMCOPY", SDT_AArch64mops>; def AArch64mops_memmove : SDNode<"AArch64ISD::MOPS_MEMMOVE", SDT_AArch64mops>; // MOPS operations always contain three 4-byte instructions let Predicates = [HasMOPS], Defs = [NZCV], Size = 12, mayStore = 1 in { let mayLoad = 1 in { def MOPSMemoryCopyPseudo : Pseudo<(outs GPR64common:$Rd_wb, GPR64common:$Rs_wb, GPR64:$Rn_wb), (ins GPR64common:$Rd, GPR64common:$Rs, GPR64:$Rn), [], "$Rd = $Rd_wb,$Rs = $Rs_wb,$Rn = $Rn_wb">, Sched<[]>; def MOPSMemoryMovePseudo : Pseudo<(outs GPR64common:$Rd_wb, GPR64common:$Rs_wb, GPR64:$Rn_wb), (ins GPR64common:$Rd, GPR64common:$Rs, GPR64:$Rn), [], "$Rd = $Rd_wb,$Rs = $Rs_wb,$Rn = $Rn_wb">, Sched<[]>; } let mayLoad = 0 in { def MOPSMemorySetPseudo : Pseudo<(outs GPR64common:$Rd_wb, GPR64:$Rn_wb), (ins GPR64common:$Rd, GPR64:$Rn, GPR64:$Rm), [], "$Rd = $Rd_wb,$Rn = $Rn_wb,@earlyclobber $Rn_wb">, Sched<[]>; } } let Predicates = [HasMOPS, HasMTE], Defs = [NZCV], Size = 12, mayLoad = 0, mayStore = 1 in { def MOPSMemorySetTaggingPseudo : Pseudo<(outs GPR64common:$Rd_wb, GPR64:$Rn_wb), (ins GPR64common:$Rd, GPR64:$Rn, GPR64:$Rm), [], "$Rd = $Rd_wb,$Rn = $Rn_wb">, Sched<[]>; } //----------------------------------------------------------------------------- // v8.3 Pointer Authentication late patterns def : Pat<(int_ptrauth_blend GPR64:$Rd, imm64_0_65535:$imm), (PAUTH_BLEND GPR64:$Rd, (trunc_imm imm64_0_65535:$imm))>; def : Pat<(int_ptrauth_blend GPR64:$Rd, GPR64:$Rn), (BFMXri GPR64:$Rd, GPR64:$Rn, 16, 15)>; //----------------------------------------------------------------------------- // This gets lowered into an instruction sequence of 20 bytes let Defs = [X16, X17], mayStore = 1, isCodeGenOnly = 1, Size = 20 in def StoreSwiftAsyncContext : Pseudo<(outs), (ins GPR64:$ctx, GPR64sp:$base, simm9:$offset), []>, Sched<[]>; def AArch64AssertZExtBool : SDNode<"AArch64ISD::ASSERT_ZEXT_BOOL", SDT_assert>; def : Pat<(AArch64AssertZExtBool GPR32:$op), (i32 GPR32:$op)>; //===----------------------------===// // 2022 Architecture Extensions: //===----------------------------===// def : InstAlias<"clrbhb", (HINT 22), 0>; let Predicates = [HasCLRBHB] in { def : InstAlias<"clrbhb", (HINT 22), 1>; } //===----------------------------------------------------------------------===// // Translation Hardening Extension (FEAT_THE) //===----------------------------------------------------------------------===// defm RCW : ReadCheckWriteCompareAndSwap; defm RCWCLR : ReadCheckWriteOperation<0b001, "clr">; defm RCWSET : ReadCheckWriteOperation<0b011, "set">; defm RCWSWP : ReadCheckWriteOperation<0b010, "swp">; //===----------------------------------------------------------------------===// // General Data-Processing Instructions (FEAT_V94_DP) //===----------------------------------------------------------------------===// defm ABS : OneOperandData<0b001000, "abs", abs>, Requires<[HasCSSC]>; defm CNT : OneOperandData<0b000111, "cnt", ctpop>, Requires<[HasCSSC]>; defm CTZ : OneOperandData<0b000110, "ctz", cttz>, Requires<[HasCSSC]>; defm SMAX : ComparisonOp<0, 0, "smax", smax>, Requires<[HasCSSC]>; defm SMIN : ComparisonOp<0, 1, "smin", smin>, Requires<[HasCSSC]>; defm UMAX : ComparisonOp<1, 0, "umax", umax>, Requires<[HasCSSC]>; defm UMIN : ComparisonOp<1, 1, "umin", umin>, Requires<[HasCSSC]>; def RPRFM: I<(outs), (ins rprfop:$Rt, GPR64:$Rm, GPR64sp:$Rn), "rprfm", "\t$Rt, $Rm, [$Rn]", "", []>, Sched<[]> { bits<6> Rt; bits<5> Rn; bits<5> Rm; let Inst{2-0} = 
Rt{2-0}; let Inst{4-3} = 0b11; let Inst{9-5} = Rn; let Inst{11-10} = 0b10; let Inst{13-12} = Rt{4-3}; let Inst{14} = 0b1; let Inst{15} = Rt{5}; let Inst{20-16} = Rm; let Inst{31-21} = 0b11111000101; let mayLoad = 0; let mayStore = 0; let hasSideEffects = 1; // RPRFM overlaps with PRFM (reg), when the decoder method of PRFM returns // Fail, the decoder should attempt to decode RPRFM. This requires setting // the decoder namespace to "Fallback". let DecoderNamespace = "Fallback"; } //===----------------------------------------------------------------------===// // 128-bit Atomics (FEAT_LSE128) //===----------------------------------------------------------------------===// let Predicates = [HasLSE128] in { def SWPP : LSE128Base<0b000, 0b00, 0b1, "swpp">; def SWPPA : LSE128Base<0b000, 0b10, 0b1, "swppa">; def SWPPAL : LSE128Base<0b000, 0b11, 0b1, "swppal">; def SWPPL : LSE128Base<0b000, 0b01, 0b1, "swppl">; def LDCLRP : LSE128Base<0b001, 0b00, 0b0, "ldclrp">; def LDCLRPA : LSE128Base<0b001, 0b10, 0b0, "ldclrpa">; def LDCLRPAL : LSE128Base<0b001, 0b11, 0b0, "ldclrpal">; def LDCLRPL : LSE128Base<0b001, 0b01, 0b0, "ldclrpl">; def LDSETP : LSE128Base<0b011, 0b00, 0b0, "ldsetp">; def LDSETPA : LSE128Base<0b011, 0b10, 0b0, "ldsetpa">; def LDSETPAL : LSE128Base<0b011, 0b11, 0b0, "ldsetpal">; def LDSETPL : LSE128Base<0b011, 0b01, 0b0, "ldsetpl">; } //===----------------------------------------------------------------------===// // RCPC Instructions (FEAT_LRCPC3) //===----------------------------------------------------------------------===// let Predicates = [HasRCPC3] in { // size opc opc2 def STILPWpre: BaseLRCPC3IntegerLoadStorePair<0b10, 0b00, 0b0000, (outs GPR64sp:$wback), (ins GPR32:$Rt, GPR32:$Rt2, GPR64sp:$Rn), "stilp", "\t$Rt, $Rt2, [$Rn, #-8]!", "$Rn = $wback">; def STILPXpre: BaseLRCPC3IntegerLoadStorePair<0b11, 0b00, 0b0000, (outs GPR64sp:$wback), (ins GPR64:$Rt, GPR64:$Rt2, GPR64sp:$Rn), "stilp", "\t$Rt, $Rt2, [$Rn, #-16]!", "$Rn = $wback">; def STILPW: BaseLRCPC3IntegerLoadStorePair<0b10, 0b00, 0b0001, (outs), (ins GPR32:$Rt, GPR32:$Rt2, GPR64sp:$Rn), "stilp", "\t$Rt, $Rt2, [$Rn]", "">; def STILPX: BaseLRCPC3IntegerLoadStorePair<0b11, 0b00, 0b0001, (outs), (ins GPR64:$Rt, GPR64:$Rt2, GPR64sp:$Rn), "stilp", "\t$Rt, $Rt2, [$Rn]", "">; def LDIAPPWpost: BaseLRCPC3IntegerLoadStorePair<0b10, 0b01, 0b0000, (outs GPR64sp:$wback, GPR32:$Rt, GPR32:$Rt2), (ins GPR64sp:$Rn), "ldiapp", "\t$Rt, $Rt2, [$Rn], #8", "$Rn = $wback">; def LDIAPPXpost: BaseLRCPC3IntegerLoadStorePair<0b11, 0b01, 0b0000, (outs GPR64sp:$wback, GPR64:$Rt, GPR64:$Rt2), (ins GPR64sp:$Rn), "ldiapp", "\t$Rt, $Rt2, [$Rn], #16", "$Rn = $wback">; def LDIAPPW: BaseLRCPC3IntegerLoadStorePair<0b10, 0b01, 0b0001, (outs GPR32:$Rt, GPR32:$Rt2), (ins GPR64sp0:$Rn), "ldiapp", "\t$Rt, $Rt2, [$Rn]", "">; def LDIAPPX: BaseLRCPC3IntegerLoadStorePair<0b11, 0b01, 0b0001, (outs GPR64:$Rt, GPR64:$Rt2), (ins GPR64sp0:$Rn), "ldiapp", "\t$Rt, $Rt2, [$Rn]", "">; def : Pat<(AArch64ldiapp GPR64sp:$Rn), (LDIAPPX GPR64sp:$Rn)>; def : Pat<(AArch64stilp GPR64:$Rt, GPR64:$Rt2, GPR64sp:$Rn), (STILPX GPR64:$Rt, GPR64:$Rt2, GPR64sp:$Rn)>; // Aliases for when offset=0 def : InstAlias<"stilp\t$Rt, $Rt2, [$Rn, #0]", (STILPW GPR32: $Rt, GPR32: $Rt2, GPR64sp:$Rn)>; def : InstAlias<"stilp\t$Rt, $Rt2, [$Rn, #0]", (STILPX GPR64: $Rt, GPR64: $Rt2, GPR64sp:$Rn)>; // size opc def STLRWpre: BaseLRCPC3IntegerLoadStore<0b10, 0b10, (outs GPR64sp:$wback), (ins GPR32:$Rt, GPR64sp:$Rn), "stlr", "\t$Rt, [$Rn, #-4]!", "$Rn = $wback">; def STLRXpre: BaseLRCPC3IntegerLoadStore<0b11, 
0b10, (outs GPR64sp:$wback), (ins GPR64:$Rt, GPR64sp:$Rn), "stlr", "\t$Rt, [$Rn, #-8]!", "$Rn = $wback">; def LDAPRWpost: BaseLRCPC3IntegerLoadStore<0b10, 0b11, (outs GPR64sp:$wback, GPR32:$Rt), (ins GPR64sp:$Rn), "ldapr", "\t$Rt, [$Rn], #4", "$Rn = $wback">; def LDAPRXpost: BaseLRCPC3IntegerLoadStore<0b11, 0b11, (outs GPR64sp:$wback, GPR64:$Rt), (ins GPR64sp:$Rn), "ldapr", "\t$Rt, [$Rn], #8", "$Rn = $wback">; } let Predicates = [HasRCPC3, HasNEON] in { // size opc regtype defm STLURb: LRCPC3NEONLoadStoreUnscaledOffset<0b00, 0b00, FPR8 , (outs), (ins FPR8 :$Rt, GPR64sp:$Rn, simm9:$simm), "stlur">; defm STLURh: LRCPC3NEONLoadStoreUnscaledOffset<0b01, 0b00, FPR16 , (outs), (ins FPR16 :$Rt, GPR64sp:$Rn, simm9:$simm), "stlur">; defm STLURs: LRCPC3NEONLoadStoreUnscaledOffset<0b10, 0b00, FPR32 , (outs), (ins FPR32 :$Rt, GPR64sp:$Rn, simm9:$simm), "stlur">; defm STLURd: LRCPC3NEONLoadStoreUnscaledOffset<0b11, 0b00, FPR64 , (outs), (ins FPR64 :$Rt, GPR64sp:$Rn, simm9:$simm), "stlur">; defm STLURq: LRCPC3NEONLoadStoreUnscaledOffset<0b00, 0b10, FPR128, (outs), (ins FPR128:$Rt, GPR64sp:$Rn, simm9:$simm), "stlur">; defm LDAPURb: LRCPC3NEONLoadStoreUnscaledOffset<0b00, 0b01, FPR8 , (outs FPR8 :$Rt), (ins GPR64sp:$Rn, simm9:$simm), "ldapur">; defm LDAPURh: LRCPC3NEONLoadStoreUnscaledOffset<0b01, 0b01, FPR16 , (outs FPR16 :$Rt), (ins GPR64sp:$Rn, simm9:$simm), "ldapur">; defm LDAPURs: LRCPC3NEONLoadStoreUnscaledOffset<0b10, 0b01, FPR32 , (outs FPR32 :$Rt), (ins GPR64sp:$Rn, simm9:$simm), "ldapur">; defm LDAPURd: LRCPC3NEONLoadStoreUnscaledOffset<0b11, 0b01, FPR64 , (outs FPR64 :$Rt), (ins GPR64sp:$Rn, simm9:$simm), "ldapur">; defm LDAPURq: LRCPC3NEONLoadStoreUnscaledOffset<0b00, 0b11, FPR128, (outs FPR128:$Rt), (ins GPR64sp:$Rn, simm9:$simm), "ldapur">; // L def STL1: LRCPC3NEONLdStSingle<0b0, (outs), (ins VecListOned:$Vt, VectorIndexD:$Q, GPR64sp:$Rn) , "stl1", "">; def LDAP1: LRCPC3NEONLdStSingle<0b1, (outs VecListOned:$dst), (ins VecListOned:$Vt, VectorIndexD:$Q, GPR64sp0:$Rn), "ldap1", "$Vt = $dst">; // Aliases for when offset=0 def : InstAlias<"stl1\t$Vt$Q, [$Rn, #0]", (STL1 VecListOned:$Vt, VectorIndexD:$Q, GPR64sp:$Rn)>; } //===----------------------------------------------------------------------===// // 128-bit System Instructions (FEAT_SYSINSTR128) //===----------------------------------------------------------------------===// let Predicates = [HasD128] in { def SYSPxt : SystemPXtI<0, "sysp">; def SYSPxt_XZR : BaseSystemI<0, (outs), (ins imm0_7:$op1, sys_cr_op:$Cn, sys_cr_op:$Cm, imm0_7:$op2, SyspXzrPairOperand:$xzr_pair), "sysp", "\t$op1, $Cn, $Cm, $op2, $xzr_pair">, Sched<[WriteSys]> { // Had to use a custom decoder because tablegen interprets this as having 4 fields (why?) // and therefore autogenerates a decoder that builds an MC representation that has 4 fields // (decodeToMCInst), but when printing we expect the MC representation to have 5 fields (one // extra for the XZR) because AArch64InstPrinter::printInstruction in AArch64GenAsmWriter.inc // is based off of the asm template (maybe) and therefore wants to print 5 operands. // I could add a bits<5> xzr_pair. But without a way to constrain it to 0b11111 here it would // overlap with the main SYSP instruction. 
let DecoderMethod = "DecodeSyspXzrInstruction"; bits<3> op1; bits<4> Cn; bits<4> Cm; bits<3> op2; let Inst{22} = 0b1; // override BaseSystemI let Inst{20-19} = 0b01; let Inst{18-16} = op1; let Inst{15-12} = Cn; let Inst{11-8} = Cm; let Inst{7-5} = op2; let Inst{4-0} = 0b11111; } def : InstAlias<"sysp $op1, $Cn, $Cm, $op2", (SYSPxt_XZR imm0_7:$op1, sys_cr_op:$Cn, sys_cr_op:$Cm, imm0_7:$op2, XZR)>; } //--- // 128-bit System Registers (FEAT_SYSREG128) //--- // Instruction encoding: // // 31 22|21|20|19|18 16|15 12|11 8|7 5|4 0 // MRRS 1101010101| 1| 1|o0| op1| Cn| Cm|op2| Rt // MSRR 1101010101| 0| 1|o0| op1| Cn| Cm|op2| Rt // Instruction syntax: // // MRRS , , ____> // MSRR ____>, , // // ...where t is even (X0, X2, etc). let Predicates = [HasD128] in { def MRRS : RtSystemI128<1, (outs MrrsMssrPairClassOperand:$Rt), (ins mrs_sysreg_op:$systemreg), "mrrs", "\t$Rt, $systemreg"> { bits<16> systemreg; let Inst{20-5} = systemreg; } def MSRR : RtSystemI128<0, (outs), (ins msr_sysreg_op:$systemreg, MrrsMssrPairClassOperand:$Rt), "msrr", "\t$systemreg, $Rt"> { bits<16> systemreg; let Inst{20-5} = systemreg; } } //===----------------------------===// // 2023 Architecture Extensions: //===----------------------------===// let Predicates = [HasFP8] in { defm F1CVTL : SIMDMixedTwoVectorFP8<0b00, "f1cvtl">; defm F2CVTL : SIMDMixedTwoVectorFP8<0b01, "f2cvtl">; defm BF1CVTL : SIMDMixedTwoVectorFP8<0b10, "bf1cvtl">; defm BF2CVTL : SIMDMixedTwoVectorFP8<0b11, "bf2cvtl">; defm FCVTN_F16_F8 : SIMDThreeSameSizeVectorCvt<"fcvtn">; defm FCVTN_F32_F8 : SIMDThreeVectorCvt<"fcvtn">; defm FSCALE : SIMDThreeSameVectorFP<0b1, 0b1, 0b111, "fscale", null_frag>; } // End let Predicates = [HasFP8] let Predicates = [HasFAMINMAX] in { defm FAMAX : SIMDThreeSameVectorFP<0b0, 0b1, 0b011, "famax", null_frag>; defm FAMIN : SIMDThreeSameVectorFP<0b1, 0b1, 0b011, "famin", null_frag>; } // End let Predicates = [HasFAMAXMIN] let Predicates = [HasFP8FMA] in { defm FMLALBlane : SIMDThreeSameVectorMLAIndex<0b0, "fmlalb">; defm FMLALTlane : SIMDThreeSameVectorMLAIndex<0b1, "fmlalt">; defm FMLALLBBlane : SIMDThreeSameVectorMLALIndex<0b0, 0b00, "fmlallbb">; defm FMLALLBTlane : SIMDThreeSameVectorMLALIndex<0b0, 0b01, "fmlallbt">; defm FMLALLTBlane : SIMDThreeSameVectorMLALIndex<0b1, 0b00, "fmlalltb">; defm FMLALLTTlane : SIMDThreeSameVectorMLALIndex<0b1, 0b01, "fmlalltt">; defm FMLALB : SIMDThreeSameVectorMLA<0b0, "fmlalb">; defm FMLALT : SIMDThreeSameVectorMLA<0b1, "fmlalt">; defm FMLALLBB : SIMDThreeSameVectorMLAL<0b0, 0b00, "fmlallbb">; defm FMLALLBT : SIMDThreeSameVectorMLAL<0b0, 0b01, "fmlallbt">; defm FMLALLTB : SIMDThreeSameVectorMLAL<0b1, 0b00, "fmlalltb">; defm FMLALLTT : SIMDThreeSameVectorMLAL<0b1, 0b01, "fmlalltt">; } // End let Predicates = [HasFP8FMA] let Predicates = [HasFP8DOT2] in { defm FDOTlane : SIMDThreeSameVectorFP8DOT2Index<"fdot">; defm FDOT : SIMDThreeSameVectorDOT2<"fdot">; } // End let Predicates = [HasFP8DOT2] let Predicates = [HasFP8DOT4] in { defm FDOTlane : SIMDThreeSameVectorFP8DOT4Index<"fdot">; defm FDOT : SIMDThreeSameVectorDOT4<"fdot">; } // End let Predicates = [HasFP8DOT4] //===----------------------------------------------------------------------===// // Checked Pointer Arithmetic (FEAT_CPA) //===----------------------------------------------------------------------===// let Predicates = [HasCPA] in { // Scalar add/subtract defm ADDPT : AddSubCPA<0, "addpt">; defm SUBPT : AddSubCPA<1, "subpt">; // Scalar multiply-add/subtract def MADDPT : MulAccumCPA<0, "maddpt">; def MSUBPT : MulAccumCPA<1, "msubpt">; 
} def round_v4fp32_to_v4bf16 : OutPatFrag<(ops node:$Rn), // NaN? Round : Quiet(NaN) (BSPv16i8 (FCMEQv4f32 $Rn, $Rn), (ADDv4i32 (ADDv4i32 $Rn, // Extract the LSB of the fp32 *truncated* to bf16. (ANDv16i8 (USHRv4i32_shift V128:$Rn, (i32 16)), (MOVIv4i32 (i32 1), (i32 0)))), // Bias which will help us break ties correctly. (MOVIv4s_msl (i32 127), (i32 264))), // Set the quiet bit in the NaN. (ORRv4i32 $Rn, (i32 64), (i32 16)))>; multiclass PromoteUnaryv8f16Tov4f32 { let Predicates = [HasNoFullFP16] in def : Pat<(InOp (v8f16 V128:$Rn)), (v8f16 (FCVTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), (v4f16 (FCVTNv4i16 (v4f32 (OutInst (v4f32 (FCVTLv4i16 (v4i16 (EXTRACT_SUBREG V128:$Rn, dsub)))))))), dsub), (v4f32 (OutInst (v4f32 (FCVTLv8i16 V128:$Rn))))))>; let Predicates = [HasBF16] in def : Pat<(InOp (v8bf16 V128:$Rn)), (v8bf16 (BFCVTN2 (v8bf16 (BFCVTN (v4f32 (OutInst (v4f32 (SHLLv4i16 (v4i16 (EXTRACT_SUBREG V128:$Rn, dsub)))))))), (v4f32 (OutInst (v4f32 (SHLLv8i16 V128:$Rn))))))>; let Predicates = [HasNoBF16] in def : Pat<(InOp (v8bf16 V128:$Rn)), (UZP2v8i16 (round_v4fp32_to_v4bf16 (v4f32 (OutInst (v4f32 (SHLLv4i16 (v4i16 (EXTRACT_SUBREG V128:$Rn, dsub))))))), (round_v4fp32_to_v4bf16 (v4f32 (OutInst (v4f32 (SHLLv8i16 V128:$Rn))))))>; } defm : PromoteUnaryv8f16Tov4f32; defm : PromoteUnaryv8f16Tov4f32; defm : PromoteUnaryv8f16Tov4f32; defm : PromoteUnaryv8f16Tov4f32; defm : PromoteUnaryv8f16Tov4f32; defm : PromoteUnaryv8f16Tov4f32; defm : PromoteUnaryv8f16Tov4f32; multiclass PromoteBinaryv8f16Tov4f32 { let Predicates = [HasNoFullFP16] in def : Pat<(InOp (v8f16 V128:$Rn), (v8f16 V128:$Rm)), (v8f16 (FCVTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), (v4f16 (FCVTNv4i16 (v4f32 (OutInst (v4f32 (FCVTLv4i16 (v4i16 (EXTRACT_SUBREG V128:$Rn, dsub)))), (v4f32 (FCVTLv4i16 (v4i16 (EXTRACT_SUBREG V128:$Rm, dsub)))))))), dsub), (v4f32 (OutInst (v4f32 (FCVTLv8i16 V128:$Rn)), (v4f32 (FCVTLv8i16 V128:$Rm))))))>; let Predicates = [HasBF16] in def : Pat<(InOp (v8bf16 V128:$Rn), (v8bf16 V128:$Rm)), (v8bf16 (BFCVTN2 (v8bf16 (BFCVTN (v4f32 (OutInst (v4f32 (SHLLv4i16 (v4i16 (EXTRACT_SUBREG V128:$Rn, dsub)))), (v4f32 (SHLLv4i16 (v4i16 (EXTRACT_SUBREG V128:$Rm, dsub)))))))), (v4f32 (OutInst (v4f32 (SHLLv8i16 V128:$Rn)), (v4f32 (SHLLv8i16 V128:$Rm))))))>; let Predicates = [HasNoBF16] in def : Pat<(InOp (v8bf16 V128:$Rn), (v8bf16 V128:$Rm)), (UZP2v8i16 (round_v4fp32_to_v4bf16 (v4f32 (OutInst (v4f32 (SHLLv4i16 (v4i16 (EXTRACT_SUBREG V128:$Rn, dsub)))), (v4f32 (SHLLv4i16 (v4i16 (EXTRACT_SUBREG V128:$Rm, dsub))))))), (round_v4fp32_to_v4bf16 (v4f32 (OutInst (v4f32 (SHLLv8i16 V128:$Rn)), (v4f32 (SHLLv8i16 V128:$Rm))))))>; } defm : PromoteBinaryv8f16Tov4f32; defm : PromoteBinaryv8f16Tov4f32; defm : PromoteBinaryv8f16Tov4f32; defm : PromoteBinaryv8f16Tov4f32; include "AArch64InstrAtomics.td" include "AArch64SVEInstrInfo.td" include "AArch64SMEInstrInfo.td" include "AArch64InstrGISel.td" diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp index 0863345b0c6d..c9636b2c7025 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp @@ -1,570 +1,575 @@ //===-- RISCVMCCodeEmitter.cpp - Convert RISC-V code to machine code ------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file implements the RISCVMCCodeEmitter class. // //===----------------------------------------------------------------------===// #include "MCTargetDesc/RISCVBaseInfo.h" #include "MCTargetDesc/RISCVFixupKinds.h" #include "MCTargetDesc/RISCVMCExpr.h" #include "MCTargetDesc/RISCVMCTargetDesc.h" #include "llvm/ADT/Statistic.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstBuilder.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/Casting.h" #include "llvm/Support/EndianStream.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; #define DEBUG_TYPE "mccodeemitter" STATISTIC(MCNumEmitted, "Number of MC instructions emitted"); STATISTIC(MCNumFixups, "Number of MC fixups created"); namespace { class RISCVMCCodeEmitter : public MCCodeEmitter { RISCVMCCodeEmitter(const RISCVMCCodeEmitter &) = delete; void operator=(const RISCVMCCodeEmitter &) = delete; MCContext &Ctx; MCInstrInfo const &MCII; public: RISCVMCCodeEmitter(MCContext &ctx, MCInstrInfo const &MCII) : Ctx(ctx), MCII(MCII) {} ~RISCVMCCodeEmitter() override = default; void encodeInstruction(const MCInst &MI, SmallVectorImpl &CB, SmallVectorImpl &Fixups, const MCSubtargetInfo &STI) const override; void expandFunctionCall(const MCInst &MI, SmallVectorImpl &CB, SmallVectorImpl &Fixups, const MCSubtargetInfo &STI) const; void expandTLSDESCCall(const MCInst &MI, SmallVectorImpl &CB, SmallVectorImpl &Fixups, const MCSubtargetInfo &STI) const; void expandAddTPRel(const MCInst &MI, SmallVectorImpl &CB, SmallVectorImpl &Fixups, const MCSubtargetInfo &STI) const; void expandLongCondBr(const MCInst &MI, SmallVectorImpl &CB, SmallVectorImpl &Fixups, const MCSubtargetInfo &STI) const; /// TableGen'erated function for getting the binary encoding for an /// instruction. uint64_t getBinaryCodeForInstr(const MCInst &MI, SmallVectorImpl &Fixups, const MCSubtargetInfo &STI) const; /// Return binary encoding of operand. If the machine operand requires /// relocation, record the relocation and return zero. unsigned getMachineOpValue(const MCInst &MI, const MCOperand &MO, SmallVectorImpl &Fixups, const MCSubtargetInfo &STI) const; unsigned getImmOpValueAsr1(const MCInst &MI, unsigned OpNo, SmallVectorImpl &Fixups, const MCSubtargetInfo &STI) const; unsigned getImmOpValue(const MCInst &MI, unsigned OpNo, SmallVectorImpl &Fixups, const MCSubtargetInfo &STI) const; unsigned getVMaskReg(const MCInst &MI, unsigned OpNo, SmallVectorImpl &Fixups, const MCSubtargetInfo &STI) const; unsigned getRlistOpValue(const MCInst &MI, unsigned OpNo, SmallVectorImpl &Fixups, const MCSubtargetInfo &STI) const; unsigned getRegReg(const MCInst &MI, unsigned OpNo, SmallVectorImpl &Fixups, const MCSubtargetInfo &STI) const; }; } // end anonymous namespace MCCodeEmitter *llvm::createRISCVMCCodeEmitter(const MCInstrInfo &MCII, MCContext &Ctx) { return new RISCVMCCodeEmitter(Ctx, MCII); } // Expand PseudoCALL(Reg), PseudoTAIL and PseudoJump to AUIPC and JALR with // relocation types. We expand those pseudo-instructions while encoding them, // meaning AUIPC and JALR won't go through RISC-V MC to MC compressed // instruction transformation. 
This is acceptable because AUIPC has no 16-bit // form and C_JALR has no immediate operand field. We let linker relaxation // deal with it. When linker relaxation is enabled, AUIPC and JALR have a // chance to relax to JAL. // If the C extension is enabled, JAL has a chance relax to C_JAL. void RISCVMCCodeEmitter::expandFunctionCall(const MCInst &MI, SmallVectorImpl &CB, SmallVectorImpl &Fixups, const MCSubtargetInfo &STI) const { MCInst TmpInst; MCOperand Func; MCRegister Ra; if (MI.getOpcode() == RISCV::PseudoTAIL) { Func = MI.getOperand(0); Ra = RISCV::X6; // For Zicfilp, PseudoTAIL should be expanded to a software guarded branch. // It means to use t2(x7) as rs1 of JALR to expand PseudoTAIL. if (STI.hasFeature(RISCV::FeatureStdExtZicfilp)) Ra = RISCV::X7; } else if (MI.getOpcode() == RISCV::PseudoCALLReg) { Func = MI.getOperand(1); Ra = MI.getOperand(0).getReg(); } else if (MI.getOpcode() == RISCV::PseudoCALL) { Func = MI.getOperand(0); Ra = RISCV::X1; } else if (MI.getOpcode() == RISCV::PseudoJump) { Func = MI.getOperand(1); Ra = MI.getOperand(0).getReg(); } uint32_t Binary; assert(Func.isExpr() && "Expected expression"); const MCExpr *CallExpr = Func.getExpr(); // Emit AUIPC Ra, Func with R_RISCV_CALL relocation type. TmpInst = MCInstBuilder(RISCV::AUIPC).addReg(Ra).addExpr(CallExpr); Binary = getBinaryCodeForInstr(TmpInst, Fixups, STI); support::endian::write(CB, Binary, llvm::endianness::little); if (MI.getOpcode() == RISCV::PseudoTAIL || MI.getOpcode() == RISCV::PseudoJump) // Emit JALR X0, Ra, 0 TmpInst = MCInstBuilder(RISCV::JALR).addReg(RISCV::X0).addReg(Ra).addImm(0); else // Emit JALR Ra, Ra, 0 TmpInst = MCInstBuilder(RISCV::JALR).addReg(Ra).addReg(Ra).addImm(0); Binary = getBinaryCodeForInstr(TmpInst, Fixups, STI); support::endian::write(CB, Binary, llvm::endianness::little); } void RISCVMCCodeEmitter::expandTLSDESCCall(const MCInst &MI, SmallVectorImpl &CB, SmallVectorImpl &Fixups, const MCSubtargetInfo &STI) const { MCOperand SrcSymbol = MI.getOperand(3); assert(SrcSymbol.isExpr() && "Expected expression as first input to TLSDESCCALL"); const RISCVMCExpr *Expr = dyn_cast(SrcSymbol.getExpr()); MCRegister Link = MI.getOperand(0).getReg(); MCRegister Dest = MI.getOperand(1).getReg(); MCRegister Imm = MI.getOperand(2).getImm(); Fixups.push_back(MCFixup::create( 0, Expr, MCFixupKind(RISCV::fixup_riscv_tlsdesc_call), MI.getLoc())); MCInst Call = MCInstBuilder(RISCV::JALR).addReg(Link).addReg(Dest).addImm(Imm); uint32_t Binary = getBinaryCodeForInstr(Call, Fixups, STI); support::endian::write(CB, Binary, llvm::endianness::little); } // Expand PseudoAddTPRel to a simple ADD with the correct relocation. void RISCVMCCodeEmitter::expandAddTPRel(const MCInst &MI, SmallVectorImpl &CB, SmallVectorImpl &Fixups, const MCSubtargetInfo &STI) const { MCOperand DestReg = MI.getOperand(0); MCOperand SrcReg = MI.getOperand(1); MCOperand TPReg = MI.getOperand(2); assert(TPReg.isReg() && TPReg.getReg() == RISCV::X4 && "Expected thread pointer as second input to TP-relative add"); MCOperand SrcSymbol = MI.getOperand(3); assert(SrcSymbol.isExpr() && "Expected expression as third input to TP-relative add"); const RISCVMCExpr *Expr = dyn_cast(SrcSymbol.getExpr()); assert(Expr && Expr->getKind() == RISCVMCExpr::VK_RISCV_TPREL_ADD && "Expected tprel_add relocation on TP-relative symbol"); // Emit the correct tprel_add relocation for the symbol. 
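  // Illustrative sketch only (`sym`, `a5` and `a0` are placeholder names):
  // this pseudo corresponds to the middle instruction of the usual TP-relative
  // (TLS local-exec) sequence, and the fixup added below marks that ADD:
  //   lui  a5, %tprel_hi(sym)            // R_RISCV_TPREL_HI20
  //   add  a5, a5, tp, %tprel_add(sym)   // R_RISCV_TPREL_ADD (expanded here)
  //   lw   a0, %tprel_lo(sym)(a5)        // R_RISCV_TPREL_LO12_I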
  Fixups.push_back(MCFixup::create(
      0, Expr, MCFixupKind(RISCV::fixup_riscv_tprel_add), MI.getLoc()));

  // Emit fixup_riscv_relax for tprel_add where the relax feature is enabled.
  if (STI.hasFeature(RISCV::FeatureRelax)) {
    const MCConstantExpr *Dummy = MCConstantExpr::create(0, Ctx);
    Fixups.push_back(MCFixup::create(
        0, Dummy, MCFixupKind(RISCV::fixup_riscv_relax), MI.getLoc()));
  }

  // Emit a normal ADD instruction with the given operands.
  MCInst TmpInst = MCInstBuilder(RISCV::ADD)
                       .addOperand(DestReg)
                       .addOperand(SrcReg)
                       .addOperand(TPReg);
  uint32_t Binary = getBinaryCodeForInstr(TmpInst, Fixups, STI);
  support::endian::write(CB, Binary, llvm::endianness::little);
}

static unsigned getInvertedBranchOp(unsigned BrOp) {
  switch (BrOp) {
  default:
    llvm_unreachable("Unexpected branch opcode!");
  case RISCV::PseudoLongBEQ:
    return RISCV::BNE;
  case RISCV::PseudoLongBNE:
    return RISCV::BEQ;
  case RISCV::PseudoLongBLT:
    return RISCV::BGE;
  case RISCV::PseudoLongBGE:
    return RISCV::BLT;
  case RISCV::PseudoLongBLTU:
    return RISCV::BGEU;
  case RISCV::PseudoLongBGEU:
    return RISCV::BLTU;
  }
}

// Expand PseudoLongBxx to an inverted conditional branch and an unconditional
// jump.
void RISCVMCCodeEmitter::expandLongCondBr(const MCInst &MI,
                                          SmallVectorImpl<char> &CB,
                                          SmallVectorImpl<MCFixup> &Fixups,
                                          const MCSubtargetInfo &STI) const {
  MCRegister SrcReg1 = MI.getOperand(0).getReg();
  MCRegister SrcReg2 = MI.getOperand(1).getReg();
  MCOperand SrcSymbol = MI.getOperand(2);
  unsigned Opcode = MI.getOpcode();
  bool IsEqTest =
      Opcode == RISCV::PseudoLongBNE || Opcode == RISCV::PseudoLongBEQ;

  bool UseCompressedBr = false;
  if (IsEqTest && (STI.hasFeature(RISCV::FeatureStdExtC) ||
                   STI.hasFeature(RISCV::FeatureStdExtZca))) {
    if (RISCV::X8 <= SrcReg1.id() && SrcReg1.id() <= RISCV::X15 &&
        SrcReg2.id() == RISCV::X0) {
      UseCompressedBr = true;
    } else if (RISCV::X8 <= SrcReg2.id() && SrcReg2.id() <= RISCV::X15 &&
               SrcReg1.id() == RISCV::X0) {
      std::swap(SrcReg1, SrcReg2);
      UseCompressedBr = true;
    }
  }

  uint32_t Offset;
  if (UseCompressedBr) {
    unsigned InvOpc =
        Opcode == RISCV::PseudoLongBNE ? RISCV::C_BEQZ : RISCV::C_BNEZ;
    MCInst TmpInst = MCInstBuilder(InvOpc).addReg(SrcReg1).addImm(6);
    uint16_t Binary = getBinaryCodeForInstr(TmpInst, Fixups, STI);
    support::endian::write(CB, Binary, llvm::endianness::little);
    Offset = 2;
  } else {
    unsigned InvOpc = getInvertedBranchOp(Opcode);
    MCInst TmpInst =
        MCInstBuilder(InvOpc).addReg(SrcReg1).addReg(SrcReg2).addImm(8);
    uint32_t Binary = getBinaryCodeForInstr(TmpInst, Fixups, STI);
    support::endian::write(CB, Binary, llvm::endianness::little);
    Offset = 4;
  }

+  // Save the number of fixups.
+  size_t FixupStartIndex = Fixups.size();
+
  // Emit an unconditional jump to the destination.
  MCInst TmpInst =
      MCInstBuilder(RISCV::JAL).addReg(RISCV::X0).addOperand(SrcSymbol);
  uint32_t Binary = getBinaryCodeForInstr(TmpInst, Fixups, STI);
  support::endian::write(CB, Binary, llvm::endianness::little);

-  Fixups.clear();
+  // Drop any fixup added so we can add the correct one.
+  Fixups.resize(FixupStartIndex);
+
  if (SrcSymbol.isExpr()) {
    Fixups.push_back(MCFixup::create(Offset, SrcSymbol.getExpr(),
                                     MCFixupKind(RISCV::fixup_riscv_jal),
                                     MI.getLoc()));
  }
}

void RISCVMCCodeEmitter::encodeInstruction(const MCInst &MI,
                                           SmallVectorImpl<char> &CB,
                                           SmallVectorImpl<MCFixup> &Fixups,
                                           const MCSubtargetInfo &STI) const {
  const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
  // Get byte count of instruction.
unsigned Size = Desc.getSize(); // RISCVInstrInfo::getInstSizeInBytes expects that the total size of the // expanded instructions for each pseudo is correct in the Size field of the // tablegen definition for the pseudo. switch (MI.getOpcode()) { default: break; case RISCV::PseudoCALLReg: case RISCV::PseudoCALL: case RISCV::PseudoTAIL: case RISCV::PseudoJump: expandFunctionCall(MI, CB, Fixups, STI); MCNumEmitted += 2; return; case RISCV::PseudoAddTPRel: expandAddTPRel(MI, CB, Fixups, STI); MCNumEmitted += 1; return; case RISCV::PseudoLongBEQ: case RISCV::PseudoLongBNE: case RISCV::PseudoLongBLT: case RISCV::PseudoLongBGE: case RISCV::PseudoLongBLTU: case RISCV::PseudoLongBGEU: expandLongCondBr(MI, CB, Fixups, STI); MCNumEmitted += 2; return; case RISCV::PseudoTLSDESCCall: expandTLSDESCCall(MI, CB, Fixups, STI); MCNumEmitted += 1; return; } switch (Size) { default: llvm_unreachable("Unhandled encodeInstruction length!"); case 2: { uint16_t Bits = getBinaryCodeForInstr(MI, Fixups, STI); support::endian::write(CB, Bits, llvm::endianness::little); break; } case 4: { uint32_t Bits = getBinaryCodeForInstr(MI, Fixups, STI); support::endian::write(CB, Bits, llvm::endianness::little); break; } } ++MCNumEmitted; // Keep track of the # of mi's emitted. } unsigned RISCVMCCodeEmitter::getMachineOpValue(const MCInst &MI, const MCOperand &MO, SmallVectorImpl &Fixups, const MCSubtargetInfo &STI) const { if (MO.isReg()) return Ctx.getRegisterInfo()->getEncodingValue(MO.getReg()); if (MO.isImm()) return static_cast(MO.getImm()); llvm_unreachable("Unhandled expression!"); return 0; } unsigned RISCVMCCodeEmitter::getImmOpValueAsr1(const MCInst &MI, unsigned OpNo, SmallVectorImpl &Fixups, const MCSubtargetInfo &STI) const { const MCOperand &MO = MI.getOperand(OpNo); if (MO.isImm()) { unsigned Res = MO.getImm(); assert((Res & 1) == 0 && "LSB is non-zero"); return Res >> 1; } return getImmOpValue(MI, OpNo, Fixups, STI); } unsigned RISCVMCCodeEmitter::getImmOpValue(const MCInst &MI, unsigned OpNo, SmallVectorImpl &Fixups, const MCSubtargetInfo &STI) const { bool EnableRelax = STI.hasFeature(RISCV::FeatureRelax); const MCOperand &MO = MI.getOperand(OpNo); MCInstrDesc const &Desc = MCII.get(MI.getOpcode()); unsigned MIFrm = RISCVII::getFormat(Desc.TSFlags); // If the destination is an immediate, there is nothing to do. if (MO.isImm()) return MO.getImm(); assert(MO.isExpr() && "getImmOpValue expects only expressions or immediates"); const MCExpr *Expr = MO.getExpr(); MCExpr::ExprKind Kind = Expr->getKind(); RISCV::Fixups FixupKind = RISCV::fixup_riscv_invalid; bool RelaxCandidate = false; if (Kind == MCExpr::Target) { const RISCVMCExpr *RVExpr = cast(Expr); switch (RVExpr->getKind()) { case RISCVMCExpr::VK_RISCV_None: case RISCVMCExpr::VK_RISCV_Invalid: case RISCVMCExpr::VK_RISCV_32_PCREL: llvm_unreachable("Unhandled fixup kind!"); case RISCVMCExpr::VK_RISCV_TPREL_ADD: // tprel_add is only used to indicate that a relocation should be emitted // for an add instruction used in TP-relative addressing. It should not be // expanded as if representing an actual instruction operand and so to // encounter it here is an error. 
llvm_unreachable( "VK_RISCV_TPREL_ADD should not represent an instruction operand"); case RISCVMCExpr::VK_RISCV_LO: if (MIFrm == RISCVII::InstFormatI) FixupKind = RISCV::fixup_riscv_lo12_i; else if (MIFrm == RISCVII::InstFormatS) FixupKind = RISCV::fixup_riscv_lo12_s; else llvm_unreachable("VK_RISCV_LO used with unexpected instruction format"); RelaxCandidate = true; break; case RISCVMCExpr::VK_RISCV_HI: FixupKind = RISCV::fixup_riscv_hi20; RelaxCandidate = true; break; case RISCVMCExpr::VK_RISCV_PCREL_LO: if (MIFrm == RISCVII::InstFormatI) FixupKind = RISCV::fixup_riscv_pcrel_lo12_i; else if (MIFrm == RISCVII::InstFormatS) FixupKind = RISCV::fixup_riscv_pcrel_lo12_s; else llvm_unreachable( "VK_RISCV_PCREL_LO used with unexpected instruction format"); RelaxCandidate = true; break; case RISCVMCExpr::VK_RISCV_PCREL_HI: FixupKind = RISCV::fixup_riscv_pcrel_hi20; RelaxCandidate = true; break; case RISCVMCExpr::VK_RISCV_GOT_HI: FixupKind = RISCV::fixup_riscv_got_hi20; break; case RISCVMCExpr::VK_RISCV_TPREL_LO: if (MIFrm == RISCVII::InstFormatI) FixupKind = RISCV::fixup_riscv_tprel_lo12_i; else if (MIFrm == RISCVII::InstFormatS) FixupKind = RISCV::fixup_riscv_tprel_lo12_s; else llvm_unreachable( "VK_RISCV_TPREL_LO used with unexpected instruction format"); RelaxCandidate = true; break; case RISCVMCExpr::VK_RISCV_TPREL_HI: FixupKind = RISCV::fixup_riscv_tprel_hi20; RelaxCandidate = true; break; case RISCVMCExpr::VK_RISCV_TLS_GOT_HI: FixupKind = RISCV::fixup_riscv_tls_got_hi20; break; case RISCVMCExpr::VK_RISCV_TLS_GD_HI: FixupKind = RISCV::fixup_riscv_tls_gd_hi20; break; case RISCVMCExpr::VK_RISCV_CALL: FixupKind = RISCV::fixup_riscv_call; RelaxCandidate = true; break; case RISCVMCExpr::VK_RISCV_CALL_PLT: FixupKind = RISCV::fixup_riscv_call_plt; RelaxCandidate = true; break; case RISCVMCExpr::VK_RISCV_TLSDESC_HI: FixupKind = RISCV::fixup_riscv_tlsdesc_hi20; break; case RISCVMCExpr::VK_RISCV_TLSDESC_LOAD_LO: FixupKind = RISCV::fixup_riscv_tlsdesc_load_lo12; break; case RISCVMCExpr::VK_RISCV_TLSDESC_ADD_LO: FixupKind = RISCV::fixup_riscv_tlsdesc_add_lo12; break; case RISCVMCExpr::VK_RISCV_TLSDESC_CALL: FixupKind = RISCV::fixup_riscv_tlsdesc_call; break; } } else if ((Kind == MCExpr::SymbolRef && cast(Expr)->getKind() == MCSymbolRefExpr::VK_None) || Kind == MCExpr::Binary) { // FIXME: Sub kind binary exprs have chance of underflow. if (MIFrm == RISCVII::InstFormatJ) { FixupKind = RISCV::fixup_riscv_jal; } else if (MIFrm == RISCVII::InstFormatB) { FixupKind = RISCV::fixup_riscv_branch; } else if (MIFrm == RISCVII::InstFormatCJ) { FixupKind = RISCV::fixup_riscv_rvc_jump; } else if (MIFrm == RISCVII::InstFormatCB) { FixupKind = RISCV::fixup_riscv_rvc_branch; } else if (MIFrm == RISCVII::InstFormatI) { FixupKind = RISCV::fixup_riscv_12_i; } } assert(FixupKind != RISCV::fixup_riscv_invalid && "Unhandled expression!"); Fixups.push_back( MCFixup::create(0, Expr, MCFixupKind(FixupKind), MI.getLoc())); ++MCNumFixups; // Ensure an R_RISCV_RELAX relocation will be emitted if linker relaxation is // enabled and the current fixup will result in a relocation that may be // relaxed. 
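  // Illustrative sketch only (`sym` and `a0` are placeholder names): with
  // relaxation enabled, something like `lui a0, %hi(sym)` ends up with two
  // fixups at offset 0 (fixup_riscv_hi20 for `sym` plus fixup_riscv_relax),
  // so the object file carries an R_RISCV_RELAX paired with R_RISCV_HI20 at
  // the same location, telling the linker this instruction may be relaxed.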
if (EnableRelax && RelaxCandidate) { const MCConstantExpr *Dummy = MCConstantExpr::create(0, Ctx); Fixups.push_back( MCFixup::create(0, Dummy, MCFixupKind(RISCV::fixup_riscv_relax), MI.getLoc())); ++MCNumFixups; } return 0; } unsigned RISCVMCCodeEmitter::getVMaskReg(const MCInst &MI, unsigned OpNo, SmallVectorImpl &Fixups, const MCSubtargetInfo &STI) const { MCOperand MO = MI.getOperand(OpNo); assert(MO.isReg() && "Expected a register."); switch (MO.getReg()) { default: llvm_unreachable("Invalid mask register."); case RISCV::V0: return 0; case RISCV::NoRegister: return 1; } } unsigned RISCVMCCodeEmitter::getRlistOpValue(const MCInst &MI, unsigned OpNo, SmallVectorImpl &Fixups, const MCSubtargetInfo &STI) const { const MCOperand &MO = MI.getOperand(OpNo); assert(MO.isImm() && "Rlist operand must be immediate"); auto Imm = MO.getImm(); assert(Imm >= 4 && "EABI is currently not implemented"); return Imm; } unsigned RISCVMCCodeEmitter::getRegReg(const MCInst &MI, unsigned OpNo, SmallVectorImpl &Fixups, const MCSubtargetInfo &STI) const { const MCOperand &MO = MI.getOperand(OpNo); const MCOperand &MO1 = MI.getOperand(OpNo + 1); assert(MO.isReg() && MO1.isReg() && "Expected registers."); unsigned Op = Ctx.getRegisterInfo()->getEncodingValue(MO.getReg()); unsigned Op1 = Ctx.getRegisterInfo()->getEncodingValue(MO1.getReg()); return Op | Op1 << 5; } #include "RISCVGenMCCodeEmitter.inc" diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp index b2b88143354a..383393914a16 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -1,9737 +1,9738 @@ //===-- SystemZISelLowering.cpp - SystemZ DAG lowering implementation -----===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file implements the SystemZTargetLowering class. // //===----------------------------------------------------------------------===// #include "SystemZISelLowering.h" #include "SystemZCallingConv.h" #include "SystemZConstantPoolValue.h" #include "SystemZMachineFunctionInfo.h" #include "SystemZTargetMachine.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/IR/GlobalAlias.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/IntrinsicsS390.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/KnownBits.h" #include #include using namespace llvm; #define DEBUG_TYPE "systemz-lower" namespace { // Represents information about a comparison. struct Comparison { Comparison(SDValue Op0In, SDValue Op1In, SDValue ChainIn) : Op0(Op0In), Op1(Op1In), Chain(ChainIn), Opcode(0), ICmpType(0), CCValid(0), CCMask(0) {} // The operands to the comparison. SDValue Op0, Op1; // Chain if this is a strict floating-point comparison. SDValue Chain; // The opcode that should be used to compare Op0 and Op1. unsigned Opcode; // A SystemZICMP value. Only used for integer comparisons. unsigned ICmpType; // The mask of CC values that Opcode can produce. 
unsigned CCValid; // The mask of CC values for which the original condition is true. unsigned CCMask; }; } // end anonymous namespace // Classify VT as either 32 or 64 bit. static bool is32Bit(EVT VT) { switch (VT.getSimpleVT().SimpleTy) { case MVT::i32: return true; case MVT::i64: return false; default: llvm_unreachable("Unsupported type"); } } // Return a version of MachineOperand that can be safely used before the // final use. static MachineOperand earlyUseOperand(MachineOperand Op) { if (Op.isReg()) Op.setIsKill(false); return Op; } SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, const SystemZSubtarget &STI) : TargetLowering(TM), Subtarget(STI) { MVT PtrVT = MVT::getIntegerVT(TM.getPointerSizeInBits(0)); auto *Regs = STI.getSpecialRegisters(); // Set up the register classes. if (Subtarget.hasHighWord()) addRegisterClass(MVT::i32, &SystemZ::GRX32BitRegClass); else addRegisterClass(MVT::i32, &SystemZ::GR32BitRegClass); addRegisterClass(MVT::i64, &SystemZ::GR64BitRegClass); if (!useSoftFloat()) { if (Subtarget.hasVector()) { addRegisterClass(MVT::f32, &SystemZ::VR32BitRegClass); addRegisterClass(MVT::f64, &SystemZ::VR64BitRegClass); } else { addRegisterClass(MVT::f32, &SystemZ::FP32BitRegClass); addRegisterClass(MVT::f64, &SystemZ::FP64BitRegClass); } if (Subtarget.hasVectorEnhancements1()) addRegisterClass(MVT::f128, &SystemZ::VR128BitRegClass); else addRegisterClass(MVT::f128, &SystemZ::FP128BitRegClass); if (Subtarget.hasVector()) { addRegisterClass(MVT::v16i8, &SystemZ::VR128BitRegClass); addRegisterClass(MVT::v8i16, &SystemZ::VR128BitRegClass); addRegisterClass(MVT::v4i32, &SystemZ::VR128BitRegClass); addRegisterClass(MVT::v2i64, &SystemZ::VR128BitRegClass); addRegisterClass(MVT::v4f32, &SystemZ::VR128BitRegClass); addRegisterClass(MVT::v2f64, &SystemZ::VR128BitRegClass); } if (Subtarget.hasVector()) addRegisterClass(MVT::i128, &SystemZ::VR128BitRegClass); } // Compute derived properties from the register classes computeRegisterProperties(Subtarget.getRegisterInfo()); // Set up special registers. setStackPointerRegisterToSaveRestore(Regs->getStackPointerRegister()); // TODO: It may be better to default to latency-oriented scheduling, however // LLVM's current latency-oriented scheduler can't handle physreg definitions // such as SystemZ has with CC, so set this to the register-pressure // scheduler, because it can. setSchedulingPreference(Sched::RegPressure); setBooleanContents(ZeroOrOneBooleanContent); setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); setMaxAtomicSizeInBitsSupported(128); // Instructions are strings of 2-byte aligned 2-byte values. setMinFunctionAlignment(Align(2)); // For performance reasons we prefer 16-byte alignment. setPrefFunctionAlignment(Align(16)); // Handle operations that are handled in a similar way for all types. for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE; I <= MVT::LAST_FP_VALUETYPE; ++I) { MVT VT = MVT::SimpleValueType(I); if (isTypeLegal(VT)) { // Lower SET_CC into an IPM-based sequence. setOperationAction(ISD::SETCC, VT, Custom); setOperationAction(ISD::STRICT_FSETCC, VT, Custom); setOperationAction(ISD::STRICT_FSETCCS, VT, Custom); // Expand SELECT(C, A, B) into SELECT_CC(X, 0, A, B, NE). setOperationAction(ISD::SELECT, VT, Expand); // Lower SELECT_CC and BR_CC into separate comparisons and branches. setOperationAction(ISD::SELECT_CC, VT, Custom); setOperationAction(ISD::BR_CC, VT, Custom); } } // Expand jump table branches as address arithmetic followed by an // indirect jump. 
setOperationAction(ISD::BR_JT, MVT::Other, Expand); // Expand BRCOND into a BR_CC (see above). setOperationAction(ISD::BRCOND, MVT::Other, Expand); // Handle integer types except i128. for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE; I <= MVT::LAST_INTEGER_VALUETYPE; ++I) { MVT VT = MVT::SimpleValueType(I); if (isTypeLegal(VT) && VT != MVT::i128) { setOperationAction(ISD::ABS, VT, Legal); // Expand individual DIV and REMs into DIVREMs. setOperationAction(ISD::SDIV, VT, Expand); setOperationAction(ISD::UDIV, VT, Expand); setOperationAction(ISD::SREM, VT, Expand); setOperationAction(ISD::UREM, VT, Expand); setOperationAction(ISD::SDIVREM, VT, Custom); setOperationAction(ISD::UDIVREM, VT, Custom); // Support addition/subtraction with overflow. setOperationAction(ISD::SADDO, VT, Custom); setOperationAction(ISD::SSUBO, VT, Custom); // Support addition/subtraction with carry. setOperationAction(ISD::UADDO, VT, Custom); setOperationAction(ISD::USUBO, VT, Custom); // Support carry in as value rather than glue. setOperationAction(ISD::UADDO_CARRY, VT, Custom); setOperationAction(ISD::USUBO_CARRY, VT, Custom); // Lower ATOMIC_LOAD_SUB into ATOMIC_LOAD_ADD if LAA and LAAG are // available, or if the operand is constant. setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom); // Use POPCNT on z196 and above. if (Subtarget.hasPopulationCount()) setOperationAction(ISD::CTPOP, VT, Custom); else setOperationAction(ISD::CTPOP, VT, Expand); // No special instructions for these. setOperationAction(ISD::CTTZ, VT, Expand); setOperationAction(ISD::ROTR, VT, Expand); // Use *MUL_LOHI where possible instead of MULH*. setOperationAction(ISD::MULHS, VT, Expand); setOperationAction(ISD::MULHU, VT, Expand); setOperationAction(ISD::SMUL_LOHI, VT, Custom); setOperationAction(ISD::UMUL_LOHI, VT, Custom); // Only z196 and above have native support for conversions to unsigned. // On z10, promoting to i64 doesn't generate an inexact condition for // values that are outside the i32 range but in the i64 range, so use // the default expansion. if (!Subtarget.hasFPExtension()) setOperationAction(ISD::FP_TO_UINT, VT, Expand); // Mirror those settings for STRICT_FP_TO_[SU]INT. Note that these all // default to Expand, so need to be modified to Legal where appropriate. setOperationAction(ISD::STRICT_FP_TO_SINT, VT, Legal); if (Subtarget.hasFPExtension()) setOperationAction(ISD::STRICT_FP_TO_UINT, VT, Legal); // And similarly for STRICT_[SU]INT_TO_FP. setOperationAction(ISD::STRICT_SINT_TO_FP, VT, Legal); if (Subtarget.hasFPExtension()) setOperationAction(ISD::STRICT_UINT_TO_FP, VT, Legal); } } // Handle i128 if legal. if (isTypeLegal(MVT::i128)) { // No special instructions for these. setOperationAction(ISD::SDIVREM, MVT::i128, Expand); setOperationAction(ISD::UDIVREM, MVT::i128, Expand); setOperationAction(ISD::SMUL_LOHI, MVT::i128, Expand); setOperationAction(ISD::UMUL_LOHI, MVT::i128, Expand); setOperationAction(ISD::ROTR, MVT::i128, Expand); setOperationAction(ISD::ROTL, MVT::i128, Expand); setOperationAction(ISD::MUL, MVT::i128, Expand); setOperationAction(ISD::MULHS, MVT::i128, Expand); setOperationAction(ISD::MULHU, MVT::i128, Expand); setOperationAction(ISD::SDIV, MVT::i128, Expand); setOperationAction(ISD::UDIV, MVT::i128, Expand); setOperationAction(ISD::SREM, MVT::i128, Expand); setOperationAction(ISD::UREM, MVT::i128, Expand); setOperationAction(ISD::CTLZ, MVT::i128, Expand); setOperationAction(ISD::CTTZ, MVT::i128, Expand); // Support addition/subtraction with carry. 
setOperationAction(ISD::UADDO, MVT::i128, Custom); setOperationAction(ISD::USUBO, MVT::i128, Custom); setOperationAction(ISD::UADDO_CARRY, MVT::i128, Custom); setOperationAction(ISD::USUBO_CARRY, MVT::i128, Custom); // Use VPOPCT and add up partial results. setOperationAction(ISD::CTPOP, MVT::i128, Custom); // We have to use libcalls for these. setOperationAction(ISD::FP_TO_UINT, MVT::i128, LibCall); setOperationAction(ISD::FP_TO_SINT, MVT::i128, LibCall); setOperationAction(ISD::UINT_TO_FP, MVT::i128, LibCall); setOperationAction(ISD::SINT_TO_FP, MVT::i128, LibCall); setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i128, LibCall); setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i128, LibCall); setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i128, LibCall); setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i128, LibCall); } // Type legalization will convert 8- and 16-bit atomic operations into // forms that operate on i32s (but still keeping the original memory VT). // Lower them into full i32 operations. setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Custom); setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Custom); setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Custom); setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Custom); setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Custom); setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Custom); setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Custom); setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Custom); setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Custom); setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Custom); setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Custom); // Whether or not i128 is not a legal type, we need to custom lower // the atomic operations in order to exploit SystemZ instructions. setOperationAction(ISD::ATOMIC_LOAD, MVT::i128, Custom); setOperationAction(ISD::ATOMIC_STORE, MVT::i128, Custom); setOperationAction(ISD::ATOMIC_LOAD, MVT::f128, Custom); setOperationAction(ISD::ATOMIC_STORE, MVT::f128, Custom); // Mark sign/zero extending atomic loads as legal, which will make // DAGCombiner fold extensions into atomic loads if possible. setAtomicLoadExtAction({ISD::SEXTLOAD, ISD::ZEXTLOAD}, MVT::i64, {MVT::i8, MVT::i16, MVT::i32}, Legal); setAtomicLoadExtAction({ISD::SEXTLOAD, ISD::ZEXTLOAD}, MVT::i32, {MVT::i8, MVT::i16}, Legal); setAtomicLoadExtAction({ISD::SEXTLOAD, ISD::ZEXTLOAD}, MVT::i16, MVT::i8, Legal); // We can use the CC result of compare-and-swap to implement // the "success" result of ATOMIC_CMP_SWAP_WITH_SUCCESS. setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i32, Custom); setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i64, Custom); setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i128, Custom); setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom); // Traps are legal, as we will convert them to "j .+2". setOperationAction(ISD::TRAP, MVT::Other, Legal); // z10 has instructions for signed but not unsigned FP conversion. // Handle unsigned 32-bit types as signed 64-bit types. if (!Subtarget.hasFPExtension()) { setOperationAction(ISD::UINT_TO_FP, MVT::i32, Promote); setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand); setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Promote); setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Expand); } // We have native support for a 64-bit CTLZ, via FLOGR. 
setOperationAction(ISD::CTLZ, MVT::i32, Promote); setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Promote); setOperationAction(ISD::CTLZ, MVT::i64, Legal); // On z15 we have native support for a 64-bit CTPOP. if (Subtarget.hasMiscellaneousExtensions3()) { setOperationAction(ISD::CTPOP, MVT::i32, Promote); setOperationAction(ISD::CTPOP, MVT::i64, Legal); } // Give LowerOperation the chance to replace 64-bit ORs with subregs. setOperationAction(ISD::OR, MVT::i64, Custom); // Expand 128 bit shifts without using a libcall. setOperationAction(ISD::SRL_PARTS, MVT::i64, Expand); setOperationAction(ISD::SHL_PARTS, MVT::i64, Expand); setOperationAction(ISD::SRA_PARTS, MVT::i64, Expand); // Also expand 256 bit shifts if i128 is a legal type. if (isTypeLegal(MVT::i128)) { setOperationAction(ISD::SRL_PARTS, MVT::i128, Expand); setOperationAction(ISD::SHL_PARTS, MVT::i128, Expand); setOperationAction(ISD::SRA_PARTS, MVT::i128, Expand); } // Handle bitcast from fp128 to i128. if (!isTypeLegal(MVT::i128)) setOperationAction(ISD::BITCAST, MVT::i128, Custom); // We have native instructions for i8, i16 and i32 extensions, but not i1. setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); for (MVT VT : MVT::integer_valuetypes()) { setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote); setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote); setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote); } // Handle the various types of symbolic address. setOperationAction(ISD::ConstantPool, PtrVT, Custom); setOperationAction(ISD::GlobalAddress, PtrVT, Custom); setOperationAction(ISD::GlobalTLSAddress, PtrVT, Custom); setOperationAction(ISD::BlockAddress, PtrVT, Custom); setOperationAction(ISD::JumpTable, PtrVT, Custom); // We need to handle dynamic allocations specially because of the // 160-byte area at the bottom of the stack. setOperationAction(ISD::DYNAMIC_STACKALLOC, PtrVT, Custom); setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, PtrVT, Custom); setOperationAction(ISD::STACKSAVE, MVT::Other, Custom); setOperationAction(ISD::STACKRESTORE, MVT::Other, Custom); // Handle prefetches with PFD or PFDRL. setOperationAction(ISD::PREFETCH, MVT::Other, Custom); // Handle readcyclecounter with STCKF. setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom); for (MVT VT : MVT::fixedlen_vector_valuetypes()) { // Assume by default that all vector operations need to be expanded. for (unsigned Opcode = 0; Opcode < ISD::BUILTIN_OP_END; ++Opcode) if (getOperationAction(Opcode, VT) == Legal) setOperationAction(Opcode, VT, Expand); // Likewise all truncating stores and extending loads. for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) { setTruncStoreAction(VT, InnerVT, Expand); setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand); setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand); setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand); } if (isTypeLegal(VT)) { // These operations are legal for anything that can be stored in a // vector register, even if there is no native support for the format // as such. In particular, we can do these for v4f32 even though there // are no specific instructions for that format. setOperationAction(ISD::LOAD, VT, Legal); setOperationAction(ISD::STORE, VT, Legal); setOperationAction(ISD::VSELECT, VT, Legal); setOperationAction(ISD::BITCAST, VT, Legal); setOperationAction(ISD::UNDEF, VT, Legal); // Likewise, except that we need to replace the nodes with something // more specific. 
setOperationAction(ISD::BUILD_VECTOR, VT, Custom); setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); } } // Handle integer vector types. for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) { if (isTypeLegal(VT)) { // These operations have direct equivalents. setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal); setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Legal); setOperationAction(ISD::ADD, VT, Legal); setOperationAction(ISD::SUB, VT, Legal); if (VT != MVT::v2i64) setOperationAction(ISD::MUL, VT, Legal); setOperationAction(ISD::ABS, VT, Legal); setOperationAction(ISD::AND, VT, Legal); setOperationAction(ISD::OR, VT, Legal); setOperationAction(ISD::XOR, VT, Legal); if (Subtarget.hasVectorEnhancements1()) setOperationAction(ISD::CTPOP, VT, Legal); else setOperationAction(ISD::CTPOP, VT, Custom); setOperationAction(ISD::CTTZ, VT, Legal); setOperationAction(ISD::CTLZ, VT, Legal); // Convert a GPR scalar to a vector by inserting it into element 0. setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom); // Use a series of unpacks for extensions. setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom); setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom); // Detect shifts/rotates by a scalar amount and convert them into // V*_BY_SCALAR. setOperationAction(ISD::SHL, VT, Custom); setOperationAction(ISD::SRA, VT, Custom); setOperationAction(ISD::SRL, VT, Custom); setOperationAction(ISD::ROTL, VT, Custom); // Add ISD::VECREDUCE_ADD as custom in order to implement // it with VZERO+VSUM setOperationAction(ISD::VECREDUCE_ADD, VT, Custom); // Map SETCCs onto one of VCE, VCH or VCHL, swapping the operands // and inverting the result as necessary. setOperationAction(ISD::SETCC, VT, Custom); } } if (Subtarget.hasVector()) { // There should be no need to check for float types other than v2f64 // since <2 x f32> isn't a legal type. 
setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal); setOperationAction(ISD::FP_TO_SINT, MVT::v2f64, Legal); setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal); setOperationAction(ISD::FP_TO_UINT, MVT::v2f64, Legal); setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal); setOperationAction(ISD::SINT_TO_FP, MVT::v2f64, Legal); setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal); setOperationAction(ISD::UINT_TO_FP, MVT::v2f64, Legal); setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2i64, Legal); setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2f64, Legal); setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i64, Legal); setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2f64, Legal); setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i64, Legal); setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2f64, Legal); setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i64, Legal); setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2f64, Legal); } if (Subtarget.hasVectorEnhancements2()) { setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal); setOperationAction(ISD::FP_TO_SINT, MVT::v4f32, Legal); setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal); setOperationAction(ISD::FP_TO_UINT, MVT::v4f32, Legal); setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal); setOperationAction(ISD::SINT_TO_FP, MVT::v4f32, Legal); setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal); setOperationAction(ISD::UINT_TO_FP, MVT::v4f32, Legal); setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4i32, Legal); setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4f32, Legal); setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4i32, Legal); setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4f32, Legal); setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i32, Legal); setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4f32, Legal); setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i32, Legal); setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4f32, Legal); } // Handle floating-point types. for (unsigned I = MVT::FIRST_FP_VALUETYPE; I <= MVT::LAST_FP_VALUETYPE; ++I) { MVT VT = MVT::SimpleValueType(I); if (isTypeLegal(VT)) { // We can use FI for FRINT. setOperationAction(ISD::FRINT, VT, Legal); // We can use the extended form of FI for other rounding operations. if (Subtarget.hasFPExtension()) { setOperationAction(ISD::FNEARBYINT, VT, Legal); setOperationAction(ISD::FFLOOR, VT, Legal); setOperationAction(ISD::FCEIL, VT, Legal); setOperationAction(ISD::FTRUNC, VT, Legal); setOperationAction(ISD::FROUND, VT, Legal); } // No special instructions for these. setOperationAction(ISD::FSIN, VT, Expand); setOperationAction(ISD::FCOS, VT, Expand); setOperationAction(ISD::FSINCOS, VT, Expand); setOperationAction(ISD::FREM, VT, Expand); setOperationAction(ISD::FPOW, VT, Expand); // Special treatment. setOperationAction(ISD::IS_FPCLASS, VT, Custom); // Handle constrained floating-point operations. 
setOperationAction(ISD::STRICT_FADD, VT, Legal); setOperationAction(ISD::STRICT_FSUB, VT, Legal); setOperationAction(ISD::STRICT_FMUL, VT, Legal); setOperationAction(ISD::STRICT_FDIV, VT, Legal); setOperationAction(ISD::STRICT_FMA, VT, Legal); setOperationAction(ISD::STRICT_FSQRT, VT, Legal); setOperationAction(ISD::STRICT_FRINT, VT, Legal); setOperationAction(ISD::STRICT_FP_ROUND, VT, Legal); setOperationAction(ISD::STRICT_FP_EXTEND, VT, Legal); if (Subtarget.hasFPExtension()) { setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal); setOperationAction(ISD::STRICT_FFLOOR, VT, Legal); setOperationAction(ISD::STRICT_FCEIL, VT, Legal); setOperationAction(ISD::STRICT_FROUND, VT, Legal); setOperationAction(ISD::STRICT_FTRUNC, VT, Legal); } } } // Handle floating-point vector types. if (Subtarget.hasVector()) { // Scalar-to-vector conversion is just a subreg. setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal); setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal); // Some insertions and extractions can be done directly but others // need to go via integers. setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f64, Custom); setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom); setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom); // These operations have direct equivalents. setOperationAction(ISD::FADD, MVT::v2f64, Legal); setOperationAction(ISD::FNEG, MVT::v2f64, Legal); setOperationAction(ISD::FSUB, MVT::v2f64, Legal); setOperationAction(ISD::FMUL, MVT::v2f64, Legal); setOperationAction(ISD::FMA, MVT::v2f64, Legal); setOperationAction(ISD::FDIV, MVT::v2f64, Legal); setOperationAction(ISD::FABS, MVT::v2f64, Legal); setOperationAction(ISD::FSQRT, MVT::v2f64, Legal); setOperationAction(ISD::FRINT, MVT::v2f64, Legal); setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal); setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal); setOperationAction(ISD::FCEIL, MVT::v2f64, Legal); setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal); setOperationAction(ISD::FROUND, MVT::v2f64, Legal); // Handle constrained floating-point operations. setOperationAction(ISD::STRICT_FADD, MVT::v2f64, Legal); setOperationAction(ISD::STRICT_FSUB, MVT::v2f64, Legal); setOperationAction(ISD::STRICT_FMUL, MVT::v2f64, Legal); setOperationAction(ISD::STRICT_FMA, MVT::v2f64, Legal); setOperationAction(ISD::STRICT_FDIV, MVT::v2f64, Legal); setOperationAction(ISD::STRICT_FSQRT, MVT::v2f64, Legal); setOperationAction(ISD::STRICT_FRINT, MVT::v2f64, Legal); setOperationAction(ISD::STRICT_FNEARBYINT, MVT::v2f64, Legal); setOperationAction(ISD::STRICT_FFLOOR, MVT::v2f64, Legal); setOperationAction(ISD::STRICT_FCEIL, MVT::v2f64, Legal); setOperationAction(ISD::STRICT_FTRUNC, MVT::v2f64, Legal); setOperationAction(ISD::STRICT_FROUND, MVT::v2f64, Legal); setOperationAction(ISD::SETCC, MVT::v2f64, Custom); setOperationAction(ISD::SETCC, MVT::v4f32, Custom); setOperationAction(ISD::STRICT_FSETCC, MVT::v2f64, Custom); setOperationAction(ISD::STRICT_FSETCC, MVT::v4f32, Custom); if (Subtarget.hasVectorEnhancements1()) { setOperationAction(ISD::STRICT_FSETCCS, MVT::v2f64, Custom); setOperationAction(ISD::STRICT_FSETCCS, MVT::v4f32, Custom); } } // The vector enhancements facility 1 has instructions for these. 
if (Subtarget.hasVectorEnhancements1()) { setOperationAction(ISD::FADD, MVT::v4f32, Legal); setOperationAction(ISD::FNEG, MVT::v4f32, Legal); setOperationAction(ISD::FSUB, MVT::v4f32, Legal); setOperationAction(ISD::FMUL, MVT::v4f32, Legal); setOperationAction(ISD::FMA, MVT::v4f32, Legal); setOperationAction(ISD::FDIV, MVT::v4f32, Legal); setOperationAction(ISD::FABS, MVT::v4f32, Legal); setOperationAction(ISD::FSQRT, MVT::v4f32, Legal); setOperationAction(ISD::FRINT, MVT::v4f32, Legal); setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal); setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal); setOperationAction(ISD::FCEIL, MVT::v4f32, Legal); setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal); setOperationAction(ISD::FROUND, MVT::v4f32, Legal); setOperationAction(ISD::FMAXNUM, MVT::f64, Legal); setOperationAction(ISD::FMAXIMUM, MVT::f64, Legal); setOperationAction(ISD::FMINNUM, MVT::f64, Legal); setOperationAction(ISD::FMINIMUM, MVT::f64, Legal); setOperationAction(ISD::FMAXNUM, MVT::v2f64, Legal); setOperationAction(ISD::FMAXIMUM, MVT::v2f64, Legal); setOperationAction(ISD::FMINNUM, MVT::v2f64, Legal); setOperationAction(ISD::FMINIMUM, MVT::v2f64, Legal); setOperationAction(ISD::FMAXNUM, MVT::f32, Legal); setOperationAction(ISD::FMAXIMUM, MVT::f32, Legal); setOperationAction(ISD::FMINNUM, MVT::f32, Legal); setOperationAction(ISD::FMINIMUM, MVT::f32, Legal); setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal); setOperationAction(ISD::FMAXIMUM, MVT::v4f32, Legal); setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal); setOperationAction(ISD::FMINIMUM, MVT::v4f32, Legal); setOperationAction(ISD::FMAXNUM, MVT::f128, Legal); setOperationAction(ISD::FMAXIMUM, MVT::f128, Legal); setOperationAction(ISD::FMINNUM, MVT::f128, Legal); setOperationAction(ISD::FMINIMUM, MVT::f128, Legal); // Handle constrained floating-point operations. setOperationAction(ISD::STRICT_FADD, MVT::v4f32, Legal); setOperationAction(ISD::STRICT_FSUB, MVT::v4f32, Legal); setOperationAction(ISD::STRICT_FMUL, MVT::v4f32, Legal); setOperationAction(ISD::STRICT_FMA, MVT::v4f32, Legal); setOperationAction(ISD::STRICT_FDIV, MVT::v4f32, Legal); setOperationAction(ISD::STRICT_FSQRT, MVT::v4f32, Legal); setOperationAction(ISD::STRICT_FRINT, MVT::v4f32, Legal); setOperationAction(ISD::STRICT_FNEARBYINT, MVT::v4f32, Legal); setOperationAction(ISD::STRICT_FFLOOR, MVT::v4f32, Legal); setOperationAction(ISD::STRICT_FCEIL, MVT::v4f32, Legal); setOperationAction(ISD::STRICT_FROUND, MVT::v4f32, Legal); setOperationAction(ISD::STRICT_FTRUNC, MVT::v4f32, Legal); for (auto VT : { MVT::f32, MVT::f64, MVT::f128, MVT::v4f32, MVT::v2f64 }) { setOperationAction(ISD::STRICT_FMAXNUM, VT, Legal); setOperationAction(ISD::STRICT_FMINNUM, VT, Legal); setOperationAction(ISD::STRICT_FMAXIMUM, VT, Legal); setOperationAction(ISD::STRICT_FMINIMUM, VT, Legal); } } // We only have fused f128 multiply-addition on vector registers. if (!Subtarget.hasVectorEnhancements1()) { setOperationAction(ISD::FMA, MVT::f128, Expand); setOperationAction(ISD::STRICT_FMA, MVT::f128, Expand); } // We don't have a copysign instruction on vector registers. if (Subtarget.hasVectorEnhancements1()) setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand); // Needed so that we don't try to implement f128 constant loads using // a load-and-extend of a f80 constant (in cases where the constant // would fit in an f80). for (MVT VT : MVT::fp_valuetypes()) setLoadExtAction(ISD::EXTLOAD, VT, MVT::f80, Expand); // We don't have extending load instruction on vector registers. 
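// (With f128 held in a vector register pair, an f32/f64 -> f128 extending
// load has to be split into a normal load followed by a separate extend.)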
if (Subtarget.hasVectorEnhancements1()) { setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f32, Expand); setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f64, Expand); } // Floating-point truncation and stores need to be done separately. setTruncStoreAction(MVT::f64, MVT::f32, Expand); setTruncStoreAction(MVT::f128, MVT::f32, Expand); setTruncStoreAction(MVT::f128, MVT::f64, Expand); // We have 64-bit FPR<->GPR moves, but need special handling for // 32-bit forms. if (!Subtarget.hasVector()) { setOperationAction(ISD::BITCAST, MVT::i32, Custom); setOperationAction(ISD::BITCAST, MVT::f32, Custom); } // VASTART and VACOPY need to deal with the SystemZ-specific varargs // structure, but VAEND is a no-op. setOperationAction(ISD::VASTART, MVT::Other, Custom); setOperationAction(ISD::VACOPY, MVT::Other, Custom); setOperationAction(ISD::VAEND, MVT::Other, Expand); setOperationAction(ISD::GET_ROUNDING, MVT::i32, Custom); // Codes for which we want to perform some z-specific combinations. setTargetDAGCombine({ISD::ZERO_EXTEND, ISD::SIGN_EXTEND, ISD::SIGN_EXTEND_INREG, ISD::LOAD, ISD::STORE, ISD::VECTOR_SHUFFLE, ISD::EXTRACT_VECTOR_ELT, ISD::FP_ROUND, ISD::STRICT_FP_ROUND, ISD::FP_EXTEND, ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::STRICT_FP_EXTEND, ISD::BSWAP, ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM, ISD::INTRINSIC_VOID, ISD::INTRINSIC_W_CHAIN}); // Handle intrinsics. setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); // We want to use MVC in preference to even a single load/store pair. MaxStoresPerMemcpy = Subtarget.hasVector() ? 2 : 0; MaxStoresPerMemcpyOptSize = 0; // The main memset sequence is a byte store followed by an MVC. // Two STC or MV..I stores win over that, but the kind of fused stores // generated by target-independent code don't when the byte value is // variable. E.g. "STC ;MHI ,257;STH " is not better // than "STC;MVC". Handle the choice in target-specific code instead. MaxStoresPerMemset = Subtarget.hasVector() ? 2 : 0; MaxStoresPerMemsetOptSize = 0; // Default to having -disable-strictnode-mutation on IsStrictFPEnabled = true; if (Subtarget.isTargetzOS()) { struct RTLibCallMapping { RTLIB::Libcall Code; const char *Name; }; static RTLibCallMapping RTLibCallCommon[] = { #define HANDLE_LIBCALL(code, name) {RTLIB::code, name}, #include "ZOSLibcallNames.def" }; for (auto &E : RTLibCallCommon) setLibcallName(E.Code, E.Name); } } bool SystemZTargetLowering::useSoftFloat() const { return Subtarget.hasSoftFloat(); } EVT SystemZTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &, EVT VT) const { if (!VT.isVector()) return MVT::i32; return VT.changeVectorElementTypeToInteger(); } bool SystemZTargetLowering::isFMAFasterThanFMulAndFAdd( const MachineFunction &MF, EVT VT) const { VT = VT.getScalarType(); if (!VT.isSimple()) return false; switch (VT.getSimpleVT().SimpleTy) { case MVT::f32: case MVT::f64: return true; case MVT::f128: return Subtarget.hasVectorEnhancements1(); default: break; } return false; } // Return true if the constant can be generated with a vector instruction, // such as VGM, VGMB or VREPI. bool SystemZVectorConstantInfo::isVectorConstantLegal( const SystemZSubtarget &Subtarget) { const SystemZInstrInfo *TII = Subtarget.getInstrInfo(); if (!Subtarget.hasVector() || (isFP128 && !Subtarget.hasVectorEnhancements1())) return false; // Try using VECTOR GENERATE BYTE MASK. 
This is the architecturally- // preferred way of creating all-zero and all-one vectors so give it // priority over other methods below. unsigned Mask = 0; unsigned I = 0; for (; I < SystemZ::VectorBytes; ++I) { uint64_t Byte = IntBits.lshr(I * 8).trunc(8).getZExtValue(); if (Byte == 0xff) Mask |= 1ULL << I; else if (Byte != 0) break; } if (I == SystemZ::VectorBytes) { Opcode = SystemZISD::BYTE_MASK; OpVals.push_back(Mask); VecVT = MVT::getVectorVT(MVT::getIntegerVT(8), 16); return true; } if (SplatBitSize > 64) return false; auto tryValue = [&](uint64_t Value) -> bool { // Try VECTOR REPLICATE IMMEDIATE int64_t SignedValue = SignExtend64(Value, SplatBitSize); if (isInt<16>(SignedValue)) { OpVals.push_back(((unsigned) SignedValue)); Opcode = SystemZISD::REPLICATE; VecVT = MVT::getVectorVT(MVT::getIntegerVT(SplatBitSize), SystemZ::VectorBits / SplatBitSize); return true; } // Try VECTOR GENERATE MASK unsigned Start, End; if (TII->isRxSBGMask(Value, SplatBitSize, Start, End)) { // isRxSBGMask returns the bit numbers for a full 64-bit value, with 0 // denoting 1 << 63 and 63 denoting 1. Convert them to bit numbers for // an SplatBitSize value, so that 0 denotes 1 << (SplatBitSize-1). OpVals.push_back(Start - (64 - SplatBitSize)); OpVals.push_back(End - (64 - SplatBitSize)); Opcode = SystemZISD::ROTATE_MASK; VecVT = MVT::getVectorVT(MVT::getIntegerVT(SplatBitSize), SystemZ::VectorBits / SplatBitSize); return true; } return false; }; // First try assuming that any undefined bits above the highest set bit // and below the lowest set bit are 1s. This increases the likelihood of // being able to use a sign-extended element value in VECTOR REPLICATE // IMMEDIATE or a wraparound mask in VECTOR GENERATE MASK. uint64_t SplatBitsZ = SplatBits.getZExtValue(); uint64_t SplatUndefZ = SplatUndef.getZExtValue(); unsigned LowerBits = llvm::countr_zero(SplatBitsZ); unsigned UpperBits = llvm::countl_zero(SplatBitsZ); uint64_t Lower = SplatUndefZ & maskTrailingOnes(LowerBits); uint64_t Upper = SplatUndefZ & maskLeadingOnes(UpperBits); if (tryValue(SplatBitsZ | Upper | Lower)) return true; // Now try assuming that any undefined bits between the first and // last defined set bits are set. This increases the chances of // using a non-wraparound mask. uint64_t Middle = SplatUndefZ & ~Upper & ~Lower; return tryValue(SplatBitsZ | Middle); } SystemZVectorConstantInfo::SystemZVectorConstantInfo(APInt IntImm) { if (IntImm.isSingleWord()) { IntBits = APInt(128, IntImm.getZExtValue()); IntBits <<= (SystemZ::VectorBits - IntImm.getBitWidth()); } else IntBits = IntImm; assert(IntBits.getBitWidth() == 128 && "Unsupported APInt."); // Find the smallest splat. SplatBits = IntImm; unsigned Width = SplatBits.getBitWidth(); while (Width > 8) { unsigned HalfSize = Width / 2; APInt HighValue = SplatBits.lshr(HalfSize).trunc(HalfSize); APInt LowValue = SplatBits.trunc(HalfSize); // If the two halves do not match, stop here. if (HighValue != LowValue || 8 > HalfSize) break; SplatBits = HighValue; Width = HalfSize; } SplatUndef = 0; SplatBitSize = Width; } SystemZVectorConstantInfo::SystemZVectorConstantInfo(BuildVectorSDNode *BVN) { assert(BVN->isConstant() && "Expected a constant BUILD_VECTOR"); bool HasAnyUndefs; // Get IntBits by finding the 128 bit splat. BVN->isConstantSplat(IntBits, SplatUndef, SplatBitSize, HasAnyUndefs, 128, true); // Get SplatBits by finding the 8 bit or greater splat. 
BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs, 8, true); } bool SystemZTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const { // We can load zero using LZ?R and negative zero using LZ?R;LC?BR. if (Imm.isZero() || Imm.isNegZero()) return true; return SystemZVectorConstantInfo(Imm).isVectorConstantLegal(Subtarget); } /// Returns true if stack probing through inline assembly is requested. bool SystemZTargetLowering::hasInlineStackProbe(const MachineFunction &MF) const { // If the function specifically requests inline stack probes, emit them. if (MF.getFunction().hasFnAttribute("probe-stack")) return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() == "inline-asm"; return false; } TargetLowering::AtomicExpansionKind SystemZTargetLowering::shouldCastAtomicLoadInIR(LoadInst *LI) const { return AtomicExpansionKind::None; } TargetLowering::AtomicExpansionKind SystemZTargetLowering::shouldCastAtomicStoreInIR(StoreInst *SI) const { return AtomicExpansionKind::None; } TargetLowering::AtomicExpansionKind SystemZTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const { // Don't expand subword operations as they require special treatment. if (RMW->getType()->isIntegerTy(8) || RMW->getType()->isIntegerTy(16)) return AtomicExpansionKind::None; // Don't expand if there is a target instruction available. if (Subtarget.hasInterlockedAccess1() && (RMW->getType()->isIntegerTy(32) || RMW->getType()->isIntegerTy(64)) && (RMW->getOperation() == AtomicRMWInst::BinOp::Add || RMW->getOperation() == AtomicRMWInst::BinOp::Sub || RMW->getOperation() == AtomicRMWInst::BinOp::And || RMW->getOperation() == AtomicRMWInst::BinOp::Or || RMW->getOperation() == AtomicRMWInst::BinOp::Xor)) return AtomicExpansionKind::None; return AtomicExpansionKind::CmpXChg; } bool SystemZTargetLowering::isLegalICmpImmediate(int64_t Imm) const { // We can use CGFI or CLGFI. return isInt<32>(Imm) || isUInt<32>(Imm); } bool SystemZTargetLowering::isLegalAddImmediate(int64_t Imm) const { // We can use ALGFI or SLGFI. return isUInt<32>(Imm) || isUInt<32>(-Imm); } bool SystemZTargetLowering::allowsMisalignedMemoryAccesses( EVT VT, unsigned, Align, MachineMemOperand::Flags, unsigned *Fast) const { // Unaligned accesses should never be slower than the expanded version. // We check specifically for aligned accesses in the few cases where // they are required. if (Fast) *Fast = 1; return true; } // Information about the addressing mode for a memory access. struct AddressingMode { // True if a long displacement is supported. bool LongDisplacement; // True if use of index register is supported. bool IndexReg; AddressingMode(bool LongDispl, bool IdxReg) : LongDisplacement(LongDispl), IndexReg(IdxReg) {} }; // Return the desired addressing mode for a Load which has only one use (in // the same block) which is a Store. static AddressingMode getLoadStoreAddrMode(bool HasVector, Type *Ty) { // With vector support a Load->Store combination may be combined to either // an MVC or vector operations and it seems to work best to allow the // vector addressing mode. if (HasVector) return AddressingMode(false/*LongDispl*/, true/*IdxReg*/); // Otherwise only the MVC case is special. bool MVC = Ty->isIntegerTy(8); return AddressingMode(!MVC/*LongDispl*/, !MVC/*IdxReg*/); } // Return the addressing mode which seems most desirable given an LLVM // Instruction pointer. 
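// The idea is to report only the most restrictive form that the likely
// final instruction can encode: MVC-style memory operations take a base
// register plus a 12-bit unsigned displacement and no index, while vector
// loads and stores allow an index register but also only a short
// displacement.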
static AddressingMode
supportedAddressingMode(Instruction *I, bool HasVector) {
  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
    switch (II->getIntrinsicID()) {
    default: break;
    case Intrinsic::memset:
    case Intrinsic::memmove:
    case Intrinsic::memcpy:
      return AddressingMode(false/*LongDispl*/, false/*IdxReg*/);
    }
  }

  if (isa<LoadInst>(I) && I->hasOneUse()) {
    auto *SingleUser = cast<Instruction>(*I->user_begin());
    if (SingleUser->getParent() == I->getParent()) {
      if (isa<ICmpInst>(SingleUser)) {
        if (auto *C = dyn_cast<ConstantInt>(SingleUser->getOperand(1)))
          if (C->getBitWidth() <= 64 &&
              (isInt<16>(C->getSExtValue()) || isUInt<16>(C->getZExtValue())))
            // Comparison of memory with 16 bit signed / unsigned immediate
            return AddressingMode(false/*LongDispl*/, false/*IdxReg*/);
      } else if (isa<StoreInst>(SingleUser))
        // Load->Store
        return getLoadStoreAddrMode(HasVector, I->getType());
    }
  } else if (auto *StoreI = dyn_cast<StoreInst>(I)) {
    if (auto *LoadI = dyn_cast<LoadInst>(StoreI->getValueOperand()))
      if (LoadI->hasOneUse() && LoadI->getParent() == I->getParent())
        // Load->Store
        return getLoadStoreAddrMode(HasVector, LoadI->getType());
  }

  if (HasVector && (isa<LoadInst>(I) || isa<StoreInst>(I))) {

    // * Use LDE instead of LE/LEY for z13 to avoid partial register
    //   dependencies (LDE only supports small offsets).
    // * Utilize the vector registers to hold floating point
    //   values (vector load / store instructions only support small
    //   offsets).

    Type *MemAccessTy = (isa<LoadInst>(I) ? I->getType() :
                         I->getOperand(0)->getType());
    bool IsFPAccess = MemAccessTy->isFloatingPointTy();
    bool IsVectorAccess = MemAccessTy->isVectorTy();

    // A store of an extracted vector element will be combined into a VSTE type
    // instruction.
    if (!IsVectorAccess && isa<StoreInst>(I)) {
      Value *DataOp = I->getOperand(0);
      if (isa<ExtractElementInst>(DataOp))
        IsVectorAccess = true;
    }

    // A load which gets inserted into a vector element will be combined into a
    // VLE type instruction.
    if (!IsVectorAccess && isa<LoadInst>(I) && I->hasOneUse()) {
      User *LoadUser = *I->user_begin();
      if (isa<InsertElementInst>(LoadUser))
        IsVectorAccess = true;
    }

    if (IsFPAccess || IsVectorAccess)
      return AddressingMode(false/*LongDispl*/, true/*IdxReg*/);
  }

  return AddressingMode(true/*LongDispl*/, true/*IdxReg*/);
}

bool SystemZTargetLowering::isLegalAddressingMode(const DataLayout &DL,
       const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I) const {
  // Punt on globals for now, although they can be used in limited
  // RELATIVE LONG cases.
  if (AM.BaseGV)
    return false;

  // Require a 20-bit signed offset.
  if (!isInt<20>(AM.BaseOffs))
    return false;

  bool RequireD12 =
      Subtarget.hasVector() && (Ty->isVectorTy() || Ty->isIntegerTy(128));
  AddressingMode SupportedAM(!RequireD12, true);
  if (I != nullptr)
    SupportedAM = supportedAddressingMode(I, Subtarget.hasVector());

  if (!SupportedAM.LongDisplacement && !isUInt<12>(AM.BaseOffs))
    return false;

  if (!SupportedAM.IndexReg)
    // No indexing allowed.
    return AM.Scale == 0;
  else
    // Indexing is OK but no scale factor can be applied.
return AM.Scale == 0 || AM.Scale == 1; } bool SystemZTargetLowering::findOptimalMemOpLowering( std::vector &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, unsigned SrcAS, const AttributeList &FuncAttributes) const { const int MVCFastLen = 16; if (Limit != ~unsigned(0)) { // Don't expand Op into scalar loads/stores in these cases: if (Op.isMemcpy() && Op.allowOverlap() && Op.size() <= MVCFastLen) return false; // Small memcpy: Use MVC if (Op.isMemset() && Op.size() - 1 <= MVCFastLen) return false; // Small memset (first byte with STC/MVI): Use MVC if (Op.isZeroMemset()) return false; // Memset zero: Use XC } return TargetLowering::findOptimalMemOpLowering(MemOps, Limit, Op, DstAS, SrcAS, FuncAttributes); } EVT SystemZTargetLowering::getOptimalMemOpType(const MemOp &Op, const AttributeList &FuncAttributes) const { return Subtarget.hasVector() ? MVT::v2i64 : MVT::Other; } bool SystemZTargetLowering::isTruncateFree(Type *FromType, Type *ToType) const { if (!FromType->isIntegerTy() || !ToType->isIntegerTy()) return false; unsigned FromBits = FromType->getPrimitiveSizeInBits().getFixedValue(); unsigned ToBits = ToType->getPrimitiveSizeInBits().getFixedValue(); return FromBits > ToBits; } bool SystemZTargetLowering::isTruncateFree(EVT FromVT, EVT ToVT) const { if (!FromVT.isInteger() || !ToVT.isInteger()) return false; unsigned FromBits = FromVT.getFixedSizeInBits(); unsigned ToBits = ToVT.getFixedSizeInBits(); return FromBits > ToBits; } //===----------------------------------------------------------------------===// // Inline asm support //===----------------------------------------------------------------------===// TargetLowering::ConstraintType SystemZTargetLowering::getConstraintType(StringRef Constraint) const { if (Constraint.size() == 1) { switch (Constraint[0]) { case 'a': // Address register case 'd': // Data register (equivalent to 'r') case 'f': // Floating-point register case 'h': // High-part register case 'r': // General-purpose register case 'v': // Vector register return C_RegisterClass; case 'Q': // Memory with base and unsigned 12-bit displacement case 'R': // Likewise, plus an index case 'S': // Memory with base and signed 20-bit displacement case 'T': // Likewise, plus an index case 'm': // Equivalent to 'T'. return C_Memory; case 'I': // Unsigned 8-bit constant case 'J': // Unsigned 12-bit constant case 'K': // Signed 16-bit constant case 'L': // Signed 20-bit displacement (on all targets we support) case 'M': // 0x7fffffff return C_Immediate; default: break; } } else if (Constraint.size() == 2 && Constraint[0] == 'Z') { switch (Constraint[1]) { case 'Q': // Address with base and unsigned 12-bit displacement case 'R': // Likewise, plus an index case 'S': // Address with base and signed 20-bit displacement case 'T': // Likewise, plus an index return C_Address; default: break; } } return TargetLowering::getConstraintType(Constraint); } TargetLowering::ConstraintWeight SystemZTargetLowering:: getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const { ConstraintWeight weight = CW_Invalid; Value *CallOperandVal = info.CallOperandVal; // If we don't have a value, we can't do a match, // but allow it at the lowest weight. if (!CallOperandVal) return CW_Default; Type *type = CallOperandVal->getType(); // Look at the constraint type. 
switch (*constraint) { default: weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint); break; case 'a': // Address register case 'd': // Data register (equivalent to 'r') case 'h': // High-part register case 'r': // General-purpose register weight = CallOperandVal->getType()->isIntegerTy() ? CW_Register : CW_Default; break; case 'f': // Floating-point register if (!useSoftFloat()) weight = type->isFloatingPointTy() ? CW_Register : CW_Default; break; case 'v': // Vector register if (Subtarget.hasVector()) weight = (type->isVectorTy() || type->isFloatingPointTy()) ? CW_Register : CW_Default; break; case 'I': // Unsigned 8-bit constant if (auto *C = dyn_cast(CallOperandVal)) if (isUInt<8>(C->getZExtValue())) weight = CW_Constant; break; case 'J': // Unsigned 12-bit constant if (auto *C = dyn_cast(CallOperandVal)) if (isUInt<12>(C->getZExtValue())) weight = CW_Constant; break; case 'K': // Signed 16-bit constant if (auto *C = dyn_cast(CallOperandVal)) if (isInt<16>(C->getSExtValue())) weight = CW_Constant; break; case 'L': // Signed 20-bit displacement (on all targets we support) if (auto *C = dyn_cast(CallOperandVal)) if (isInt<20>(C->getSExtValue())) weight = CW_Constant; break; case 'M': // 0x7fffffff if (auto *C = dyn_cast(CallOperandVal)) if (C->getZExtValue() == 0x7fffffff) weight = CW_Constant; break; } return weight; } // Parse a "{tNNN}" register constraint for which the register type "t" // has already been verified. MC is the class associated with "t" and // Map maps 0-based register numbers to LLVM register numbers. static std::pair parseRegisterNumber(StringRef Constraint, const TargetRegisterClass *RC, const unsigned *Map, unsigned Size) { assert(*(Constraint.end()-1) == '}' && "Missing '}'"); if (isdigit(Constraint[2])) { unsigned Index; bool Failed = Constraint.slice(2, Constraint.size() - 1).getAsInteger(10, Index); if (!Failed && Index < Size && Map[Index]) return std::make_pair(Map[Index], RC); } return std::make_pair(0U, nullptr); } std::pair SystemZTargetLowering::getRegForInlineAsmConstraint( const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const { if (Constraint.size() == 1) { // GCC Constraint Letters switch (Constraint[0]) { default: break; case 'd': // Data register (equivalent to 'r') case 'r': // General-purpose register if (VT.getSizeInBits() == 64) return std::make_pair(0U, &SystemZ::GR64BitRegClass); else if (VT.getSizeInBits() == 128) return std::make_pair(0U, &SystemZ::GR128BitRegClass); return std::make_pair(0U, &SystemZ::GR32BitRegClass); case 'a': // Address register if (VT == MVT::i64) return std::make_pair(0U, &SystemZ::ADDR64BitRegClass); else if (VT == MVT::i128) return std::make_pair(0U, &SystemZ::ADDR128BitRegClass); return std::make_pair(0U, &SystemZ::ADDR32BitRegClass); case 'h': // High-part register (an LLVM extension) return std::make_pair(0U, &SystemZ::GRH32BitRegClass); case 'f': // Floating-point register if (!useSoftFloat()) { if (VT.getSizeInBits() == 64) return std::make_pair(0U, &SystemZ::FP64BitRegClass); else if (VT.getSizeInBits() == 128) return std::make_pair(0U, &SystemZ::FP128BitRegClass); return std::make_pair(0U, &SystemZ::FP32BitRegClass); } break; case 'v': // Vector register if (Subtarget.hasVector()) { if (VT.getSizeInBits() == 32) return std::make_pair(0U, &SystemZ::VR32BitRegClass); if (VT.getSizeInBits() == 64) return std::make_pair(0U, &SystemZ::VR64BitRegClass); return std::make_pair(0U, &SystemZ::VR128BitRegClass); } break; } } if (Constraint.starts_with("{")) { // A clobber constraint (e.g. 
~{f0}) will have MVT::Other which is illegal // to check the size on. auto getVTSizeInBits = [&VT]() { return VT == MVT::Other ? 0 : VT.getSizeInBits(); }; // We need to override the default register parsing for GPRs and FPRs // because the interpretation depends on VT. The internal names of // the registers are also different from the external names // (F0D and F0S instead of F0, etc.). if (Constraint[1] == 'r') { if (getVTSizeInBits() == 32) return parseRegisterNumber(Constraint, &SystemZ::GR32BitRegClass, SystemZMC::GR32Regs, 16); if (getVTSizeInBits() == 128) return parseRegisterNumber(Constraint, &SystemZ::GR128BitRegClass, SystemZMC::GR128Regs, 16); return parseRegisterNumber(Constraint, &SystemZ::GR64BitRegClass, SystemZMC::GR64Regs, 16); } if (Constraint[1] == 'f') { if (useSoftFloat()) return std::make_pair( 0u, static_cast(nullptr)); if (getVTSizeInBits() == 32) return parseRegisterNumber(Constraint, &SystemZ::FP32BitRegClass, SystemZMC::FP32Regs, 16); if (getVTSizeInBits() == 128) return parseRegisterNumber(Constraint, &SystemZ::FP128BitRegClass, SystemZMC::FP128Regs, 16); return parseRegisterNumber(Constraint, &SystemZ::FP64BitRegClass, SystemZMC::FP64Regs, 16); } if (Constraint[1] == 'v') { if (!Subtarget.hasVector()) return std::make_pair( 0u, static_cast(nullptr)); if (getVTSizeInBits() == 32) return parseRegisterNumber(Constraint, &SystemZ::VR32BitRegClass, SystemZMC::VR32Regs, 32); if (getVTSizeInBits() == 64) return parseRegisterNumber(Constraint, &SystemZ::VR64BitRegClass, SystemZMC::VR64Regs, 32); return parseRegisterNumber(Constraint, &SystemZ::VR128BitRegClass, SystemZMC::VR128Regs, 32); } } return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); } // FIXME? Maybe this could be a TableGen attribute on some registers and // this table could be generated automatically from RegInfo. Register SystemZTargetLowering::getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const { Register Reg = StringSwitch(RegName) .Case("r4", Subtarget.isTargetXPLINK64() ? SystemZ::R4D : 0) .Case("r15", Subtarget.isTargetELF() ? SystemZ::R15D : 0) .Default(0); if (Reg) return Reg; report_fatal_error("Invalid register name global variable"); } Register SystemZTargetLowering::getExceptionPointerRegister( const Constant *PersonalityFn) const { return Subtarget.isTargetXPLINK64() ? SystemZ::R1D : SystemZ::R6D; } Register SystemZTargetLowering::getExceptionSelectorRegister( const Constant *PersonalityFn) const { return Subtarget.isTargetXPLINK64() ? SystemZ::R2D : SystemZ::R7D; } void SystemZTargetLowering::LowerAsmOperandForConstraint( SDValue Op, StringRef Constraint, std::vector &Ops, SelectionDAG &DAG) const { // Only support length 1 constraints for now. 
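// (Multi-letter constraints simply fall through to the generic
// TargetLowering handling at the end of this function.)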
if (Constraint.size() == 1) { switch (Constraint[0]) { case 'I': // Unsigned 8-bit constant if (auto *C = dyn_cast(Op)) if (isUInt<8>(C->getZExtValue())) Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op), Op.getValueType())); return; case 'J': // Unsigned 12-bit constant if (auto *C = dyn_cast(Op)) if (isUInt<12>(C->getZExtValue())) Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op), Op.getValueType())); return; case 'K': // Signed 16-bit constant if (auto *C = dyn_cast(Op)) if (isInt<16>(C->getSExtValue())) Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op), Op.getValueType())); return; case 'L': // Signed 20-bit displacement (on all targets we support) if (auto *C = dyn_cast(Op)) if (isInt<20>(C->getSExtValue())) Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op), Op.getValueType())); return; case 'M': // 0x7fffffff if (auto *C = dyn_cast(Op)) if (C->getZExtValue() == 0x7fffffff) Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op), Op.getValueType())); return; } } TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); } //===----------------------------------------------------------------------===// // Calling conventions //===----------------------------------------------------------------------===// #include "SystemZGenCallingConv.inc" const MCPhysReg *SystemZTargetLowering::getScratchRegisters( CallingConv::ID) const { static const MCPhysReg ScratchRegs[] = { SystemZ::R0D, SystemZ::R1D, SystemZ::R14D, 0 }; return ScratchRegs; } bool SystemZTargetLowering::allowTruncateForTailCall(Type *FromType, Type *ToType) const { return isTruncateFree(FromType, ToType); } bool SystemZTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const { return CI->isTailCall(); } // Value is a value that has been passed to us in the location described by VA // (and so has type VA.getLocVT()). Convert Value to VA.getValVT(), chaining // any loads onto Chain. static SDValue convertLocVTToValVT(SelectionDAG &DAG, const SDLoc &DL, CCValAssign &VA, SDValue Chain, SDValue Value) { // If the argument has been promoted from a smaller type, insert an // assertion to capture this. if (VA.getLocInfo() == CCValAssign::SExt) Value = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), Value, DAG.getValueType(VA.getValVT())); else if (VA.getLocInfo() == CCValAssign::ZExt) Value = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), Value, DAG.getValueType(VA.getValVT())); if (VA.isExtInLoc()) Value = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Value); else if (VA.getLocInfo() == CCValAssign::BCvt) { // If this is a short vector argument loaded from the stack, // extend from i64 to full vector size and then bitcast. assert(VA.getLocVT() == MVT::i64); assert(VA.getValVT().isVector()); Value = DAG.getBuildVector(MVT::v2i64, DL, {Value, DAG.getUNDEF(MVT::i64)}); Value = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Value); } else assert(VA.getLocInfo() == CCValAssign::Full && "Unsupported getLocInfo"); return Value; } // Value is a value of type VA.getValVT() that we need to copy into // the location described by VA. Return a copy of Value converted to // VA.getValVT(). The caller is responsible for handling indirect values. 
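// (For the BCvt case this is the inverse of the conversion above: a short
// vector headed for an i64 slot is bitcast to v2i64 and element 0 is
// extracted, and an f32 vararg is first extended to f64.)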
static SDValue convertValVTToLocVT(SelectionDAG &DAG, const SDLoc &DL, CCValAssign &VA, SDValue Value) { switch (VA.getLocInfo()) { case CCValAssign::SExt: return DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Value); case CCValAssign::ZExt: return DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Value); case CCValAssign::AExt: return DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Value); case CCValAssign::BCvt: { assert(VA.getLocVT() == MVT::i64 || VA.getLocVT() == MVT::i128); assert(VA.getValVT().isVector() || VA.getValVT() == MVT::f32 || VA.getValVT() == MVT::f64 || VA.getValVT() == MVT::f128); // For an f32 vararg we need to first promote it to an f64 and then // bitcast it to an i64. if (VA.getValVT() == MVT::f32 && VA.getLocVT() == MVT::i64) Value = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f64, Value); MVT BitCastToType = VA.getValVT().isVector() && VA.getLocVT() == MVT::i64 ? MVT::v2i64 : VA.getLocVT(); Value = DAG.getNode(ISD::BITCAST, DL, BitCastToType, Value); // For ELF, this is a short vector argument to be stored to the stack, // bitcast to v2i64 and then extract first element. if (BitCastToType == MVT::v2i64) return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VA.getLocVT(), Value, DAG.getConstant(0, DL, MVT::i32)); return Value; } case CCValAssign::Full: return Value; default: llvm_unreachable("Unhandled getLocInfo()"); } } static SDValue lowerI128ToGR128(SelectionDAG &DAG, SDValue In) { SDLoc DL(In); SDValue Lo, Hi; if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i128)) { Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64, In); Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64, DAG.getNode(ISD::SRL, DL, MVT::i128, In, DAG.getConstant(64, DL, MVT::i32))); } else { std::tie(Lo, Hi) = DAG.SplitScalar(In, DL, MVT::i64, MVT::i64); } // FIXME: If v2i64 were a legal type, we could use it instead of // Untyped here. This might enable improved folding. SDNode *Pair = DAG.getMachineNode(SystemZ::PAIR128, DL, MVT::Untyped, Hi, Lo); return SDValue(Pair, 0); } static SDValue lowerGR128ToI128(SelectionDAG &DAG, SDValue In) { SDLoc DL(In); SDValue Hi = DAG.getTargetExtractSubreg(SystemZ::subreg_h64, DL, MVT::i64, In); SDValue Lo = DAG.getTargetExtractSubreg(SystemZ::subreg_l64, DL, MVT::i64, In); if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i128)) { Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i128, Lo); Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i128, Hi); Hi = DAG.getNode(ISD::SHL, DL, MVT::i128, Hi, DAG.getConstant(64, DL, MVT::i32)); return DAG.getNode(ISD::OR, DL, MVT::i128, Lo, Hi); } else { return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128, Lo, Hi); } } bool SystemZTargetLowering::splitValueIntoRegisterParts( SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, std::optional CC) const { EVT ValueVT = Val.getValueType(); if (ValueVT.getSizeInBits() == 128 && NumParts == 1 && PartVT == MVT::Untyped) { // Inline assembly operand. Parts[0] = lowerI128ToGR128(DAG, DAG.getBitcast(MVT::i128, Val)); return true; } return false; } SDValue SystemZTargetLowering::joinRegisterPartsIntoValue( SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, MVT PartVT, EVT ValueVT, std::optional CC) const { if (ValueVT.getSizeInBits() == 128 && NumParts == 1 && PartVT == MVT::Untyped) { // Inline assembly operand. 
SDValue Res = lowerGR128ToI128(DAG, Parts[0]); return DAG.getBitcast(ValueVT, Res); } return SDValue(); } SDValue SystemZTargetLowering::LowerFormalArguments( SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl &InVals) const { MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo &MFI = MF.getFrameInfo(); MachineRegisterInfo &MRI = MF.getRegInfo(); SystemZMachineFunctionInfo *FuncInfo = MF.getInfo(); auto *TFL = Subtarget.getFrameLowering(); EVT PtrVT = getPointerTy(DAG.getDataLayout()); // Assign locations to all of the incoming arguments. SmallVector ArgLocs; SystemZCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); CCInfo.AnalyzeFormalArguments(Ins, CC_SystemZ); FuncInfo->setSizeOfFnParams(CCInfo.getStackSize()); unsigned NumFixedGPRs = 0; unsigned NumFixedFPRs = 0; for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) { SDValue ArgValue; CCValAssign &VA = ArgLocs[I]; EVT LocVT = VA.getLocVT(); if (VA.isRegLoc()) { // Arguments passed in registers const TargetRegisterClass *RC; switch (LocVT.getSimpleVT().SimpleTy) { default: // Integers smaller than i64 should be promoted to i64. llvm_unreachable("Unexpected argument type"); case MVT::i32: NumFixedGPRs += 1; RC = &SystemZ::GR32BitRegClass; break; case MVT::i64: NumFixedGPRs += 1; RC = &SystemZ::GR64BitRegClass; break; case MVT::f32: NumFixedFPRs += 1; RC = &SystemZ::FP32BitRegClass; break; case MVT::f64: NumFixedFPRs += 1; RC = &SystemZ::FP64BitRegClass; break; case MVT::f128: NumFixedFPRs += 2; RC = &SystemZ::FP128BitRegClass; break; case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64: case MVT::v4f32: case MVT::v2f64: RC = &SystemZ::VR128BitRegClass; break; } Register VReg = MRI.createVirtualRegister(RC); MRI.addLiveIn(VA.getLocReg(), VReg); ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, LocVT); } else { assert(VA.isMemLoc() && "Argument not register or memory"); // Create the frame index object for this incoming parameter. // FIXME: Pre-include call frame size in the offset, should not // need to manually add it here. int64_t ArgSPOffset = VA.getLocMemOffset(); if (Subtarget.isTargetXPLINK64()) { auto &XPRegs = Subtarget.getSpecialRegisters(); ArgSPOffset += XPRegs.getCallFrameSize(); } int FI = MFI.CreateFixedObject(LocVT.getSizeInBits() / 8, ArgSPOffset, true); // Create the SelectionDAG nodes corresponding to a load // from this parameter. Unpromoted ints and floats are // passed as right-justified 8-byte values. SDValue FIN = DAG.getFrameIndex(FI, PtrVT); if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32) FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN, DAG.getIntPtrConstant(4, DL)); ArgValue = DAG.getLoad(LocVT, DL, Chain, FIN, MachinePointerInfo::getFixedStack(MF, FI)); } // Convert the value of the argument register into the value that's // being passed. if (VA.getLocInfo() == CCValAssign::Indirect) { InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue, MachinePointerInfo())); // If the original argument was split (e.g. i128), we need // to load all parts of it here (using the same address). 
unsigned ArgIndex = Ins[I].OrigArgIndex; assert (Ins[I].PartOffset == 0); while (I + 1 != E && Ins[I + 1].OrigArgIndex == ArgIndex) { CCValAssign &PartVA = ArgLocs[I + 1]; unsigned PartOffset = Ins[I + 1].PartOffset; SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, DAG.getIntPtrConstant(PartOffset, DL)); InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address, MachinePointerInfo())); ++I; } } else InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, ArgValue)); } if (IsVarArg && Subtarget.isTargetXPLINK64()) { // Save the number of non-varargs registers for later use by va_start, etc. FuncInfo->setVarArgsFirstGPR(NumFixedGPRs); FuncInfo->setVarArgsFirstFPR(NumFixedFPRs); auto *Regs = static_cast( Subtarget.getSpecialRegisters()); // Likewise the address (in the form of a frame index) of where the // first stack vararg would be. The 1-byte size here is arbitrary. // FIXME: Pre-include call frame size in the offset, should not // need to manually add it here. int64_t VarArgOffset = CCInfo.getStackSize() + Regs->getCallFrameSize(); int FI = MFI.CreateFixedObject(1, VarArgOffset, true); FuncInfo->setVarArgsFrameIndex(FI); } if (IsVarArg && Subtarget.isTargetELF()) { // Save the number of non-varargs registers for later use by va_start, etc. FuncInfo->setVarArgsFirstGPR(NumFixedGPRs); FuncInfo->setVarArgsFirstFPR(NumFixedFPRs); // Likewise the address (in the form of a frame index) of where the // first stack vararg would be. The 1-byte size here is arbitrary. int64_t VarArgsOffset = CCInfo.getStackSize(); FuncInfo->setVarArgsFrameIndex( MFI.CreateFixedObject(1, VarArgsOffset, true)); // ...and a similar frame index for the caller-allocated save area // that will be used to store the incoming registers. int64_t RegSaveOffset = -SystemZMC::ELFCallFrameSize + TFL->getRegSpillOffset(MF, SystemZ::R2D) - 16; unsigned RegSaveIndex = MFI.CreateFixedObject(1, RegSaveOffset, true); FuncInfo->setRegSaveFrameIndex(RegSaveIndex); // Store the FPR varargs in the reserved frame slots. (We store the // GPRs as part of the prologue.) if (NumFixedFPRs < SystemZ::ELFNumArgFPRs && !useSoftFloat()) { SDValue MemOps[SystemZ::ELFNumArgFPRs]; for (unsigned I = NumFixedFPRs; I < SystemZ::ELFNumArgFPRs; ++I) { unsigned Offset = TFL->getRegSpillOffset(MF, SystemZ::ELFArgFPRs[I]); int FI = MFI.CreateFixedObject(8, -SystemZMC::ELFCallFrameSize + Offset, true); SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); Register VReg = MF.addLiveIn(SystemZ::ELFArgFPRs[I], &SystemZ::FP64BitRegClass); SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f64); MemOps[I] = DAG.getStore(ArgValue.getValue(1), DL, ArgValue, FIN, MachinePointerInfo::getFixedStack(MF, FI)); } // Join the stores, which are independent of one another. 
Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, ArrayRef(&MemOps[NumFixedFPRs], SystemZ::ELFNumArgFPRs - NumFixedFPRs)); } } if (Subtarget.isTargetXPLINK64()) { // Create virual register for handling incoming "ADA" special register (R5) const TargetRegisterClass *RC = &SystemZ::ADDR64BitRegClass; Register ADAvReg = MRI.createVirtualRegister(RC); auto *Regs = static_cast( Subtarget.getSpecialRegisters()); MRI.addLiveIn(Regs->getADARegister(), ADAvReg); FuncInfo->setADAVirtualRegister(ADAvReg); } return Chain; } static bool canUseSiblingCall(const CCState &ArgCCInfo, SmallVectorImpl &ArgLocs, SmallVectorImpl &Outs) { // Punt if there are any indirect or stack arguments, or if the call // needs the callee-saved argument register R6, or if the call uses // the callee-saved register arguments SwiftSelf and SwiftError. for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) { CCValAssign &VA = ArgLocs[I]; if (VA.getLocInfo() == CCValAssign::Indirect) return false; if (!VA.isRegLoc()) return false; Register Reg = VA.getLocReg(); if (Reg == SystemZ::R6H || Reg == SystemZ::R6L || Reg == SystemZ::R6D) return false; if (Outs[I].Flags.isSwiftSelf() || Outs[I].Flags.isSwiftError()) return false; } return true; } static SDValue getADAEntry(SelectionDAG &DAG, SDValue Val, SDLoc DL, unsigned Offset, bool LoadAdr = false) { MachineFunction &MF = DAG.getMachineFunction(); SystemZMachineFunctionInfo *MFI = MF.getInfo(); unsigned ADAvReg = MFI->getADAVirtualRegister(); EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); SDValue Reg = DAG.getRegister(ADAvReg, PtrVT); SDValue Ofs = DAG.getTargetConstant(Offset, DL, PtrVT); SDValue Result = DAG.getNode(SystemZISD::ADA_ENTRY, DL, PtrVT, Val, Reg, Ofs); if (!LoadAdr) Result = DAG.getLoad( PtrVT, DL, DAG.getEntryNode(), Result, MachinePointerInfo(), Align(8), MachineMemOperand::MODereferenceable | MachineMemOperand::MOInvariant); return Result; } // ADA access using Global value // Note: for functions, address of descriptor is returned static SDValue getADAEntry(SelectionDAG &DAG, const GlobalValue *GV, SDLoc DL, EVT PtrVT) { unsigned ADAtype; bool LoadAddr = false; const GlobalAlias *GA = dyn_cast(GV); bool IsFunction = (isa(GV)) || (GA && isa(GA->getAliaseeObject())); bool IsInternal = (GV->hasInternalLinkage() || GV->hasPrivateLinkage()); if (IsFunction) { if (IsInternal) { ADAtype = SystemZII::MO_ADA_DIRECT_FUNC_DESC; LoadAddr = true; } else ADAtype = SystemZII::MO_ADA_INDIRECT_FUNC_DESC; } else { ADAtype = SystemZII::MO_ADA_DATA_SYMBOL_ADDR; } SDValue Val = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, ADAtype); return getADAEntry(DAG, Val, DL, 0, LoadAddr); } static bool getzOSCalleeAndADA(SelectionDAG &DAG, SDValue &Callee, SDValue &ADA, SDLoc &DL, SDValue &Chain) { unsigned ADADelta = 0; // ADA offset in desc. unsigned EPADelta = 8; // EPA offset in desc. MachineFunction &MF = DAG.getMachineFunction(); EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); // XPLink calling convention. 
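// (The call target is described by a function descriptor: the ADA
// (associated data area) pointer lives at offset 0 and the entry point
// address at offset 8, matching ADADelta/EPADelta above.)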
if (auto *G = dyn_cast(Callee)) { bool IsInternal = (G->getGlobal()->hasInternalLinkage() || G->getGlobal()->hasPrivateLinkage()); if (IsInternal) { SystemZMachineFunctionInfo *MFI = MF.getInfo(); unsigned ADAvReg = MFI->getADAVirtualRegister(); ADA = DAG.getCopyFromReg(Chain, DL, ADAvReg, PtrVT); Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT); Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee); return true; } else { SDValue GA = DAG.getTargetGlobalAddress( G->getGlobal(), DL, PtrVT, 0, SystemZII::MO_ADA_DIRECT_FUNC_DESC); ADA = getADAEntry(DAG, GA, DL, ADADelta); Callee = getADAEntry(DAG, GA, DL, EPADelta); } } else if (auto *E = dyn_cast(Callee)) { SDValue ES = DAG.getTargetExternalSymbol( E->getSymbol(), PtrVT, SystemZII::MO_ADA_DIRECT_FUNC_DESC); ADA = getADAEntry(DAG, ES, DL, ADADelta); Callee = getADAEntry(DAG, ES, DL, EPADelta); } else { // Function pointer case ADA = DAG.getNode(ISD::ADD, DL, PtrVT, Callee, DAG.getConstant(ADADelta, DL, PtrVT)); ADA = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), ADA, MachinePointerInfo::getGOT(DAG.getMachineFunction())); Callee = DAG.getNode(ISD::ADD, DL, PtrVT, Callee, DAG.getConstant(EPADelta, DL, PtrVT)); Callee = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Callee, MachinePointerInfo::getGOT(DAG.getMachineFunction())); } return false; } SDValue SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI, SmallVectorImpl &InVals) const { SelectionDAG &DAG = CLI.DAG; SDLoc &DL = CLI.DL; SmallVectorImpl &Outs = CLI.Outs; SmallVectorImpl &OutVals = CLI.OutVals; SmallVectorImpl &Ins = CLI.Ins; SDValue Chain = CLI.Chain; SDValue Callee = CLI.Callee; bool &IsTailCall = CLI.IsTailCall; CallingConv::ID CallConv = CLI.CallConv; bool IsVarArg = CLI.IsVarArg; MachineFunction &MF = DAG.getMachineFunction(); EVT PtrVT = getPointerTy(MF.getDataLayout()); LLVMContext &Ctx = *DAG.getContext(); SystemZCallingConventionRegisters *Regs = Subtarget.getSpecialRegisters(); // FIXME: z/OS support to be added in later. if (Subtarget.isTargetXPLINK64()) IsTailCall = false; // Analyze the operands of the call, assigning locations to each operand. SmallVector ArgLocs; SystemZCCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, Ctx); ArgCCInfo.AnalyzeCallOperands(Outs, CC_SystemZ); // We don't support GuaranteedTailCallOpt, only automatically-detected // sibling calls. if (IsTailCall && !canUseSiblingCall(ArgCCInfo, ArgLocs, Outs)) IsTailCall = false; // Get a count of how many bytes are to be pushed on the stack. unsigned NumBytes = ArgCCInfo.getStackSize(); // Mark the start of the call. if (!IsTailCall) Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, DL); // Copy argument values to their designated locations. SmallVector, 9> RegsToPass; SmallVector MemOpChains; SDValue StackPtr; for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) { CCValAssign &VA = ArgLocs[I]; SDValue ArgValue = OutVals[I]; if (VA.getLocInfo() == CCValAssign::Indirect) { // Store the argument in a stack slot and pass its address. unsigned ArgIndex = Outs[I].OrigArgIndex; EVT SlotVT; if (I + 1 != E && Outs[I + 1].OrigArgIndex == ArgIndex) { // Allocate the full stack space for a promoted (and split) argument. 
Type *OrigArgType = CLI.Args[Outs[I].OrigArgIndex].Ty; EVT OrigArgVT = getValueType(MF.getDataLayout(), OrigArgType); MVT PartVT = getRegisterTypeForCallingConv(Ctx, CLI.CallConv, OrigArgVT); unsigned N = getNumRegistersForCallingConv(Ctx, CLI.CallConv, OrigArgVT); SlotVT = EVT::getIntegerVT(Ctx, PartVT.getSizeInBits() * N); } else { SlotVT = Outs[I].VT; } SDValue SpillSlot = DAG.CreateStackTemporary(SlotVT); int FI = cast(SpillSlot)->getIndex(); MemOpChains.push_back( DAG.getStore(Chain, DL, ArgValue, SpillSlot, MachinePointerInfo::getFixedStack(MF, FI))); // If the original argument was split (e.g. i128), we need // to store all parts of it here (and pass just one address). assert (Outs[I].PartOffset == 0); while (I + 1 != E && Outs[I + 1].OrigArgIndex == ArgIndex) { SDValue PartValue = OutVals[I + 1]; unsigned PartOffset = Outs[I + 1].PartOffset; SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, DAG.getIntPtrConstant(PartOffset, DL)); MemOpChains.push_back( DAG.getStore(Chain, DL, PartValue, Address, MachinePointerInfo::getFixedStack(MF, FI))); assert((PartOffset + PartValue.getValueType().getStoreSize() <= SlotVT.getStoreSize()) && "Not enough space for argument part!"); ++I; } ArgValue = SpillSlot; } else ArgValue = convertValVTToLocVT(DAG, DL, VA, ArgValue); if (VA.isRegLoc()) { // In XPLINK64, for the 128-bit vararg case, ArgValue is bitcasted to a // MVT::i128 type. We decompose the 128-bit type to a pair of its high // and low values. if (VA.getLocVT() == MVT::i128) ArgValue = lowerI128ToGR128(DAG, ArgValue); // Queue up the argument copies and emit them at the end. RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue)); } else { assert(VA.isMemLoc() && "Argument not register or memory"); // Work out the address of the stack slot. Unpromoted ints and // floats are passed as right-justified 8-byte values. if (!StackPtr.getNode()) StackPtr = DAG.getCopyFromReg(Chain, DL, Regs->getStackPointerRegister(), PtrVT); unsigned Offset = Regs->getStackPointerBias() + Regs->getCallFrameSize() + VA.getLocMemOffset(); if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32) Offset += 4; SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, DAG.getIntPtrConstant(Offset, DL)); // Emit the store. MemOpChains.push_back( DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo())); // Although long doubles or vectors are passed through the stack when // they are vararg (non-fixed arguments), if a long double or vector // occupies the third and fourth slot of the argument list GPR3 should // still shadow the third slot of the argument list. if (Subtarget.isTargetXPLINK64() && VA.needsCustom()) { SDValue ShadowArgValue = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, ArgValue, DAG.getIntPtrConstant(1, DL)); RegsToPass.push_back(std::make_pair(SystemZ::R3D, ShadowArgValue)); } } } // Join the stores, which are independent of one another. if (!MemOpChains.empty()) Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains); // Accept direct calls by converting symbolic call addresses to the // associated Target* opcodes. Force %r1 to be used for indirect // tail calls. 
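// (%r1 is call-clobbered and not used for argument passing, so it can
// safely hold the target address across the sibling-call branch.)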
SDValue Glue; if (Subtarget.isTargetXPLINK64()) { SDValue ADA; bool IsBRASL = getzOSCalleeAndADA(DAG, Callee, ADA, DL, Chain); if (!IsBRASL) { unsigned CalleeReg = static_cast(Regs) ->getAddressOfCalleeRegister(); Chain = DAG.getCopyToReg(Chain, DL, CalleeReg, Callee, Glue); Glue = Chain.getValue(1); Callee = DAG.getRegister(CalleeReg, Callee.getValueType()); } RegsToPass.push_back(std::make_pair( static_cast(Regs)->getADARegister(), ADA)); } else { if (auto *G = dyn_cast(Callee)) { Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT); Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee); } else if (auto *E = dyn_cast(Callee)) { Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT); Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee); } else if (IsTailCall) { Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R1D, Callee, Glue); Glue = Chain.getValue(1); Callee = DAG.getRegister(SystemZ::R1D, Callee.getValueType()); } } // Build a sequence of copy-to-reg nodes, chained and glued together. for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I) { Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[I].first, RegsToPass[I].second, Glue); Glue = Chain.getValue(1); } // The first call operand is the chain and the second is the target address. SmallVector Ops; Ops.push_back(Chain); Ops.push_back(Callee); // Add argument registers to the end of the list so that they are // known live into the call. for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I) Ops.push_back(DAG.getRegister(RegsToPass[I].first, RegsToPass[I].second.getValueType())); // Add a register mask operand representing the call-preserved registers. const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv); assert(Mask && "Missing call preserved mask for calling convention"); Ops.push_back(DAG.getRegisterMask(Mask)); // Glue the call to the argument copies, if any. if (Glue.getNode()) Ops.push_back(Glue); // Emit the call. SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); if (IsTailCall) { SDValue Ret = DAG.getNode(SystemZISD::SIBCALL, DL, NodeTys, Ops); DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge); return Ret; } Chain = DAG.getNode(SystemZISD::CALL, DL, NodeTys, Ops); DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge); Glue = Chain.getValue(1); // Mark the end of the call, which is glued to the call itself. Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL); Glue = Chain.getValue(1); // Assign locations to each value returned by this call. SmallVector RetLocs; CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, Ctx); RetCCInfo.AnalyzeCallResult(Ins, RetCC_SystemZ); // Copy all of the result registers out of their specified physreg. for (CCValAssign &VA : RetLocs) { // Copy the value out, gluing the copy to the end of the call sequence. SDValue RetValue = DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue); Chain = RetValue.getValue(1); Glue = RetValue.getValue(2); // Convert the value of the return register into the value that's // being returned. InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, RetValue)); } return Chain; } // Generate a call taking the given operands as arguments and returning a // result of type RetVT. 
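// Used for helper calls that lowering code creates itself rather than
// calls that originate from the IR.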
std::pair SystemZTargetLowering::makeExternalCall( SDValue Chain, SelectionDAG &DAG, const char *CalleeName, EVT RetVT, ArrayRef Ops, CallingConv::ID CallConv, bool IsSigned, SDLoc DL, bool DoesNotReturn, bool IsReturnValueUsed) const { TargetLowering::ArgListTy Args; Args.reserve(Ops.size()); TargetLowering::ArgListEntry Entry; for (SDValue Op : Ops) { Entry.Node = Op; Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext()); Entry.IsSExt = shouldSignExtendTypeInLibCall(Op.getValueType(), IsSigned); Entry.IsZExt = !shouldSignExtendTypeInLibCall(Op.getValueType(), IsSigned); Args.push_back(Entry); } SDValue Callee = DAG.getExternalSymbol(CalleeName, getPointerTy(DAG.getDataLayout())); Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext()); TargetLowering::CallLoweringInfo CLI(DAG); bool SignExtend = shouldSignExtendTypeInLibCall(RetVT, IsSigned); CLI.setDebugLoc(DL) .setChain(Chain) .setCallee(CallConv, RetTy, Callee, std::move(Args)) .setNoReturn(DoesNotReturn) .setDiscardResult(!IsReturnValueUsed) .setSExtResult(SignExtend) .setZExtResult(!SignExtend); return LowerCallTo(CLI); } bool SystemZTargetLowering:: CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg, const SmallVectorImpl &Outs, LLVMContext &Context) const { // Special case that we cannot easily detect in RetCC_SystemZ since // i128 may not be a legal type. for (auto &Out : Outs) if (Out.ArgVT == MVT::i128) return false; SmallVector RetLocs; CCState RetCCInfo(CallConv, isVarArg, MF, RetLocs, Context); return RetCCInfo.CheckReturn(Outs, RetCC_SystemZ); } SDValue SystemZTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl &Outs, const SmallVectorImpl &OutVals, const SDLoc &DL, SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); // Assign locations to each returned value. SmallVector RetLocs; CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, *DAG.getContext()); RetCCInfo.AnalyzeReturn(Outs, RetCC_SystemZ); // Quick exit for void returns if (RetLocs.empty()) return DAG.getNode(SystemZISD::RET_GLUE, DL, MVT::Other, Chain); if (CallConv == CallingConv::GHC) report_fatal_error("GHC functions return void only"); // Copy the result values into the output registers. SDValue Glue; SmallVector RetOps; RetOps.push_back(Chain); for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) { CCValAssign &VA = RetLocs[I]; SDValue RetValue = OutVals[I]; // Make the return register live on exit. assert(VA.isRegLoc() && "Can only return in registers!"); // Promote the value as required. RetValue = convertValVTToLocVT(DAG, DL, VA, RetValue); // Chain and glue the copies together. Register Reg = VA.getLocReg(); Chain = DAG.getCopyToReg(Chain, DL, Reg, RetValue, Glue); Glue = Chain.getValue(1); RetOps.push_back(DAG.getRegister(Reg, VA.getLocVT())); } // Update chain and glue. RetOps[0] = Chain; if (Glue.getNode()) RetOps.push_back(Glue); return DAG.getNode(SystemZISD::RET_GLUE, DL, MVT::Other, RetOps); } // Return true if Op is an intrinsic node with chain that returns the CC value // as its only (other) argument. Provide the associated SystemZISD opcode and // the mask of valid CC values if so. 
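// (CCValid is the set of condition-code values the instruction can
// actually produce; later combines use it when folding the CC result
// into branches and selects.)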
static bool isIntrinsicWithCCAndChain(SDValue Op, unsigned &Opcode, unsigned &CCValid) { unsigned Id = Op.getConstantOperandVal(1); switch (Id) { case Intrinsic::s390_tbegin: Opcode = SystemZISD::TBEGIN; CCValid = SystemZ::CCMASK_TBEGIN; return true; case Intrinsic::s390_tbegin_nofloat: Opcode = SystemZISD::TBEGIN_NOFLOAT; CCValid = SystemZ::CCMASK_TBEGIN; return true; case Intrinsic::s390_tend: Opcode = SystemZISD::TEND; CCValid = SystemZ::CCMASK_TEND; return true; default: return false; } } // Return true if Op is an intrinsic node without chain that returns the // CC value as its final argument. Provide the associated SystemZISD // opcode and the mask of valid CC values if so. static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid) { unsigned Id = Op.getConstantOperandVal(0); switch (Id) { case Intrinsic::s390_vpkshs: case Intrinsic::s390_vpksfs: case Intrinsic::s390_vpksgs: Opcode = SystemZISD::PACKS_CC; CCValid = SystemZ::CCMASK_VCMP; return true; case Intrinsic::s390_vpklshs: case Intrinsic::s390_vpklsfs: case Intrinsic::s390_vpklsgs: Opcode = SystemZISD::PACKLS_CC; CCValid = SystemZ::CCMASK_VCMP; return true; case Intrinsic::s390_vceqbs: case Intrinsic::s390_vceqhs: case Intrinsic::s390_vceqfs: case Intrinsic::s390_vceqgs: Opcode = SystemZISD::VICMPES; CCValid = SystemZ::CCMASK_VCMP; return true; case Intrinsic::s390_vchbs: case Intrinsic::s390_vchhs: case Intrinsic::s390_vchfs: case Intrinsic::s390_vchgs: Opcode = SystemZISD::VICMPHS; CCValid = SystemZ::CCMASK_VCMP; return true; case Intrinsic::s390_vchlbs: case Intrinsic::s390_vchlhs: case Intrinsic::s390_vchlfs: case Intrinsic::s390_vchlgs: Opcode = SystemZISD::VICMPHLS; CCValid = SystemZ::CCMASK_VCMP; return true; case Intrinsic::s390_vtm: Opcode = SystemZISD::VTM; CCValid = SystemZ::CCMASK_VCMP; return true; case Intrinsic::s390_vfaebs: case Intrinsic::s390_vfaehs: case Intrinsic::s390_vfaefs: Opcode = SystemZISD::VFAE_CC; CCValid = SystemZ::CCMASK_ANY; return true; case Intrinsic::s390_vfaezbs: case Intrinsic::s390_vfaezhs: case Intrinsic::s390_vfaezfs: Opcode = SystemZISD::VFAEZ_CC; CCValid = SystemZ::CCMASK_ANY; return true; case Intrinsic::s390_vfeebs: case Intrinsic::s390_vfeehs: case Intrinsic::s390_vfeefs: Opcode = SystemZISD::VFEE_CC; CCValid = SystemZ::CCMASK_ANY; return true; case Intrinsic::s390_vfeezbs: case Intrinsic::s390_vfeezhs: case Intrinsic::s390_vfeezfs: Opcode = SystemZISD::VFEEZ_CC; CCValid = SystemZ::CCMASK_ANY; return true; case Intrinsic::s390_vfenebs: case Intrinsic::s390_vfenehs: case Intrinsic::s390_vfenefs: Opcode = SystemZISD::VFENE_CC; CCValid = SystemZ::CCMASK_ANY; return true; case Intrinsic::s390_vfenezbs: case Intrinsic::s390_vfenezhs: case Intrinsic::s390_vfenezfs: Opcode = SystemZISD::VFENEZ_CC; CCValid = SystemZ::CCMASK_ANY; return true; case Intrinsic::s390_vistrbs: case Intrinsic::s390_vistrhs: case Intrinsic::s390_vistrfs: Opcode = SystemZISD::VISTR_CC; CCValid = SystemZ::CCMASK_0 | SystemZ::CCMASK_3; return true; case Intrinsic::s390_vstrcbs: case Intrinsic::s390_vstrchs: case Intrinsic::s390_vstrcfs: Opcode = SystemZISD::VSTRC_CC; CCValid = SystemZ::CCMASK_ANY; return true; case Intrinsic::s390_vstrczbs: case Intrinsic::s390_vstrczhs: case Intrinsic::s390_vstrczfs: Opcode = SystemZISD::VSTRCZ_CC; CCValid = SystemZ::CCMASK_ANY; return true; case Intrinsic::s390_vstrsb: case Intrinsic::s390_vstrsh: case Intrinsic::s390_vstrsf: Opcode = SystemZISD::VSTRS_CC; CCValid = SystemZ::CCMASK_ANY; return true; case Intrinsic::s390_vstrszb: case Intrinsic::s390_vstrszh: case 
Intrinsic::s390_vstrszf: Opcode = SystemZISD::VSTRSZ_CC; CCValid = SystemZ::CCMASK_ANY; return true; case Intrinsic::s390_vfcedbs: case Intrinsic::s390_vfcesbs: Opcode = SystemZISD::VFCMPES; CCValid = SystemZ::CCMASK_VCMP; return true; case Intrinsic::s390_vfchdbs: case Intrinsic::s390_vfchsbs: Opcode = SystemZISD::VFCMPHS; CCValid = SystemZ::CCMASK_VCMP; return true; case Intrinsic::s390_vfchedbs: case Intrinsic::s390_vfchesbs: Opcode = SystemZISD::VFCMPHES; CCValid = SystemZ::CCMASK_VCMP; return true; case Intrinsic::s390_vftcidb: case Intrinsic::s390_vftcisb: Opcode = SystemZISD::VFTCI; CCValid = SystemZ::CCMASK_VCMP; return true; case Intrinsic::s390_tdc: Opcode = SystemZISD::TDC; CCValid = SystemZ::CCMASK_TDC; return true; default: return false; } } // Emit an intrinsic with chain and an explicit CC register result. static SDNode *emitIntrinsicWithCCAndChain(SelectionDAG &DAG, SDValue Op, unsigned Opcode) { // Copy all operands except the intrinsic ID. unsigned NumOps = Op.getNumOperands(); SmallVector Ops; Ops.reserve(NumOps - 1); Ops.push_back(Op.getOperand(0)); for (unsigned I = 2; I < NumOps; ++I) Ops.push_back(Op.getOperand(I)); assert(Op->getNumValues() == 2 && "Expected only CC result and chain"); SDVTList RawVTs = DAG.getVTList(MVT::i32, MVT::Other); SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), RawVTs, Ops); SDValue OldChain = SDValue(Op.getNode(), 1); SDValue NewChain = SDValue(Intr.getNode(), 1); DAG.ReplaceAllUsesOfValueWith(OldChain, NewChain); return Intr.getNode(); } // Emit an intrinsic with an explicit CC register result. static SDNode *emitIntrinsicWithCC(SelectionDAG &DAG, SDValue Op, unsigned Opcode) { // Copy all operands except the intrinsic ID. unsigned NumOps = Op.getNumOperands(); SmallVector Ops; Ops.reserve(NumOps - 1); for (unsigned I = 1; I < NumOps; ++I) Ops.push_back(Op.getOperand(I)); SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), Op->getVTList(), Ops); return Intr.getNode(); } // CC is a comparison that will be implemented using an integer or // floating-point comparison. Return the condition code mask for // a branch on true. In the integer case, CCMASK_CMP_UO is set for // unsigned comparisons and clear for signed ones. In the floating-point // case, CCMASK_CMP_UO has its normal mask meaning (unordered). static unsigned CCMaskForCondCode(ISD::CondCode CC) { #define CONV(X) \ case ISD::SET##X: return SystemZ::CCMASK_CMP_##X; \ case ISD::SETO##X: return SystemZ::CCMASK_CMP_##X; \ case ISD::SETU##X: return SystemZ::CCMASK_CMP_UO | SystemZ::CCMASK_CMP_##X switch (CC) { default: llvm_unreachable("Invalid integer condition!"); CONV(EQ); CONV(NE); CONV(GT); CONV(GE); CONV(LT); CONV(LE); case ISD::SETO: return SystemZ::CCMASK_CMP_O; case ISD::SETUO: return SystemZ::CCMASK_CMP_UO; } #undef CONV } // If C can be converted to a comparison against zero, adjust the operands // as necessary. 
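// Editor's note: worked example for the helper that follows.  On a signed
// value, "x > -1" is the same test as "x >= 0" and "x < 1" is the same as
// "x <= 0".  Since each "or equal" CC mask is its strict counterpart with
// CCMASK_CMP_EQ added, toggling that single bit while replacing the constant
// with 0 performs the rewrite, and comparisons against zero are the ones the
// rest of this file knows how to optimize best.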
static void adjustZeroCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) { if (C.ICmpType == SystemZICMP::UnsignedOnly) return; auto *ConstOp1 = dyn_cast(C.Op1.getNode()); if (!ConstOp1 || ConstOp1->getValueSizeInBits(0) > 64) return; int64_t Value = ConstOp1->getSExtValue(); if ((Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_GT) || (Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_LE) || (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_LT) || (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_GE)) { C.CCMask ^= SystemZ::CCMASK_CMP_EQ; C.Op1 = DAG.getConstant(0, DL, C.Op1.getValueType()); } } // If a comparison described by C is suitable for CLI(Y), CHHSI or CLHHSI, // adjust the operands as necessary. static void adjustSubwordCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) { // For us to make any changes, it must a comparison between a single-use // load and a constant. if (!C.Op0.hasOneUse() || C.Op0.getOpcode() != ISD::LOAD || C.Op1.getOpcode() != ISD::Constant) return; // We must have an 8- or 16-bit load. auto *Load = cast(C.Op0); unsigned NumBits = Load->getMemoryVT().getSizeInBits(); if ((NumBits != 8 && NumBits != 16) || NumBits != Load->getMemoryVT().getStoreSizeInBits()) return; // The load must be an extending one and the constant must be within the // range of the unextended value. auto *ConstOp1 = cast(C.Op1); if (!ConstOp1 || ConstOp1->getValueSizeInBits(0) > 64) return; uint64_t Value = ConstOp1->getZExtValue(); uint64_t Mask = (1 << NumBits) - 1; if (Load->getExtensionType() == ISD::SEXTLOAD) { // Make sure that ConstOp1 is in range of C.Op0. int64_t SignedValue = ConstOp1->getSExtValue(); if (uint64_t(SignedValue) + (uint64_t(1) << (NumBits - 1)) > Mask) return; if (C.ICmpType != SystemZICMP::SignedOnly) { // Unsigned comparison between two sign-extended values is equivalent // to unsigned comparison between two zero-extended values. Value &= Mask; } else if (NumBits == 8) { // Try to treat the comparison as unsigned, so that we can use CLI. // Adjust CCMask and Value as necessary. if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_LT) // Test whether the high bit of the byte is set. Value = 127, C.CCMask = SystemZ::CCMASK_CMP_GT; else if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_GE) // Test whether the high bit of the byte is clear. Value = 128, C.CCMask = SystemZ::CCMASK_CMP_LT; else // No instruction exists for this combination. return; C.ICmpType = SystemZICMP::UnsignedOnly; } } else if (Load->getExtensionType() == ISD::ZEXTLOAD) { if (Value > Mask) return; // If the constant is in range, we can use any comparison. C.ICmpType = SystemZICMP::Any; } else return; // Make sure that the first operand is an i32 of the right extension type. ISD::LoadExtType ExtType = (C.ICmpType == SystemZICMP::SignedOnly ? ISD::SEXTLOAD : ISD::ZEXTLOAD); if (C.Op0.getValueType() != MVT::i32 || Load->getExtensionType() != ExtType) { C.Op0 = DAG.getExtLoad(ExtType, SDLoc(Load), MVT::i32, Load->getChain(), Load->getBasePtr(), Load->getPointerInfo(), Load->getMemoryVT(), Load->getAlign(), Load->getMemOperand()->getFlags()); // Update the chain uses. DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), C.Op0.getValue(1)); } // Make sure that the second operand is an i32 with the right value. if (C.Op1.getValueType() != MVT::i32 || Value != ConstOp1->getZExtValue()) C.Op1 = DAG.getConstant(Value, DL, MVT::i32); } // Return true if Op is either an unextended load, or a load suitable // for integer register-memory comparisons of type ICmpType. 
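// Editor's note: a minimal standalone sketch (not part of the backend) of the
// identity adjustSubwordCmp above relies on when it turns a signed byte
// compare against 0 into an unsigned CLI-style compare: a sign-extended byte
// is negative exactly when its unsigned 8-bit pattern is greater than 127,
// and non-negative exactly when that pattern is less than 128.
constexpr bool byteLtZero(int X) { return X < 0; }
constexpr bool byteGtU127(int X) { return unsigned(X & 0xff) > 127; }
static_assert(byteLtZero(-1) == byteGtU127(-1) &&
              byteLtZero(-128) == byteGtU127(-128) &&
              byteLtZero(0) == byteGtU127(0) &&
              byteLtZero(127) == byteGtU127(127),
              "signed x < 0 matches unsigned (x & 0xff) > 127 on byte values");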
static bool isNaturalMemoryOperand(SDValue Op, unsigned ICmpType) { auto *Load = dyn_cast(Op.getNode()); if (Load) { // There are no instructions to compare a register with a memory byte. if (Load->getMemoryVT() == MVT::i8) return false; // Otherwise decide on extension type. switch (Load->getExtensionType()) { case ISD::NON_EXTLOAD: return true; case ISD::SEXTLOAD: return ICmpType != SystemZICMP::UnsignedOnly; case ISD::ZEXTLOAD: return ICmpType != SystemZICMP::SignedOnly; default: break; } } return false; } // Return true if it is better to swap the operands of C. static bool shouldSwapCmpOperands(const Comparison &C) { // Leave i128 and f128 comparisons alone, since they have no memory forms. if (C.Op0.getValueType() == MVT::i128) return false; if (C.Op0.getValueType() == MVT::f128) return false; // Always keep a floating-point constant second, since comparisons with // zero can use LOAD TEST and comparisons with other constants make a // natural memory operand. if (isa(C.Op1)) return false; // Never swap comparisons with zero since there are many ways to optimize // those later. auto *ConstOp1 = dyn_cast(C.Op1); if (ConstOp1 && ConstOp1->getZExtValue() == 0) return false; // Also keep natural memory operands second if the loaded value is // only used here. Several comparisons have memory forms. if (isNaturalMemoryOperand(C.Op1, C.ICmpType) && C.Op1.hasOneUse()) return false; // Look for cases where Cmp0 is a single-use load and Cmp1 isn't. // In that case we generally prefer the memory to be second. if (isNaturalMemoryOperand(C.Op0, C.ICmpType) && C.Op0.hasOneUse()) { // The only exceptions are when the second operand is a constant and // we can use things like CHHSI. if (!ConstOp1) return true; // The unsigned memory-immediate instructions can handle 16-bit // unsigned integers. if (C.ICmpType != SystemZICMP::SignedOnly && isUInt<16>(ConstOp1->getZExtValue())) return false; // The signed memory-immediate instructions can handle 16-bit // signed integers. if (C.ICmpType != SystemZICMP::UnsignedOnly && isInt<16>(ConstOp1->getSExtValue())) return false; return true; } // Try to promote the use of CGFR and CLGFR. unsigned Opcode0 = C.Op0.getOpcode(); if (C.ICmpType != SystemZICMP::UnsignedOnly && Opcode0 == ISD::SIGN_EXTEND) return true; if (C.ICmpType != SystemZICMP::SignedOnly && Opcode0 == ISD::ZERO_EXTEND) return true; if (C.ICmpType != SystemZICMP::SignedOnly && Opcode0 == ISD::AND && C.Op0.getOperand(1).getOpcode() == ISD::Constant && C.Op0.getConstantOperandVal(1) == 0xffffffff) return true; return false; } // Check whether C tests for equality between X and Y and whether X - Y // or Y - X is also computed. In that case it's better to compare the // result of the subtraction against zero. static void adjustForSubtraction(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) { if (C.CCMask == SystemZ::CCMASK_CMP_EQ || C.CCMask == SystemZ::CCMASK_CMP_NE) { for (SDNode *N : C.Op0->uses()) { if (N->getOpcode() == ISD::SUB && ((N->getOperand(0) == C.Op0 && N->getOperand(1) == C.Op1) || (N->getOperand(0) == C.Op1 && N->getOperand(1) == C.Op0))) { // Disable the nsw and nuw flags: the backend needs to handle // overflow as well during comparison elimination. SDNodeFlags Flags = N->getFlags(); Flags.setNoSignedWrap(false); Flags.setNoUnsignedWrap(false); N->setFlags(Flags); C.Op0 = SDValue(N, 0); C.Op1 = DAG.getConstant(0, DL, N->getValueType(0)); return; } } } } // Check whether C compares a floating-point value with zero and if that // floating-point value is also negated. 
// In this case we can use the negation to set CC, thus avoiding separate
// LOAD AND TEST and LOAD (NEGATIVE/COMPLEMENT) instructions.
static void adjustForFNeg(Comparison &C) {
  // This optimization is invalid for strict comparisons, since FNEG
  // does not raise any exceptions.
  if (C.Chain)
    return;
  auto *C1 = dyn_cast<ConstantFPSDNode>(C.Op1);
  if (C1 && C1->isZero()) {
    for (SDNode *N : C.Op0->uses()) {
      if (N->getOpcode() == ISD::FNEG) {
        C.Op0 = SDValue(N, 0);
        C.CCMask = SystemZ::reverseCCMask(C.CCMask);
        return;
      }
    }
  }
}

// Check whether C compares (shl X, 32) with 0 and whether X is
// also sign-extended.  In that case it is better to test the result
// of the sign extension using LTGFR.
//
// This case is important because InstCombine transforms a comparison
// with (sext (trunc X)) into a comparison with (shl X, 32).
static void adjustForLTGFR(Comparison &C) {
  // Check for a comparison between (shl X, 32) and 0.
  if (C.Op0.getOpcode() == ISD::SHL && C.Op0.getValueType() == MVT::i64 &&
      C.Op1.getOpcode() == ISD::Constant && C.Op1->getAsZExtVal() == 0) {
    auto *C1 = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1));
    if (C1 && C1->getZExtValue() == 32) {
      SDValue ShlOp0 = C.Op0.getOperand(0);
      // See whether X has any SIGN_EXTEND_INREG uses.
      for (SDNode *N : ShlOp0->uses()) {
        if (N->getOpcode() == ISD::SIGN_EXTEND_INREG &&
            cast<VTSDNode>(N->getOperand(1))->getVT() == MVT::i32) {
          C.Op0 = SDValue(N, 0);
          return;
        }
      }
    }
  }
}

// If C compares the truncation of an extending load, try to compare
// the untruncated value instead.  This exposes more opportunities to
// reuse CC.
static void adjustICmpTruncate(SelectionDAG &DAG, const SDLoc &DL,
                               Comparison &C) {
  if (C.Op0.getOpcode() == ISD::TRUNCATE &&
      C.Op0.getOperand(0).getOpcode() == ISD::LOAD &&
      C.Op1.getOpcode() == ISD::Constant &&
      cast<ConstantSDNode>(C.Op1)->getValueSizeInBits(0) <= 64 &&
      C.Op1->getAsZExtVal() == 0) {
    auto *L = cast<LoadSDNode>(C.Op0.getOperand(0));
    if (L->getMemoryVT().getStoreSizeInBits().getFixedValue() <=
        C.Op0.getValueSizeInBits().getFixedValue()) {
      unsigned Type = L->getExtensionType();
      if ((Type == ISD::ZEXTLOAD && C.ICmpType != SystemZICMP::SignedOnly) ||
          (Type == ISD::SEXTLOAD && C.ICmpType != SystemZICMP::UnsignedOnly)) {
        C.Op0 = C.Op0.getOperand(0);
        C.Op1 = DAG.getConstant(0, DL, C.Op0.getValueType());
      }
    }
  }
}

// Return true if shift operation N has an in-range constant shift value.
// Store it in ShiftVal if so.
static bool isSimpleShift(SDValue N, unsigned &ShiftVal) {
  auto *Shift = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!Shift)
    return false;

  uint64_t Amount = Shift->getZExtValue();
  if (Amount >= N.getValueSizeInBits())
    return false;

  ShiftVal = Amount;
  return true;
}

// Check whether an AND with Mask is suitable for a TEST UNDER MASK
// instruction and whether the CC value is descriptive enough to handle
// a comparison of type Opcode between the AND result and CmpVal.
// CCMask says which comparison result is being tested and BitSize is
// the number of bits in the operands.  If TEST UNDER MASK can be used,
// return the corresponding CC mask, otherwise return 0.
static unsigned getTestUnderMaskCond(unsigned BitSize, unsigned CCMask,
                                     uint64_t Mask, uint64_t CmpVal,
                                     unsigned ICmpType) {
  assert(Mask != 0 && "ANDs with zero should have been removed by now");

  // Check whether the mask is suitable for TMHH, TMHL, TMLH or TMLL.
  if (!SystemZ::isImmLL(Mask) && !SystemZ::isImmLH(Mask) &&
      !SystemZ::isImmHL(Mask) && !SystemZ::isImmHH(Mask))
    return 0;

  // Work out the masks for the lowest and highest bits.
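  // For example, with Mask == 0x00f0 (which fits in the low 16 bits and is
  // therefore a TMLL-style mask), llvm::bit_floor yields High == 0x0080 and
  // the countr_zero shift yields Low == 0x0010: the most and least
  // significant bits that the TEST UNDER MASK instruction will examine.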
uint64_t High = llvm::bit_floor(Mask); uint64_t Low = uint64_t(1) << llvm::countr_zero(Mask); // Signed ordered comparisons are effectively unsigned if the sign // bit is dropped. bool EffectivelyUnsigned = (ICmpType != SystemZICMP::SignedOnly); // Check for equality comparisons with 0, or the equivalent. if (CmpVal == 0) { if (CCMask == SystemZ::CCMASK_CMP_EQ) return SystemZ::CCMASK_TM_ALL_0; if (CCMask == SystemZ::CCMASK_CMP_NE) return SystemZ::CCMASK_TM_SOME_1; } if (EffectivelyUnsigned && CmpVal > 0 && CmpVal <= Low) { if (CCMask == SystemZ::CCMASK_CMP_LT) return SystemZ::CCMASK_TM_ALL_0; if (CCMask == SystemZ::CCMASK_CMP_GE) return SystemZ::CCMASK_TM_SOME_1; } if (EffectivelyUnsigned && CmpVal < Low) { if (CCMask == SystemZ::CCMASK_CMP_LE) return SystemZ::CCMASK_TM_ALL_0; if (CCMask == SystemZ::CCMASK_CMP_GT) return SystemZ::CCMASK_TM_SOME_1; } // Check for equality comparisons with the mask, or the equivalent. if (CmpVal == Mask) { if (CCMask == SystemZ::CCMASK_CMP_EQ) return SystemZ::CCMASK_TM_ALL_1; if (CCMask == SystemZ::CCMASK_CMP_NE) return SystemZ::CCMASK_TM_SOME_0; } if (EffectivelyUnsigned && CmpVal >= Mask - Low && CmpVal < Mask) { if (CCMask == SystemZ::CCMASK_CMP_GT) return SystemZ::CCMASK_TM_ALL_1; if (CCMask == SystemZ::CCMASK_CMP_LE) return SystemZ::CCMASK_TM_SOME_0; } if (EffectivelyUnsigned && CmpVal > Mask - Low && CmpVal <= Mask) { if (CCMask == SystemZ::CCMASK_CMP_GE) return SystemZ::CCMASK_TM_ALL_1; if (CCMask == SystemZ::CCMASK_CMP_LT) return SystemZ::CCMASK_TM_SOME_0; } // Check for ordered comparisons with the top bit. if (EffectivelyUnsigned && CmpVal >= Mask - High && CmpVal < High) { if (CCMask == SystemZ::CCMASK_CMP_LE) return SystemZ::CCMASK_TM_MSB_0; if (CCMask == SystemZ::CCMASK_CMP_GT) return SystemZ::CCMASK_TM_MSB_1; } if (EffectivelyUnsigned && CmpVal > Mask - High && CmpVal <= High) { if (CCMask == SystemZ::CCMASK_CMP_LT) return SystemZ::CCMASK_TM_MSB_0; if (CCMask == SystemZ::CCMASK_CMP_GE) return SystemZ::CCMASK_TM_MSB_1; } // If there are just two bits, we can do equality checks for Low and High // as well. if (Mask == Low + High) { if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == Low) return SystemZ::CCMASK_TM_MIXED_MSB_0; if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == Low) return SystemZ::CCMASK_TM_MIXED_MSB_0 ^ SystemZ::CCMASK_ANY; if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == High) return SystemZ::CCMASK_TM_MIXED_MSB_1; if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == High) return SystemZ::CCMASK_TM_MIXED_MSB_1 ^ SystemZ::CCMASK_ANY; } // Looks like we've exhausted our options. return 0; } // See whether C can be implemented as a TEST UNDER MASK instruction. // Update the arguments with the TM version if so. static void adjustForTestUnderMask(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) { // Use VECTOR TEST UNDER MASK for i128 operations. if (C.Op0.getValueType() == MVT::i128) { // We can use VTM for EQ/NE comparisons of x & y against 0. if (C.Op0.getOpcode() == ISD::AND && (C.CCMask == SystemZ::CCMASK_CMP_EQ || C.CCMask == SystemZ::CCMASK_CMP_NE)) { auto *Mask = dyn_cast(C.Op1); if (Mask && Mask->getAPIntValue() == 0) { C.Opcode = SystemZISD::VTM; C.Op1 = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, C.Op0.getOperand(1)); C.Op0 = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, C.Op0.getOperand(0)); C.CCValid = SystemZ::CCMASK_VCMP; if (C.CCMask == SystemZ::CCMASK_CMP_EQ) C.CCMask = SystemZ::CCMASK_VCMP_ALL; else C.CCMask = SystemZ::CCMASK_VCMP_ALL ^ C.CCValid; } } return; } // Check that we have a comparison with a constant. 
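  // TEST UNDER MASK only applies when the comparison value is a compile-time
  // constant; for example (x & 0x8000) == 0 becomes a TMLL-style test of bit
  // 0x8000 with a CC mask of "all selected bits zero", whereas a comparison
  // against another register has to stay an ordinary compare.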
auto *ConstOp1 = dyn_cast(C.Op1); if (!ConstOp1) return; uint64_t CmpVal = ConstOp1->getZExtValue(); // Check whether the nonconstant input is an AND with a constant mask. Comparison NewC(C); uint64_t MaskVal; ConstantSDNode *Mask = nullptr; if (C.Op0.getOpcode() == ISD::AND) { NewC.Op0 = C.Op0.getOperand(0); NewC.Op1 = C.Op0.getOperand(1); Mask = dyn_cast(NewC.Op1); if (!Mask) return; MaskVal = Mask->getZExtValue(); } else { // There is no instruction to compare with a 64-bit immediate // so use TMHH instead if possible. We need an unsigned ordered // comparison with an i64 immediate. if (NewC.Op0.getValueType() != MVT::i64 || NewC.CCMask == SystemZ::CCMASK_CMP_EQ || NewC.CCMask == SystemZ::CCMASK_CMP_NE || NewC.ICmpType == SystemZICMP::SignedOnly) return; // Convert LE and GT comparisons into LT and GE. if (NewC.CCMask == SystemZ::CCMASK_CMP_LE || NewC.CCMask == SystemZ::CCMASK_CMP_GT) { if (CmpVal == uint64_t(-1)) return; CmpVal += 1; NewC.CCMask ^= SystemZ::CCMASK_CMP_EQ; } // If the low N bits of Op1 are zero than the low N bits of Op0 can // be masked off without changing the result. MaskVal = -(CmpVal & -CmpVal); NewC.ICmpType = SystemZICMP::UnsignedOnly; } if (!MaskVal) return; // Check whether the combination of mask, comparison value and comparison // type are suitable. unsigned BitSize = NewC.Op0.getValueSizeInBits(); unsigned NewCCMask, ShiftVal; if (NewC.ICmpType != SystemZICMP::SignedOnly && NewC.Op0.getOpcode() == ISD::SHL && isSimpleShift(NewC.Op0, ShiftVal) && (MaskVal >> ShiftVal != 0) && ((CmpVal >> ShiftVal) << ShiftVal) == CmpVal && (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask, MaskVal >> ShiftVal, CmpVal >> ShiftVal, SystemZICMP::Any))) { NewC.Op0 = NewC.Op0.getOperand(0); MaskVal >>= ShiftVal; } else if (NewC.ICmpType != SystemZICMP::SignedOnly && NewC.Op0.getOpcode() == ISD::SRL && isSimpleShift(NewC.Op0, ShiftVal) && (MaskVal << ShiftVal != 0) && ((CmpVal << ShiftVal) >> ShiftVal) == CmpVal && (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask, MaskVal << ShiftVal, CmpVal << ShiftVal, SystemZICMP::UnsignedOnly))) { NewC.Op0 = NewC.Op0.getOperand(0); MaskVal <<= ShiftVal; } else { NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask, MaskVal, CmpVal, NewC.ICmpType); if (!NewCCMask) return; } // Go ahead and make the change. C.Opcode = SystemZISD::TM; C.Op0 = NewC.Op0; if (Mask && Mask->getZExtValue() == MaskVal) C.Op1 = SDValue(Mask, 0); else C.Op1 = DAG.getConstant(MaskVal, DL, C.Op0.getValueType()); C.CCValid = SystemZ::CCMASK_TM; C.CCMask = NewCCMask; } // Implement i128 comparison in vector registers. static void adjustICmp128(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) { if (C.Opcode != SystemZISD::ICMP) return; if (C.Op0.getValueType() != MVT::i128) return; // (In-)Equality comparisons can be implemented via VCEQGS. if (C.CCMask == SystemZ::CCMASK_CMP_EQ || C.CCMask == SystemZ::CCMASK_CMP_NE) { C.Opcode = SystemZISD::VICMPES; C.Op0 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, C.Op0); C.Op1 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, C.Op1); C.CCValid = SystemZ::CCMASK_VCMP; if (C.CCMask == SystemZ::CCMASK_CMP_EQ) C.CCMask = SystemZ::CCMASK_VCMP_ALL; else C.CCMask = SystemZ::CCMASK_VCMP_ALL ^ C.CCValid; return; } // Normalize other comparisons to GT. 
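  // LT is GT with the operands swapped, LE is the inverse of GT, and GE is
  // the inverse of GT with the operands swapped; the Swap and Invert flags
  // below encode exactly that, with the inversion folded into the final CC
  // mask rather than applied to the comparison itself.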
bool Swap = false, Invert = false; switch (C.CCMask) { case SystemZ::CCMASK_CMP_GT: break; case SystemZ::CCMASK_CMP_LT: Swap = true; break; case SystemZ::CCMASK_CMP_LE: Invert = true; break; case SystemZ::CCMASK_CMP_GE: Swap = Invert = true; break; default: llvm_unreachable("Invalid integer condition!"); } if (Swap) std::swap(C.Op0, C.Op1); if (C.ICmpType == SystemZICMP::UnsignedOnly) C.Opcode = SystemZISD::UCMP128HI; else C.Opcode = SystemZISD::SCMP128HI; C.CCValid = SystemZ::CCMASK_ANY; C.CCMask = SystemZ::CCMASK_1; if (Invert) C.CCMask ^= C.CCValid; } // See whether the comparison argument contains a redundant AND // and remove it if so. This sometimes happens due to the generic // BRCOND expansion. static void adjustForRedundantAnd(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) { if (C.Op0.getOpcode() != ISD::AND) return; auto *Mask = dyn_cast(C.Op0.getOperand(1)); if (!Mask || Mask->getValueSizeInBits(0) > 64) return; KnownBits Known = DAG.computeKnownBits(C.Op0.getOperand(0)); if ((~Known.Zero).getZExtValue() & ~Mask->getZExtValue()) return; C.Op0 = C.Op0.getOperand(0); } // Return a Comparison that tests the condition-code result of intrinsic // node Call against constant integer CC using comparison code Cond. // Opcode is the opcode of the SystemZISD operation for the intrinsic // and CCValid is the set of possible condition-code results. static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode, SDValue Call, unsigned CCValid, uint64_t CC, ISD::CondCode Cond) { Comparison C(Call, SDValue(), SDValue()); C.Opcode = Opcode; C.CCValid = CCValid; if (Cond == ISD::SETEQ) // bit 3 for CC==0, bit 0 for CC==3, always false for CC>3. C.CCMask = CC < 4 ? 1 << (3 - CC) : 0; else if (Cond == ISD::SETNE) // ...and the inverse of that. C.CCMask = CC < 4 ? ~(1 << (3 - CC)) : -1; else if (Cond == ISD::SETLT || Cond == ISD::SETULT) // bits above bit 3 for CC==0 (always false), bits above bit 0 for CC==3, // always true for CC>3. C.CCMask = CC < 4 ? ~0U << (4 - CC) : -1; else if (Cond == ISD::SETGE || Cond == ISD::SETUGE) // ...and the inverse of that. C.CCMask = CC < 4 ? ~(~0U << (4 - CC)) : 0; else if (Cond == ISD::SETLE || Cond == ISD::SETULE) // bit 3 and above for CC==0, bit 0 and above for CC==3 (always true), // always true for CC>3. C.CCMask = CC < 4 ? ~0U << (3 - CC) : -1; else if (Cond == ISD::SETGT || Cond == ISD::SETUGT) // ...and the inverse of that. C.CCMask = CC < 4 ? ~(~0U << (3 - CC)) : 0; else llvm_unreachable("Unexpected integer comparison type"); C.CCMask &= CCValid; return C; } // Decide how to implement a comparison of type Cond between CmpOp0 with CmpOp1. 
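// Editor's note: a minimal standalone illustration (not part of the backend)
// of the bit layout getIntrinsicCmp above relies on.  A condition-code value
// c in [0, 3] is represented by bit (1 << (3 - c)) of a 4-bit mask, so CC==0
// maps to the mask's MSB and CC==3 to its LSB; range tests such as "cc < 2"
// then keep every bit above the bit for CC==1.  The real code masks the
// result with CCValid afterwards; the 0xf here stands in for that step.
constexpr unsigned ccBit(unsigned CC) { return 1u << (3 - CC); }
constexpr unsigned ccMaskForLess(unsigned CC) {
  return CC < 4 ? (~0u << (4 - CC)) & 0xfu : 0xfu;
}
static_assert(ccBit(0) == 8 && ccBit(3) == 1, "CC==0 is the MSB of the mask");
static_assert(ccMaskForLess(2) == (ccBit(0) | ccBit(1)),
              "cc < 2 accepts exactly CC==0 and CC==1");
static_assert(ccMaskForLess(0) == 0, "cc < 0 is always false");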
static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1, ISD::CondCode Cond, const SDLoc &DL, SDValue Chain = SDValue(), bool IsSignaling = false) { if (CmpOp1.getOpcode() == ISD::Constant) { assert(!Chain); unsigned Opcode, CCValid; if (CmpOp0.getOpcode() == ISD::INTRINSIC_W_CHAIN && CmpOp0.getResNo() == 0 && CmpOp0->hasNUsesOfValue(1, 0) && isIntrinsicWithCCAndChain(CmpOp0, Opcode, CCValid)) return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid, CmpOp1->getAsZExtVal(), Cond); if (CmpOp0.getOpcode() == ISD::INTRINSIC_WO_CHAIN && CmpOp0.getResNo() == CmpOp0->getNumValues() - 1 && isIntrinsicWithCC(CmpOp0, Opcode, CCValid)) return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid, CmpOp1->getAsZExtVal(), Cond); } Comparison C(CmpOp0, CmpOp1, Chain); C.CCMask = CCMaskForCondCode(Cond); if (C.Op0.getValueType().isFloatingPoint()) { C.CCValid = SystemZ::CCMASK_FCMP; if (!C.Chain) C.Opcode = SystemZISD::FCMP; else if (!IsSignaling) C.Opcode = SystemZISD::STRICT_FCMP; else C.Opcode = SystemZISD::STRICT_FCMPS; adjustForFNeg(C); } else { assert(!C.Chain); C.CCValid = SystemZ::CCMASK_ICMP; C.Opcode = SystemZISD::ICMP; // Choose the type of comparison. Equality and inequality tests can // use either signed or unsigned comparisons. The choice also doesn't // matter if both sign bits are known to be clear. In those cases we // want to give the main isel code the freedom to choose whichever // form fits best. if (C.CCMask == SystemZ::CCMASK_CMP_EQ || C.CCMask == SystemZ::CCMASK_CMP_NE || (DAG.SignBitIsZero(C.Op0) && DAG.SignBitIsZero(C.Op1))) C.ICmpType = SystemZICMP::Any; else if (C.CCMask & SystemZ::CCMASK_CMP_UO) C.ICmpType = SystemZICMP::UnsignedOnly; else C.ICmpType = SystemZICMP::SignedOnly; C.CCMask &= ~SystemZ::CCMASK_CMP_UO; adjustForRedundantAnd(DAG, DL, C); adjustZeroCmp(DAG, DL, C); adjustSubwordCmp(DAG, DL, C); adjustForSubtraction(DAG, DL, C); adjustForLTGFR(C); adjustICmpTruncate(DAG, DL, C); } if (shouldSwapCmpOperands(C)) { std::swap(C.Op0, C.Op1); C.CCMask = SystemZ::reverseCCMask(C.CCMask); } adjustForTestUnderMask(DAG, DL, C); adjustICmp128(DAG, DL, C); return C; } // Emit the comparison instruction described by C. static SDValue emitCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) { if (!C.Op1.getNode()) { SDNode *Node; switch (C.Op0.getOpcode()) { case ISD::INTRINSIC_W_CHAIN: Node = emitIntrinsicWithCCAndChain(DAG, C.Op0, C.Opcode); return SDValue(Node, 0); case ISD::INTRINSIC_WO_CHAIN: Node = emitIntrinsicWithCC(DAG, C.Op0, C.Opcode); return SDValue(Node, Node->getNumValues() - 1); default: llvm_unreachable("Invalid comparison operands"); } } if (C.Opcode == SystemZISD::ICMP) return DAG.getNode(SystemZISD::ICMP, DL, MVT::i32, C.Op0, C.Op1, DAG.getTargetConstant(C.ICmpType, DL, MVT::i32)); if (C.Opcode == SystemZISD::TM) { bool RegisterOnly = (bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_0) != bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_1)); return DAG.getNode(SystemZISD::TM, DL, MVT::i32, C.Op0, C.Op1, DAG.getTargetConstant(RegisterOnly, DL, MVT::i32)); } if (C.Opcode == SystemZISD::VICMPES) { SDVTList VTs = DAG.getVTList(C.Op0.getValueType(), MVT::i32); SDValue Val = DAG.getNode(C.Opcode, DL, VTs, C.Op0, C.Op1); return SDValue(Val.getNode(), 1); } if (C.Chain) { SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other); return DAG.getNode(C.Opcode, DL, VTs, C.Chain, C.Op0, C.Op1); } return DAG.getNode(C.Opcode, DL, MVT::i32, C.Op0, C.Op1); } // Implement a 32-bit *MUL_LOHI operation by extending both operands to // 64 bits. Extend is the extension type to use. 
Store the high part // in Hi and the low part in Lo. static void lowerMUL_LOHI32(SelectionDAG &DAG, const SDLoc &DL, unsigned Extend, SDValue Op0, SDValue Op1, SDValue &Hi, SDValue &Lo) { Op0 = DAG.getNode(Extend, DL, MVT::i64, Op0); Op1 = DAG.getNode(Extend, DL, MVT::i64, Op1); SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, Op0, Op1); Hi = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul, DAG.getConstant(32, DL, MVT::i64)); Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Hi); Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mul); } // Lower a binary operation that produces two VT results, one in each // half of a GR128 pair. Op0 and Op1 are the VT operands to the operation, // and Opcode performs the GR128 operation. Store the even register result // in Even and the odd register result in Odd. static void lowerGR128Binary(SelectionDAG &DAG, const SDLoc &DL, EVT VT, unsigned Opcode, SDValue Op0, SDValue Op1, SDValue &Even, SDValue &Odd) { SDValue Result = DAG.getNode(Opcode, DL, MVT::Untyped, Op0, Op1); bool Is32Bit = is32Bit(VT); Even = DAG.getTargetExtractSubreg(SystemZ::even128(Is32Bit), DL, VT, Result); Odd = DAG.getTargetExtractSubreg(SystemZ::odd128(Is32Bit), DL, VT, Result); } // Return an i32 value that is 1 if the CC value produced by CCReg is // in the mask CCMask and 0 otherwise. CC is known to have a value // in CCValid, so other values can be ignored. static SDValue emitSETCC(SelectionDAG &DAG, const SDLoc &DL, SDValue CCReg, unsigned CCValid, unsigned CCMask) { SDValue Ops[] = {DAG.getConstant(1, DL, MVT::i32), DAG.getConstant(0, DL, MVT::i32), DAG.getTargetConstant(CCValid, DL, MVT::i32), DAG.getTargetConstant(CCMask, DL, MVT::i32), CCReg}; return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, MVT::i32, Ops); } // Return the SystemISD vector comparison operation for CC, or 0 if it cannot // be done directly. Mode is CmpMode::Int for integer comparisons, CmpMode::FP // for regular floating-point comparisons, CmpMode::StrictFP for strict (quiet) // floating-point comparisons, and CmpMode::SignalingFP for strict signaling // floating-point comparisons. enum class CmpMode { Int, FP, StrictFP, SignalingFP }; static unsigned getVectorComparison(ISD::CondCode CC, CmpMode Mode) { switch (CC) { case ISD::SETOEQ: case ISD::SETEQ: switch (Mode) { case CmpMode::Int: return SystemZISD::VICMPE; case CmpMode::FP: return SystemZISD::VFCMPE; case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPE; case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPES; } llvm_unreachable("Bad mode"); case ISD::SETOGE: case ISD::SETGE: switch (Mode) { case CmpMode::Int: return 0; case CmpMode::FP: return SystemZISD::VFCMPHE; case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPHE; case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPHES; } llvm_unreachable("Bad mode"); case ISD::SETOGT: case ISD::SETGT: switch (Mode) { case CmpMode::Int: return SystemZISD::VICMPH; case CmpMode::FP: return SystemZISD::VFCMPH; case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPH; case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPHS; } llvm_unreachable("Bad mode"); case ISD::SETUGT: switch (Mode) { case CmpMode::Int: return SystemZISD::VICMPHL; case CmpMode::FP: return 0; case CmpMode::StrictFP: return 0; case CmpMode::SignalingFP: return 0; } llvm_unreachable("Bad mode"); default: return 0; } } // Return the SystemZISD vector comparison operation for CC or its inverse, // or 0 if neither can be done directly. Indicate in Invert whether the // result is for the inverse of CC. Mode is as above. 
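// If CC itself has no direct vector-compare opcode, the inverse condition is
// tried instead and Invert is set; the caller lowerVectorSETCC below then
// flips the result by XORing it with an all-ones splat.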
static unsigned getVectorComparisonOrInvert(ISD::CondCode CC, CmpMode Mode, bool &Invert) { if (unsigned Opcode = getVectorComparison(CC, Mode)) { Invert = false; return Opcode; } CC = ISD::getSetCCInverse(CC, Mode == CmpMode::Int ? MVT::i32 : MVT::f32); if (unsigned Opcode = getVectorComparison(CC, Mode)) { Invert = true; return Opcode; } return 0; } // Return a v2f64 that contains the extended form of elements Start and Start+1 // of v4f32 value Op. If Chain is nonnull, return the strict form. static SDValue expandV4F32ToV2F64(SelectionDAG &DAG, int Start, const SDLoc &DL, SDValue Op, SDValue Chain) { int Mask[] = { Start, -1, Start + 1, -1 }; Op = DAG.getVectorShuffle(MVT::v4f32, DL, Op, DAG.getUNDEF(MVT::v4f32), Mask); if (Chain) { SDVTList VTs = DAG.getVTList(MVT::v2f64, MVT::Other); return DAG.getNode(SystemZISD::STRICT_VEXTEND, DL, VTs, Chain, Op); } return DAG.getNode(SystemZISD::VEXTEND, DL, MVT::v2f64, Op); } // Build a comparison of vectors CmpOp0 and CmpOp1 using opcode Opcode, // producing a result of type VT. If Chain is nonnull, return the strict form. SDValue SystemZTargetLowering::getVectorCmp(SelectionDAG &DAG, unsigned Opcode, const SDLoc &DL, EVT VT, SDValue CmpOp0, SDValue CmpOp1, SDValue Chain) const { // There is no hardware support for v4f32 (unless we have the vector // enhancements facility 1), so extend the vector into two v2f64s // and compare those. if (CmpOp0.getValueType() == MVT::v4f32 && !Subtarget.hasVectorEnhancements1()) { SDValue H0 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp0, Chain); SDValue L0 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp0, Chain); SDValue H1 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp1, Chain); SDValue L1 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp1, Chain); if (Chain) { SDVTList VTs = DAG.getVTList(MVT::v2i64, MVT::Other); SDValue HRes = DAG.getNode(Opcode, DL, VTs, Chain, H0, H1); SDValue LRes = DAG.getNode(Opcode, DL, VTs, Chain, L0, L1); SDValue Res = DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes); SDValue Chains[6] = { H0.getValue(1), L0.getValue(1), H1.getValue(1), L1.getValue(1), HRes.getValue(1), LRes.getValue(1) }; SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains); SDValue Ops[2] = { Res, NewChain }; return DAG.getMergeValues(Ops, DL); } SDValue HRes = DAG.getNode(Opcode, DL, MVT::v2i64, H0, H1); SDValue LRes = DAG.getNode(Opcode, DL, MVT::v2i64, L0, L1); return DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes); } if (Chain) { SDVTList VTs = DAG.getVTList(VT, MVT::Other); return DAG.getNode(Opcode, DL, VTs, Chain, CmpOp0, CmpOp1); } return DAG.getNode(Opcode, DL, VT, CmpOp0, CmpOp1); } // Lower a vector comparison of type CC between CmpOp0 and CmpOp1, producing // an integer mask of type VT. If Chain is nonnull, we have a strict // floating-point comparison. If in addition IsSignaling is true, we have // a strict signaling floating-point comparison. SDValue SystemZTargetLowering::lowerVectorSETCC(SelectionDAG &DAG, const SDLoc &DL, EVT VT, ISD::CondCode CC, SDValue CmpOp0, SDValue CmpOp1, SDValue Chain, bool IsSignaling) const { bool IsFP = CmpOp0.getValueType().isFloatingPoint(); assert (!Chain || IsFP); assert (!IsSignaling || Chain); CmpMode Mode = IsSignaling ? CmpMode::SignalingFP : Chain ? CmpMode::StrictFP : IsFP ? CmpMode::FP : CmpMode::Int; bool Invert = false; SDValue Cmp; switch (CC) { // Handle tests for order using (or (ogt y x) (oge x y)). 
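  // Two values are ordered (neither is a NaN) exactly when at least one of
  // the two ordered comparisons below succeeds; SETUO reuses the same
  // sequence and then flips the result via the Invert flag.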
case ISD::SETUO: Invert = true; [[fallthrough]]; case ISD::SETO: { assert(IsFP && "Unexpected integer comparison"); SDValue LT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode), DL, VT, CmpOp1, CmpOp0, Chain); SDValue GE = getVectorCmp(DAG, getVectorComparison(ISD::SETOGE, Mode), DL, VT, CmpOp0, CmpOp1, Chain); Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GE); if (Chain) Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, LT.getValue(1), GE.getValue(1)); break; } // Handle <> tests using (or (ogt y x) (ogt x y)). case ISD::SETUEQ: Invert = true; [[fallthrough]]; case ISD::SETONE: { assert(IsFP && "Unexpected integer comparison"); SDValue LT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode), DL, VT, CmpOp1, CmpOp0, Chain); SDValue GT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode), DL, VT, CmpOp0, CmpOp1, Chain); Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GT); if (Chain) Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, LT.getValue(1), GT.getValue(1)); break; } // Otherwise a single comparison is enough. It doesn't really // matter whether we try the inversion or the swap first, since // there are no cases where both work. default: if (unsigned Opcode = getVectorComparisonOrInvert(CC, Mode, Invert)) Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp0, CmpOp1, Chain); else { CC = ISD::getSetCCSwappedOperands(CC); if (unsigned Opcode = getVectorComparisonOrInvert(CC, Mode, Invert)) Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp1, CmpOp0, Chain); else llvm_unreachable("Unhandled comparison"); } if (Chain) Chain = Cmp.getValue(1); break; } if (Invert) { SDValue Mask = DAG.getSplatBuildVector(VT, DL, DAG.getConstant(-1, DL, MVT::i64)); Cmp = DAG.getNode(ISD::XOR, DL, VT, Cmp, Mask); } if (Chain && Chain.getNode() != Cmp.getNode()) { SDValue Ops[2] = { Cmp, Chain }; Cmp = DAG.getMergeValues(Ops, DL); } return Cmp; } SDValue SystemZTargetLowering::lowerSETCC(SDValue Op, SelectionDAG &DAG) const { SDValue CmpOp0 = Op.getOperand(0); SDValue CmpOp1 = Op.getOperand(1); ISD::CondCode CC = cast(Op.getOperand(2))->get(); SDLoc DL(Op); EVT VT = Op.getValueType(); if (VT.isVector()) return lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1); Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL)); SDValue CCReg = emitCmp(DAG, DL, C); return emitSETCC(DAG, DL, CCReg, C.CCValid, C.CCMask); } SDValue SystemZTargetLowering::lowerSTRICT_FSETCC(SDValue Op, SelectionDAG &DAG, bool IsSignaling) const { SDValue Chain = Op.getOperand(0); SDValue CmpOp0 = Op.getOperand(1); SDValue CmpOp1 = Op.getOperand(2); ISD::CondCode CC = cast(Op.getOperand(3))->get(); SDLoc DL(Op); EVT VT = Op.getNode()->getValueType(0); if (VT.isVector()) { SDValue Res = lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1, Chain, IsSignaling); return Res.getValue(Op.getResNo()); } Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL, Chain, IsSignaling)); SDValue CCReg = emitCmp(DAG, DL, C); CCReg->setFlags(Op->getFlags()); SDValue Result = emitSETCC(DAG, DL, CCReg, C.CCValid, C.CCMask); SDValue Ops[2] = { Result, CCReg.getValue(1) }; return DAG.getMergeValues(Ops, DL); } SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const { ISD::CondCode CC = cast(Op.getOperand(1))->get(); SDValue CmpOp0 = Op.getOperand(2); SDValue CmpOp1 = Op.getOperand(3); SDValue Dest = Op.getOperand(4); SDLoc DL(Op); Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL)); SDValue CCReg = emitCmp(DAG, DL, C); return DAG.getNode( SystemZISD::BR_CCMASK, DL, Op.getValueType(), Op.getOperand(0), DAG.getTargetConstant(C.CCValid, DL, MVT::i32), 
DAG.getTargetConstant(C.CCMask, DL, MVT::i32), Dest, CCReg); } // Return true if Pos is CmpOp and Neg is the negative of CmpOp, // allowing Pos and Neg to be wider than CmpOp. static bool isAbsolute(SDValue CmpOp, SDValue Pos, SDValue Neg) { return (Neg.getOpcode() == ISD::SUB && Neg.getOperand(0).getOpcode() == ISD::Constant && Neg.getConstantOperandVal(0) == 0 && Neg.getOperand(1) == Pos && (Pos == CmpOp || (Pos.getOpcode() == ISD::SIGN_EXTEND && Pos.getOperand(0) == CmpOp))); } // Return the absolute or negative absolute of Op; IsNegative decides which. static SDValue getAbsolute(SelectionDAG &DAG, const SDLoc &DL, SDValue Op, bool IsNegative) { Op = DAG.getNode(ISD::ABS, DL, Op.getValueType(), Op); if (IsNegative) Op = DAG.getNode(ISD::SUB, DL, Op.getValueType(), DAG.getConstant(0, DL, Op.getValueType()), Op); return Op; } SDValue SystemZTargetLowering::lowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { SDValue CmpOp0 = Op.getOperand(0); SDValue CmpOp1 = Op.getOperand(1); SDValue TrueOp = Op.getOperand(2); SDValue FalseOp = Op.getOperand(3); ISD::CondCode CC = cast(Op.getOperand(4))->get(); SDLoc DL(Op); Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL)); // Check for absolute and negative-absolute selections, including those // where the comparison value is sign-extended (for LPGFR and LNGFR). // This check supplements the one in DAGCombiner. if (C.Opcode == SystemZISD::ICMP && C.CCMask != SystemZ::CCMASK_CMP_EQ && C.CCMask != SystemZ::CCMASK_CMP_NE && C.Op1.getOpcode() == ISD::Constant && cast(C.Op1)->getValueSizeInBits(0) <= 64 && C.Op1->getAsZExtVal() == 0) { if (isAbsolute(C.Op0, TrueOp, FalseOp)) return getAbsolute(DAG, DL, TrueOp, C.CCMask & SystemZ::CCMASK_CMP_LT); if (isAbsolute(C.Op0, FalseOp, TrueOp)) return getAbsolute(DAG, DL, FalseOp, C.CCMask & SystemZ::CCMASK_CMP_GT); } SDValue CCReg = emitCmp(DAG, DL, C); SDValue Ops[] = {TrueOp, FalseOp, DAG.getTargetConstant(C.CCValid, DL, MVT::i32), DAG.getTargetConstant(C.CCMask, DL, MVT::i32), CCReg}; return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, Op.getValueType(), Ops); } SDValue SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode *Node, SelectionDAG &DAG) const { SDLoc DL(Node); const GlobalValue *GV = Node->getGlobal(); int64_t Offset = Node->getOffset(); EVT PtrVT = getPointerTy(DAG.getDataLayout()); CodeModel::Model CM = DAG.getTarget().getCodeModel(); SDValue Result; if (Subtarget.isPC32DBLSymbol(GV, CM)) { if (isInt<32>(Offset)) { // Assign anchors at 1<<12 byte boundaries. uint64_t Anchor = Offset & ~uint64_t(0xfff); Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor); Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result); // The offset can be folded into the address if it is aligned to a // halfword. Offset -= Anchor; if (Offset != 0 && (Offset & 1) == 0) { SDValue Full = DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor + Offset); Result = DAG.getNode(SystemZISD::PCREL_OFFSET, DL, PtrVT, Full, Result); Offset = 0; } } else { // Conservatively load a constant offset greater than 32 bits into a // register below. 
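      // Any offset that is not folded here is added explicitly after the
      // PCREL_WRAPPER node, see the ISD::ADD at the end of this function.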
Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT); Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result); } } else if (Subtarget.isTargetELF()) { Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, SystemZII::MO_GOT); Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result); Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result, MachinePointerInfo::getGOT(DAG.getMachineFunction())); } else if (Subtarget.isTargetzOS()) { Result = getADAEntry(DAG, GV, DL, PtrVT); } else llvm_unreachable("Unexpected Subtarget"); // If there was a non-zero offset that we didn't fold, create an explicit // addition for it. if (Offset != 0) Result = DAG.getNode(ISD::ADD, DL, PtrVT, Result, DAG.getConstant(Offset, DL, PtrVT)); return Result; } SDValue SystemZTargetLowering::lowerTLSGetOffset(GlobalAddressSDNode *Node, SelectionDAG &DAG, unsigned Opcode, SDValue GOTOffset) const { SDLoc DL(Node); EVT PtrVT = getPointerTy(DAG.getDataLayout()); SDValue Chain = DAG.getEntryNode(); SDValue Glue; if (DAG.getMachineFunction().getFunction().getCallingConv() == CallingConv::GHC) report_fatal_error("In GHC calling convention TLS is not supported"); // __tls_get_offset takes the GOT offset in %r2 and the GOT in %r12. SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT); Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R12D, GOT, Glue); Glue = Chain.getValue(1); Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R2D, GOTOffset, Glue); Glue = Chain.getValue(1); // The first call operand is the chain and the second is the TLS symbol. SmallVector Ops; Ops.push_back(Chain); Ops.push_back(DAG.getTargetGlobalAddress(Node->getGlobal(), DL, Node->getValueType(0), 0, 0)); // Add argument registers to the end of the list so that they are // known live into the call. Ops.push_back(DAG.getRegister(SystemZ::R2D, PtrVT)); Ops.push_back(DAG.getRegister(SystemZ::R12D, PtrVT)); // Add a register mask operand representing the call-preserved registers. const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); const uint32_t *Mask = TRI->getCallPreservedMask(DAG.getMachineFunction(), CallingConv::C); assert(Mask && "Missing call preserved mask for calling convention"); Ops.push_back(DAG.getRegisterMask(Mask)); // Glue the call to the argument copies. Ops.push_back(Glue); // Emit the call. SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); Chain = DAG.getNode(Opcode, DL, NodeTys, Ops); Glue = Chain.getValue(1); // Copy the return value from %r2. return DAG.getCopyFromReg(Chain, DL, SystemZ::R2D, PtrVT, Glue); } SDValue SystemZTargetLowering::lowerThreadPointer(const SDLoc &DL, SelectionDAG &DAG) const { SDValue Chain = DAG.getEntryNode(); EVT PtrVT = getPointerTy(DAG.getDataLayout()); // The high part of the thread pointer is in access register 0. SDValue TPHi = DAG.getCopyFromReg(Chain, DL, SystemZ::A0, MVT::i32); TPHi = DAG.getNode(ISD::ANY_EXTEND, DL, PtrVT, TPHi); // The low part of the thread pointer is in access register 1. SDValue TPLo = DAG.getCopyFromReg(Chain, DL, SystemZ::A1, MVT::i32); TPLo = DAG.getNode(ISD::ZERO_EXTEND, DL, PtrVT, TPLo); // Merge them into a single 64-bit address. 
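  // The 64-bit thread pointer is assembled as (A0 << 32) | A1: the
  // any-extended high word is shifted into place and ORed with the
  // zero-extended low word from access register 1.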
SDValue TPHiShifted = DAG.getNode(ISD::SHL, DL, PtrVT, TPHi, DAG.getConstant(32, DL, PtrVT)); return DAG.getNode(ISD::OR, DL, PtrVT, TPHiShifted, TPLo); } SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node, SelectionDAG &DAG) const { if (DAG.getTarget().useEmulatedTLS()) return LowerToTLSEmulatedModel(Node, DAG); SDLoc DL(Node); const GlobalValue *GV = Node->getGlobal(); EVT PtrVT = getPointerTy(DAG.getDataLayout()); TLSModel::Model model = DAG.getTarget().getTLSModel(GV); if (DAG.getMachineFunction().getFunction().getCallingConv() == CallingConv::GHC) report_fatal_error("In GHC calling convention TLS is not supported"); SDValue TP = lowerThreadPointer(DL, DAG); // Get the offset of GA from the thread pointer, based on the TLS model. SDValue Offset; switch (model) { case TLSModel::GeneralDynamic: { // Load the GOT offset of the tls_index (module ID / per-symbol offset). SystemZConstantPoolValue *CPV = SystemZConstantPoolValue::Create(GV, SystemZCP::TLSGD); Offset = DAG.getConstantPool(CPV, PtrVT, Align(8)); Offset = DAG.getLoad( PtrVT, DL, DAG.getEntryNode(), Offset, MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); // Call __tls_get_offset to retrieve the offset. Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_GDCALL, Offset); break; } case TLSModel::LocalDynamic: { // Load the GOT offset of the module ID. SystemZConstantPoolValue *CPV = SystemZConstantPoolValue::Create(GV, SystemZCP::TLSLDM); Offset = DAG.getConstantPool(CPV, PtrVT, Align(8)); Offset = DAG.getLoad( PtrVT, DL, DAG.getEntryNode(), Offset, MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); // Call __tls_get_offset to retrieve the module base offset. Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_LDCALL, Offset); // Note: The SystemZLDCleanupPass will remove redundant computations // of the module base offset. Count total number of local-dynamic // accesses to trigger execution of that pass. SystemZMachineFunctionInfo* MFI = DAG.getMachineFunction().getInfo(); MFI->incNumLocalDynamicTLSAccesses(); // Add the per-symbol offset. CPV = SystemZConstantPoolValue::Create(GV, SystemZCP::DTPOFF); SDValue DTPOffset = DAG.getConstantPool(CPV, PtrVT, Align(8)); DTPOffset = DAG.getLoad( PtrVT, DL, DAG.getEntryNode(), DTPOffset, MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); Offset = DAG.getNode(ISD::ADD, DL, PtrVT, Offset, DTPOffset); break; } case TLSModel::InitialExec: { // Load the offset from the GOT. Offset = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, SystemZII::MO_INDNTPOFF); Offset = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Offset); Offset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Offset, MachinePointerInfo::getGOT(DAG.getMachineFunction())); break; } case TLSModel::LocalExec: { // Force the offset into the constant pool and load it from there. SystemZConstantPoolValue *CPV = SystemZConstantPoolValue::Create(GV, SystemZCP::NTPOFF); Offset = DAG.getConstantPool(CPV, PtrVT, Align(8)); Offset = DAG.getLoad( PtrVT, DL, DAG.getEntryNode(), Offset, MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); break; } } // Add the base and offset together. 
return DAG.getNode(ISD::ADD, DL, PtrVT, TP, Offset); } SDValue SystemZTargetLowering::lowerBlockAddress(BlockAddressSDNode *Node, SelectionDAG &DAG) const { SDLoc DL(Node); const BlockAddress *BA = Node->getBlockAddress(); int64_t Offset = Node->getOffset(); EVT PtrVT = getPointerTy(DAG.getDataLayout()); SDValue Result = DAG.getTargetBlockAddress(BA, PtrVT, Offset); Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result); return Result; } SDValue SystemZTargetLowering::lowerJumpTable(JumpTableSDNode *JT, SelectionDAG &DAG) const { SDLoc DL(JT); EVT PtrVT = getPointerTy(DAG.getDataLayout()); SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), PtrVT); // Use LARL to load the address of the table. return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result); } SDValue SystemZTargetLowering::lowerConstantPool(ConstantPoolSDNode *CP, SelectionDAG &DAG) const { SDLoc DL(CP); EVT PtrVT = getPointerTy(DAG.getDataLayout()); SDValue Result; if (CP->isMachineConstantPoolEntry()) Result = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, CP->getAlign()); else Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlign(), CP->getOffset()); // Use LARL to load the address of the constant pool entry. return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result); } SDValue SystemZTargetLowering::lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { auto *TFL = Subtarget.getFrameLowering(); MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo &MFI = MF.getFrameInfo(); MFI.setFrameAddressIsTaken(true); SDLoc DL(Op); unsigned Depth = Op.getConstantOperandVal(0); EVT PtrVT = getPointerTy(DAG.getDataLayout()); // By definition, the frame address is the address of the back chain. (In // the case of packed stack without backchain, return the address where the // backchain would have been stored. This will either be an unused space or // contain a saved register). int BackChainIdx = TFL->getOrCreateFramePointerSaveIndex(MF); SDValue BackChain = DAG.getFrameIndex(BackChainIdx, PtrVT); if (Depth > 0) { // FIXME The frontend should detect this case. if (!MF.getSubtarget().hasBackChain()) report_fatal_error("Unsupported stack frame traversal count"); SDValue Offset = DAG.getConstant(TFL->getBackchainOffset(MF), DL, PtrVT); while (Depth--) { BackChain = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), BackChain, MachinePointerInfo()); BackChain = DAG.getNode(ISD::ADD, DL, PtrVT, BackChain, Offset); } } return BackChain; } SDValue SystemZTargetLowering::lowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo &MFI = MF.getFrameInfo(); MFI.setReturnAddressIsTaken(true); if (verifyReturnAddressArgumentIsConstant(Op, DAG)) return SDValue(); SDLoc DL(Op); unsigned Depth = Op.getConstantOperandVal(0); EVT PtrVT = getPointerTy(DAG.getDataLayout()); if (Depth > 0) { // FIXME The frontend should detect this case. if (!MF.getSubtarget().hasBackChain()) report_fatal_error("Unsupported stack frame traversal count"); SDValue FrameAddr = lowerFRAMEADDR(Op, DAG); const auto *TFL = Subtarget.getFrameLowering(); int Offset = TFL->getReturnAddressOffset(MF); SDValue Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, FrameAddr, DAG.getConstant(Offset, DL, PtrVT)); return DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo()); } // Return R14D (Elf) / R7D (XPLINK), which has the return address. Mark it an // implicit live-in. 
SystemZCallingConventionRegisters *CCR = Subtarget.getSpecialRegisters(); Register LinkReg = MF.addLiveIn(CCR->getReturnFunctionAddressRegister(), &SystemZ::GR64BitRegClass); return DAG.getCopyFromReg(DAG.getEntryNode(), DL, LinkReg, PtrVT); } SDValue SystemZTargetLowering::lowerBITCAST(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); SDValue In = Op.getOperand(0); EVT InVT = In.getValueType(); EVT ResVT = Op.getValueType(); // Convert loads directly. This is normally done by DAGCombiner, // but we need this case for bitcasts that are created during lowering // and which are then lowered themselves. if (auto *LoadN = dyn_cast(In)) if (ISD::isNormalLoad(LoadN)) { SDValue NewLoad = DAG.getLoad(ResVT, DL, LoadN->getChain(), LoadN->getBasePtr(), LoadN->getMemOperand()); // Update the chain uses. DAG.ReplaceAllUsesOfValueWith(SDValue(LoadN, 1), NewLoad.getValue(1)); return NewLoad; } if (InVT == MVT::i32 && ResVT == MVT::f32) { SDValue In64; if (Subtarget.hasHighWord()) { SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::i64); In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL, MVT::i64, SDValue(U64, 0), In); } else { In64 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, In); In64 = DAG.getNode(ISD::SHL, DL, MVT::i64, In64, DAG.getConstant(32, DL, MVT::i64)); } SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::f64, In64); return DAG.getTargetExtractSubreg(SystemZ::subreg_h32, DL, MVT::f32, Out64); } if (InVT == MVT::f32 && ResVT == MVT::i32) { SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f64); SDValue In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL, MVT::f64, SDValue(U64, 0), In); SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::i64, In64); if (Subtarget.hasHighWord()) return DAG.getTargetExtractSubreg(SystemZ::subreg_h32, DL, MVT::i32, Out64); SDValue Shift = DAG.getNode(ISD::SRL, DL, MVT::i64, Out64, DAG.getConstant(32, DL, MVT::i64)); return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Shift); } llvm_unreachable("Unexpected bitcast combination"); } SDValue SystemZTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const { if (Subtarget.isTargetXPLINK64()) return lowerVASTART_XPLINK(Op, DAG); else return lowerVASTART_ELF(Op, DAG); } SDValue SystemZTargetLowering::lowerVASTART_XPLINK(SDValue Op, SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); SystemZMachineFunctionInfo *FuncInfo = MF.getInfo(); SDLoc DL(Op); // vastart just stores the address of the VarArgsFrameIndex slot into the // memory location argument. EVT PtrVT = getPointerTy(DAG.getDataLayout()); SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); const Value *SV = cast(Op.getOperand(2))->getValue(); return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1), MachinePointerInfo(SV)); } SDValue SystemZTargetLowering::lowerVASTART_ELF(SDValue Op, SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); SystemZMachineFunctionInfo *FuncInfo = MF.getInfo(); EVT PtrVT = getPointerTy(DAG.getDataLayout()); SDValue Chain = Op.getOperand(0); SDValue Addr = Op.getOperand(1); const Value *SV = cast(Op.getOperand(2))->getValue(); SDLoc DL(Op); // The initial values of each field. 
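  // The ELF s390x va_list has four 8-byte fields (commonly named __gpr,
  // __fpr, __overflow_arg_area and __reg_save_area): the number of fixed GPR
  // and FPR arguments already consumed, the overflow argument area, and the
  // register save area.  They are stored below at offsets 0, 8, 16 and 24.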
const unsigned NumFields = 4; SDValue Fields[NumFields] = { DAG.getConstant(FuncInfo->getVarArgsFirstGPR(), DL, PtrVT), DAG.getConstant(FuncInfo->getVarArgsFirstFPR(), DL, PtrVT), DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT), DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), PtrVT) }; // Store each field into its respective slot. SDValue MemOps[NumFields]; unsigned Offset = 0; for (unsigned I = 0; I < NumFields; ++I) { SDValue FieldAddr = Addr; if (Offset != 0) FieldAddr = DAG.getNode(ISD::ADD, DL, PtrVT, FieldAddr, DAG.getIntPtrConstant(Offset, DL)); MemOps[I] = DAG.getStore(Chain, DL, Fields[I], FieldAddr, MachinePointerInfo(SV, Offset)); Offset += 8; } return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps); } SDValue SystemZTargetLowering::lowerVACOPY(SDValue Op, SelectionDAG &DAG) const { SDValue Chain = Op.getOperand(0); SDValue DstPtr = Op.getOperand(1); SDValue SrcPtr = Op.getOperand(2); const Value *DstSV = cast(Op.getOperand(3))->getValue(); const Value *SrcSV = cast(Op.getOperand(4))->getValue(); SDLoc DL(Op); uint32_t Sz = Subtarget.isTargetXPLINK64() ? getTargetMachine().getPointerSize(0) : 32; return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, DAG.getIntPtrConstant(Sz, DL), Align(8), /*isVolatile*/ false, /*AlwaysInline*/ false, /*CI=*/nullptr, std::nullopt, MachinePointerInfo(DstSV), MachinePointerInfo(SrcSV)); } SDValue SystemZTargetLowering::lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const { if (Subtarget.isTargetXPLINK64()) return lowerDYNAMIC_STACKALLOC_XPLINK(Op, DAG); else return lowerDYNAMIC_STACKALLOC_ELF(Op, DAG); } SDValue SystemZTargetLowering::lowerDYNAMIC_STACKALLOC_XPLINK(SDValue Op, SelectionDAG &DAG) const { const TargetFrameLowering *TFI = Subtarget.getFrameLowering(); MachineFunction &MF = DAG.getMachineFunction(); bool RealignOpt = !MF.getFunction().hasFnAttribute("no-realign-stack"); SDValue Chain = Op.getOperand(0); SDValue Size = Op.getOperand(1); SDValue Align = Op.getOperand(2); SDLoc DL(Op); // If user has set the no alignment function attribute, ignore // alloca alignments. uint64_t AlignVal = (RealignOpt ? Align->getAsZExtVal() : 0); uint64_t StackAlign = TFI->getStackAlignment(); uint64_t RequiredAlign = std::max(AlignVal, StackAlign); uint64_t ExtraAlignSpace = RequiredAlign - StackAlign; SDValue NeededSpace = Size; // Add extra space for alignment if needed. EVT PtrVT = getPointerTy(MF.getDataLayout()); if (ExtraAlignSpace) NeededSpace = DAG.getNode(ISD::ADD, DL, PtrVT, NeededSpace, DAG.getConstant(ExtraAlignSpace, DL, PtrVT)); bool IsSigned = false; bool DoesNotReturn = false; bool IsReturnValueUsed = false; EVT VT = Op.getValueType(); SDValue AllocaCall = makeExternalCall(Chain, DAG, "@@ALCAXP", VT, ArrayRef(NeededSpace), CallingConv::C, IsSigned, DL, DoesNotReturn, IsReturnValueUsed) .first; // Perform a CopyFromReg from %GPR4 (stack pointer register). Chain and Glue // to end of call in order to ensure it isn't broken up from the call // sequence. auto &Regs = Subtarget.getSpecialRegisters(); Register SPReg = Regs.getStackPointerRegister(); Chain = AllocaCall.getValue(1); SDValue Glue = AllocaCall.getValue(2); SDValue NewSPRegNode = DAG.getCopyFromReg(Chain, DL, SPReg, PtrVT, Glue); Chain = NewSPRegNode.getValue(1); MVT PtrMVT = getPointerMemTy(MF.getDataLayout()); SDValue ArgAdjust = DAG.getNode(SystemZISD::ADJDYNALLOC, DL, PtrMVT); SDValue Result = DAG.getNode(ISD::ADD, DL, PtrMVT, NewSPRegNode, ArgAdjust); // Dynamically realign if needed. 
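  // Rounding up to RequiredAlign happens in two steps: over-allocate by
  // ExtraAlignSpace (RequiredAlign - StackAlign), then mask the address with
  // ~(RequiredAlign - 1).  With hypothetical values RequiredAlign == 64 and
  // StackAlign == 8, an address ending in 0x28 first becomes ...0x60 and is
  // then masked down to ...0x40, which is 64-byte aligned and still inside
  // the over-allocated block.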
if (ExtraAlignSpace) { Result = DAG.getNode(ISD::ADD, DL, PtrVT, Result, DAG.getConstant(ExtraAlignSpace, DL, PtrVT)); Result = DAG.getNode(ISD::AND, DL, PtrVT, Result, DAG.getConstant(~(RequiredAlign - 1), DL, PtrVT)); } SDValue Ops[2] = {Result, Chain}; return DAG.getMergeValues(Ops, DL); } SDValue SystemZTargetLowering::lowerDYNAMIC_STACKALLOC_ELF(SDValue Op, SelectionDAG &DAG) const { const TargetFrameLowering *TFI = Subtarget.getFrameLowering(); MachineFunction &MF = DAG.getMachineFunction(); bool RealignOpt = !MF.getFunction().hasFnAttribute("no-realign-stack"); bool StoreBackchain = MF.getSubtarget().hasBackChain(); SDValue Chain = Op.getOperand(0); SDValue Size = Op.getOperand(1); SDValue Align = Op.getOperand(2); SDLoc DL(Op); // If user has set the no alignment function attribute, ignore // alloca alignments. uint64_t AlignVal = (RealignOpt ? Align->getAsZExtVal() : 0); uint64_t StackAlign = TFI->getStackAlignment(); uint64_t RequiredAlign = std::max(AlignVal, StackAlign); uint64_t ExtraAlignSpace = RequiredAlign - StackAlign; Register SPReg = getStackPointerRegisterToSaveRestore(); SDValue NeededSpace = Size; // Get a reference to the stack pointer. SDValue OldSP = DAG.getCopyFromReg(Chain, DL, SPReg, MVT::i64); // If we need a backchain, save it now. SDValue Backchain; if (StoreBackchain) Backchain = DAG.getLoad(MVT::i64, DL, Chain, getBackchainAddress(OldSP, DAG), MachinePointerInfo()); // Add extra space for alignment if needed. if (ExtraAlignSpace) NeededSpace = DAG.getNode(ISD::ADD, DL, MVT::i64, NeededSpace, DAG.getConstant(ExtraAlignSpace, DL, MVT::i64)); // Get the new stack pointer value. SDValue NewSP; if (hasInlineStackProbe(MF)) { NewSP = DAG.getNode(SystemZISD::PROBED_ALLOCA, DL, DAG.getVTList(MVT::i64, MVT::Other), Chain, OldSP, NeededSpace); Chain = NewSP.getValue(1); } else { NewSP = DAG.getNode(ISD::SUB, DL, MVT::i64, OldSP, NeededSpace); // Copy the new stack pointer back. Chain = DAG.getCopyToReg(Chain, DL, SPReg, NewSP); } // The allocated data lives above the 160 bytes allocated for the standard // frame, plus any outgoing stack arguments. We don't know how much that // amounts to yet, so emit a special ADJDYNALLOC placeholder. SDValue ArgAdjust = DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64); SDValue Result = DAG.getNode(ISD::ADD, DL, MVT::i64, NewSP, ArgAdjust); // Dynamically realign if needed. if (RequiredAlign > StackAlign) { Result = DAG.getNode(ISD::ADD, DL, MVT::i64, Result, DAG.getConstant(ExtraAlignSpace, DL, MVT::i64)); Result = DAG.getNode(ISD::AND, DL, MVT::i64, Result, DAG.getConstant(~(RequiredAlign - 1), DL, MVT::i64)); } if (StoreBackchain) Chain = DAG.getStore(Chain, DL, Backchain, getBackchainAddress(NewSP, DAG), MachinePointerInfo()); SDValue Ops[2] = { Result, Chain }; return DAG.getMergeValues(Ops, DL); } SDValue SystemZTargetLowering::lowerGET_DYNAMIC_AREA_OFFSET( SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); return DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64); } SDValue SystemZTargetLowering::lowerSMUL_LOHI(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); SDLoc DL(Op); SDValue Ops[2]; if (is32Bit(VT)) // Just do a normal 64-bit multiplication and extract the results. // We define this so that it can be used for constant division. lowerMUL_LOHI32(DAG, DL, ISD::SIGN_EXTEND, Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]); else if (Subtarget.hasMiscellaneousExtensions2()) // SystemZISD::SMUL_LOHI returns the low result in the odd register and // the high result in the even register. 
ISD::SMUL_LOHI is defined to // return the low half first, so the results are in reverse order. lowerGR128Binary(DAG, DL, VT, SystemZISD::SMUL_LOHI, Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]); else { // Do a full 128-bit multiplication based on SystemZISD::UMUL_LOHI: // // (ll * rl) + ((lh * rl) << 64) + ((ll * rh) << 64) // // but using the fact that the upper halves are either all zeros // or all ones: // // (ll * rl) - ((lh & rl) << 64) - ((ll & rh) << 64) // // and grouping the right terms together since they are quicker than the // multiplication: // // (ll * rl) - (((lh & rl) + (ll & rh)) << 64) SDValue C63 = DAG.getConstant(63, DL, MVT::i64); SDValue LL = Op.getOperand(0); SDValue RL = Op.getOperand(1); SDValue LH = DAG.getNode(ISD::SRA, DL, VT, LL, C63); SDValue RH = DAG.getNode(ISD::SRA, DL, VT, RL, C63); // SystemZISD::UMUL_LOHI returns the low result in the odd register and // the high result in the even register. ISD::SMUL_LOHI is defined to // return the low half first, so the results are in reverse order. lowerGR128Binary(DAG, DL, VT, SystemZISD::UMUL_LOHI, LL, RL, Ops[1], Ops[0]); SDValue NegLLTimesRH = DAG.getNode(ISD::AND, DL, VT, LL, RH); SDValue NegLHTimesRL = DAG.getNode(ISD::AND, DL, VT, LH, RL); SDValue NegSum = DAG.getNode(ISD::ADD, DL, VT, NegLLTimesRH, NegLHTimesRL); Ops[1] = DAG.getNode(ISD::SUB, DL, VT, Ops[1], NegSum); } return DAG.getMergeValues(Ops, DL); } SDValue SystemZTargetLowering::lowerUMUL_LOHI(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); SDLoc DL(Op); SDValue Ops[2]; if (is32Bit(VT)) // Just do a normal 64-bit multiplication and extract the results. // We define this so that it can be used for constant division. lowerMUL_LOHI32(DAG, DL, ISD::ZERO_EXTEND, Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]); else // SystemZISD::UMUL_LOHI returns the low result in the odd register and // the high result in the even register. ISD::UMUL_LOHI is defined to // return the low half first, so the results are in reverse order. lowerGR128Binary(DAG, DL, VT, SystemZISD::UMUL_LOHI, Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]); return DAG.getMergeValues(Ops, DL); } SDValue SystemZTargetLowering::lowerSDIVREM(SDValue Op, SelectionDAG &DAG) const { SDValue Op0 = Op.getOperand(0); SDValue Op1 = Op.getOperand(1); EVT VT = Op.getValueType(); SDLoc DL(Op); // We use DSGF for 32-bit division. This means the first operand must // always be 64-bit, and the second operand should be 32-bit whenever // that is possible, to improve performance. if (is32Bit(VT)) Op0 = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op0); else if (DAG.ComputeNumSignBits(Op1) > 32) Op1 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Op1); // DSG(F) returns the remainder in the even register and the // quotient in the odd register. SDValue Ops[2]; lowerGR128Binary(DAG, DL, VT, SystemZISD::SDIVREM, Op0, Op1, Ops[1], Ops[0]); return DAG.getMergeValues(Ops, DL); } SDValue SystemZTargetLowering::lowerUDIVREM(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); SDLoc DL(Op); // DL(G) returns the remainder in the even register and the // quotient in the odd register. SDValue Ops[2]; lowerGR128Binary(DAG, DL, VT, SystemZISD::UDIVREM, Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]); return DAG.getMergeValues(Ops, DL); } SDValue SystemZTargetLowering::lowerOR(SDValue Op, SelectionDAG &DAG) const { assert(Op.getValueType() == MVT::i64 && "Should be 64-bit operation"); // Get the known-zero masks for each operand. 
SDValue Ops[] = {Op.getOperand(0), Op.getOperand(1)}; KnownBits Known[2] = {DAG.computeKnownBits(Ops[0]), DAG.computeKnownBits(Ops[1])}; // See if the upper 32 bits of one operand and the lower 32 bits of the // other are known zero. They are the low and high operands respectively. uint64_t Masks[] = { Known[0].Zero.getZExtValue(), Known[1].Zero.getZExtValue() }; unsigned High, Low; if ((Masks[0] >> 32) == 0xffffffff && uint32_t(Masks[1]) == 0xffffffff) High = 1, Low = 0; else if ((Masks[1] >> 32) == 0xffffffff && uint32_t(Masks[0]) == 0xffffffff) High = 0, Low = 1; else return Op; SDValue LowOp = Ops[Low]; SDValue HighOp = Ops[High]; // If the high part is a constant, we're better off using IILH. if (HighOp.getOpcode() == ISD::Constant) return Op; // If the low part is a constant that is outside the range of LHI, // then we're better off using IILF. if (LowOp.getOpcode() == ISD::Constant) { int64_t Value = int32_t(LowOp->getAsZExtVal()); if (!isInt<16>(Value)) return Op; } // Check whether the high part is an AND that doesn't change the // high 32 bits and just masks out low bits. We can skip it if so. if (HighOp.getOpcode() == ISD::AND && HighOp.getOperand(1).getOpcode() == ISD::Constant) { SDValue HighOp0 = HighOp.getOperand(0); uint64_t Mask = HighOp.getConstantOperandVal(1); if (DAG.MaskedValueIsZero(HighOp0, APInt(64, ~(Mask | 0xffffffff)))) HighOp = HighOp0; } // Take advantage of the fact that all GR32 operations only change the // low 32 bits by truncating Low to an i32 and inserting it directly // using a subreg. The interesting cases are those where the truncation // can be folded. SDLoc DL(Op); SDValue Low32 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, LowOp); return DAG.getTargetInsertSubreg(SystemZ::subreg_l32, DL, MVT::i64, HighOp, Low32); } // Lower SADDO/SSUBO/UADDO/USUBO nodes. 
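// For the i128 case below, the value is computed with a plain ADD/SUB and
// the overflow bit is derived separately from the vector carry/borrow nodes
// (VACC/VSCBI).  Note that VSCBI follows the carry convention (1 means "no
// borrow"), which is why the borrow flag is XORed with 1 before being
// merged with the result.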
SDValue SystemZTargetLowering::lowerXALUO(SDValue Op, SelectionDAG &DAG) const { SDNode *N = Op.getNode(); SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); SDLoc DL(N); if (N->getValueType(0) == MVT::i128) { unsigned BaseOp = 0; unsigned FlagOp = 0; bool IsBorrow = false; switch (Op.getOpcode()) { default: llvm_unreachable("Unknown instruction!"); case ISD::UADDO: BaseOp = ISD::ADD; FlagOp = SystemZISD::VACC; break; case ISD::USUBO: BaseOp = ISD::SUB; FlagOp = SystemZISD::VSCBI; IsBorrow = true; break; } SDValue Result = DAG.getNode(BaseOp, DL, MVT::i128, LHS, RHS); SDValue Flag = DAG.getNode(FlagOp, DL, MVT::i128, LHS, RHS); Flag = DAG.getNode(ISD::AssertZext, DL, MVT::i128, Flag, DAG.getValueType(MVT::i1)); Flag = DAG.getZExtOrTrunc(Flag, DL, N->getValueType(1)); if (IsBorrow) Flag = DAG.getNode(ISD::XOR, DL, Flag.getValueType(), Flag, DAG.getConstant(1, DL, Flag.getValueType())); return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, Flag); } unsigned BaseOp = 0; unsigned CCValid = 0; unsigned CCMask = 0; switch (Op.getOpcode()) { default: llvm_unreachable("Unknown instruction!"); case ISD::SADDO: BaseOp = SystemZISD::SADDO; CCValid = SystemZ::CCMASK_ARITH; CCMask = SystemZ::CCMASK_ARITH_OVERFLOW; break; case ISD::SSUBO: BaseOp = SystemZISD::SSUBO; CCValid = SystemZ::CCMASK_ARITH; CCMask = SystemZ::CCMASK_ARITH_OVERFLOW; break; case ISD::UADDO: BaseOp = SystemZISD::UADDO; CCValid = SystemZ::CCMASK_LOGICAL; CCMask = SystemZ::CCMASK_LOGICAL_CARRY; break; case ISD::USUBO: BaseOp = SystemZISD::USUBO; CCValid = SystemZ::CCMASK_LOGICAL; CCMask = SystemZ::CCMASK_LOGICAL_BORROW; break; } SDVTList VTs = DAG.getVTList(N->getValueType(0), MVT::i32); SDValue Result = DAG.getNode(BaseOp, DL, VTs, LHS, RHS); SDValue SetCC = emitSETCC(DAG, DL, Result.getValue(1), CCValid, CCMask); if (N->getValueType(1) == MVT::i1) SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC); return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, SetCC); } static bool isAddCarryChain(SDValue Carry) { while (Carry.getOpcode() == ISD::UADDO_CARRY) Carry = Carry.getOperand(2); return Carry.getOpcode() == ISD::UADDO; } static bool isSubBorrowChain(SDValue Carry) { while (Carry.getOpcode() == ISD::USUBO_CARRY) Carry = Carry.getOperand(2); return Carry.getOpcode() == ISD::USUBO; } // Lower UADDO_CARRY/USUBO_CARRY nodes. SDValue SystemZTargetLowering::lowerUADDSUBO_CARRY(SDValue Op, SelectionDAG &DAG) const { SDNode *N = Op.getNode(); MVT VT = N->getSimpleValueType(0); // Let legalize expand this if it isn't a legal type yet. 
if (!DAG.getTargetLoweringInfo().isTypeLegal(VT)) return SDValue(); SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); SDValue Carry = Op.getOperand(2); SDLoc DL(N); if (VT == MVT::i128) { unsigned BaseOp = 0; unsigned FlagOp = 0; bool IsBorrow = false; switch (Op.getOpcode()) { default: llvm_unreachable("Unknown instruction!"); case ISD::UADDO_CARRY: BaseOp = SystemZISD::VAC; FlagOp = SystemZISD::VACCC; break; case ISD::USUBO_CARRY: BaseOp = SystemZISD::VSBI; FlagOp = SystemZISD::VSBCBI; IsBorrow = true; break; } if (IsBorrow) Carry = DAG.getNode(ISD::XOR, DL, Carry.getValueType(), Carry, DAG.getConstant(1, DL, Carry.getValueType())); Carry = DAG.getZExtOrTrunc(Carry, DL, MVT::i128); SDValue Result = DAG.getNode(BaseOp, DL, MVT::i128, LHS, RHS, Carry); SDValue Flag = DAG.getNode(FlagOp, DL, MVT::i128, LHS, RHS, Carry); Flag = DAG.getNode(ISD::AssertZext, DL, MVT::i128, Flag, DAG.getValueType(MVT::i1)); Flag = DAG.getZExtOrTrunc(Flag, DL, N->getValueType(1)); if (IsBorrow) Flag = DAG.getNode(ISD::XOR, DL, Flag.getValueType(), Flag, DAG.getConstant(1, DL, Flag.getValueType())); return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, Flag); } unsigned BaseOp = 0; unsigned CCValid = 0; unsigned CCMask = 0; switch (Op.getOpcode()) { default: llvm_unreachable("Unknown instruction!"); case ISD::UADDO_CARRY: if (!isAddCarryChain(Carry)) return SDValue(); BaseOp = SystemZISD::ADDCARRY; CCValid = SystemZ::CCMASK_LOGICAL; CCMask = SystemZ::CCMASK_LOGICAL_CARRY; break; case ISD::USUBO_CARRY: if (!isSubBorrowChain(Carry)) return SDValue(); BaseOp = SystemZISD::SUBCARRY; CCValid = SystemZ::CCMASK_LOGICAL; CCMask = SystemZ::CCMASK_LOGICAL_BORROW; break; } // Set the condition code from the carry flag. Carry = DAG.getNode(SystemZISD::GET_CCMASK, DL, MVT::i32, Carry, DAG.getConstant(CCValid, DL, MVT::i32), DAG.getConstant(CCMask, DL, MVT::i32)); SDVTList VTs = DAG.getVTList(VT, MVT::i32); SDValue Result = DAG.getNode(BaseOp, DL, VTs, LHS, RHS, Carry); SDValue SetCC = emitSETCC(DAG, DL, Result.getValue(1), CCValid, CCMask); if (N->getValueType(1) == MVT::i1) SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC); return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, SetCC); } SDValue SystemZTargetLowering::lowerCTPOP(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); SDLoc DL(Op); Op = Op.getOperand(0); if (VT.getScalarSizeInBits() == 128) { Op = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op); Op = DAG.getNode(ISD::CTPOP, DL, MVT::v2i64, Op); SDValue Tmp = DAG.getSplatBuildVector(MVT::v2i64, DL, DAG.getConstant(0, DL, MVT::i64)); Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp); return Op; } // Handle vector types via VPOPCT. 
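// VPOPCT counts bits within each byte, so wider element types are built up
// from the per-byte counts: for v8i16 the two byte counts of each halfword
// are summed via a shift-left-by-8 and add, then shifted back down; for
// v4i32 and v2i64 a VSUM against a zero vector accumulates the byte counts
// into each wider element.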
if (VT.isVector()) { Op = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Op); Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::v16i8, Op); switch (VT.getScalarSizeInBits()) { case 8: break; case 16: { Op = DAG.getNode(ISD::BITCAST, DL, VT, Op); SDValue Shift = DAG.getConstant(8, DL, MVT::i32); SDValue Tmp = DAG.getNode(SystemZISD::VSHL_BY_SCALAR, DL, VT, Op, Shift); Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp); Op = DAG.getNode(SystemZISD::VSRL_BY_SCALAR, DL, VT, Op, Shift); break; } case 32: { SDValue Tmp = DAG.getSplatBuildVector(MVT::v16i8, DL, DAG.getConstant(0, DL, MVT::i32)); Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp); break; } case 64: { SDValue Tmp = DAG.getSplatBuildVector(MVT::v16i8, DL, DAG.getConstant(0, DL, MVT::i32)); Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::v4i32, Op, Tmp); Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp); break; } default: llvm_unreachable("Unexpected type"); } return Op; } // Get the known-zero mask for the operand. KnownBits Known = DAG.computeKnownBits(Op); unsigned NumSignificantBits = Known.getMaxValue().getActiveBits(); if (NumSignificantBits == 0) return DAG.getConstant(0, DL, VT); // Skip known-zero high parts of the operand. int64_t OrigBitSize = VT.getSizeInBits(); int64_t BitSize = llvm::bit_ceil(NumSignificantBits); BitSize = std::min(BitSize, OrigBitSize); // The POPCNT instruction counts the number of bits in each byte. Op = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op); Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::i64, Op); Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op); // Add up per-byte counts in a binary tree. All bits of Op at // position larger than BitSize remain zero throughout. for (int64_t I = BitSize / 2; I >= 8; I = I / 2) { SDValue Tmp = DAG.getNode(ISD::SHL, DL, VT, Op, DAG.getConstant(I, DL, VT)); if (BitSize != OrigBitSize) Tmp = DAG.getNode(ISD::AND, DL, VT, Tmp, DAG.getConstant(((uint64_t)1 << BitSize) - 1, DL, VT)); Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp); } // Extract overall result from high byte. if (BitSize > 8) Op = DAG.getNode(ISD::SRL, DL, VT, Op, DAG.getConstant(BitSize - 8, DL, VT)); return Op; } SDValue SystemZTargetLowering::lowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); AtomicOrdering FenceOrdering = static_cast(Op.getConstantOperandVal(1)); SyncScope::ID FenceSSID = static_cast(Op.getConstantOperandVal(2)); // The only fence that needs an instruction is a sequentially-consistent // cross-thread fence. if (FenceOrdering == AtomicOrdering::SequentiallyConsistent && FenceSSID == SyncScope::System) { return SDValue(DAG.getMachineNode(SystemZ::Serialize, DL, MVT::Other, Op.getOperand(0)), 0); } // MEMBARRIER is a compiler barrier; it codegens to a no-op. return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0)); } SDValue SystemZTargetLowering::lowerATOMIC_LDST_I128(SDValue Op, SelectionDAG &DAG) const { auto *Node = cast(Op.getNode()); assert( (Node->getMemoryVT() == MVT::i128 || Node->getMemoryVT() == MVT::f128) && "Only custom lowering i128 or f128."); // Use same code to handle both legal and non-legal i128 types. SmallVector Results; LowerOperationWrapper(Node, Results, DAG); return DAG.getMergeValues(Results, SDLoc(Op)); } // Prepare for a Compare And Swap for a subword operation. This needs to be // done in memory with 4 bytes at natural alignment. 
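// Illustrative example: a halfword at address 0x1006 uses the aligned word
// at 0x1004; BitShift is 8 * 0x1006 truncated to 32 bits, which as a rotate
// amount is equivalent to 16, so rotating the containing word left by 16
// brings the halfword into the top bits of the GR32, and NegBitShift
// rotates it back.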
static void getCSAddressAndShifts(SDValue Addr, SelectionDAG &DAG, SDLoc DL,
                                  SDValue &AlignedAddr, SDValue &BitShift,
                                  SDValue &NegBitShift) {
  EVT PtrVT = Addr.getValueType();
  EVT WideVT = MVT::i32;

  // Get the address of the containing word.
  AlignedAddr = DAG.getNode(ISD::AND, DL, PtrVT, Addr,
                            DAG.getConstant(-4, DL, PtrVT));

  // Get the number of bits that the word must be rotated left in order
  // to bring the field to the top bits of a GR32.
  BitShift = DAG.getNode(ISD::SHL, DL, PtrVT, Addr,
                         DAG.getConstant(3, DL, PtrVT));
  BitShift = DAG.getNode(ISD::TRUNCATE, DL, WideVT, BitShift);

  // Get the complementing shift amount, for rotating a field in the top
  // bits back to its proper position.
  NegBitShift = DAG.getNode(ISD::SUB, DL, WideVT,
                            DAG.getConstant(0, DL, WideVT), BitShift);
}

// Op is an 8-, 16-bit or 32-bit ATOMIC_LOAD_* operation.  Lower the first
// two into the fullword ATOMIC_LOADW_* operation given by Opcode.
SDValue SystemZTargetLowering::lowerATOMIC_LOAD_OP(SDValue Op,
                                                   SelectionDAG &DAG,
                                                   unsigned Opcode) const {
  auto *Node = cast<AtomicSDNode>(Op.getNode());

  // 32-bit operations need no special handling.
  EVT NarrowVT = Node->getMemoryVT();
  EVT WideVT = MVT::i32;
  if (NarrowVT == WideVT)
    return Op;

  int64_t BitSize = NarrowVT.getSizeInBits();
  SDValue ChainIn = Node->getChain();
  SDValue Addr = Node->getBasePtr();
  SDValue Src2 = Node->getVal();
  MachineMemOperand *MMO = Node->getMemOperand();
  SDLoc DL(Node);

  // Convert atomic subtracts of constants into additions.
  if (Opcode == SystemZISD::ATOMIC_LOADW_SUB)
    if (auto *Const = dyn_cast<ConstantSDNode>(Src2)) {
      Opcode = SystemZISD::ATOMIC_LOADW_ADD;
      Src2 = DAG.getConstant(-Const->getSExtValue(), DL, Src2.getValueType());
    }

  SDValue AlignedAddr, BitShift, NegBitShift;
  getCSAddressAndShifts(Addr, DAG, DL, AlignedAddr, BitShift, NegBitShift);

  // Extend the source operand to 32 bits and prepare it for the inner loop.
  // ATOMIC_SWAPW uses RISBG to rotate the field left, but all other
  // operations require the source to be shifted in advance.  (This shift
  // can be folded if the source is constant.)  For AND and NAND, the lower
  // bits must be set, while for other opcodes they should be left clear.
  if (Opcode != SystemZISD::ATOMIC_SWAPW)
    Src2 = DAG.getNode(ISD::SHL, DL, WideVT, Src2,
                       DAG.getConstant(32 - BitSize, DL, WideVT));
  if (Opcode == SystemZISD::ATOMIC_LOADW_AND ||
      Opcode == SystemZISD::ATOMIC_LOADW_NAND)
    Src2 = DAG.getNode(ISD::OR, DL, WideVT, Src2,
                       DAG.getConstant(uint32_t(-1) >> BitSize, DL, WideVT));

  // Construct the ATOMIC_LOADW_* node.
  SDVTList VTList = DAG.getVTList(WideVT, MVT::Other);
  SDValue Ops[] = { ChainIn, AlignedAddr, Src2, BitShift, NegBitShift,
                    DAG.getConstant(BitSize, DL, WideVT) };
  SDValue AtomicOp = DAG.getMemIntrinsicNode(Opcode, DL, VTList, Ops,
                                             NarrowVT, MMO);

  // Rotate the result of the final CS so that the field is in the lower
  // bits of a GR32, then truncate it.
  SDValue ResultShift = DAG.getNode(ISD::ADD, DL, WideVT, BitShift,
                                    DAG.getConstant(BitSize, DL, WideVT));
  SDValue Result = DAG.getNode(ISD::ROTL, DL, WideVT, AtomicOp, ResultShift);

  SDValue RetOps[2] = { Result, AtomicOp.getValue(1) };
  return DAG.getMergeValues(RetOps, DL);
}

// Op is an ATOMIC_LOAD_SUB operation.  Lower 8- and 16-bit operations into
// ATOMIC_LOADW_SUBs and convert 32- and 64-bit operations into additions.
SDValue SystemZTargetLowering::lowerATOMIC_LOAD_SUB(SDValue Op,
                                                    SelectionDAG &DAG) const {
  auto *Node = cast<AtomicSDNode>(Op.getNode());
  EVT MemVT = Node->getMemoryVT();
  if (MemVT == MVT::i32 || MemVT == MVT::i64) {
    // A full-width operation: negate and use LAA(G).
    assert(Op.getValueType() == MemVT && "Mismatched VTs");
    assert(Subtarget.hasInterlockedAccess1() &&
           "Should have been expanded by AtomicExpand pass.");
    SDValue Src2 = Node->getVal();
    SDLoc DL(Src2);
    SDValue NegSrc2 =
      DAG.getNode(ISD::SUB, DL, MemVT, DAG.getConstant(0, DL, MemVT), Src2);
    return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, DL, MemVT,
                         Node->getChain(), Node->getBasePtr(), NegSrc2,
                         Node->getMemOperand());
  }

  return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_SUB);
}

// Lower 8/16/32/64-bit ATOMIC_CMP_SWAP_WITH_SUCCESS node.
SDValue SystemZTargetLowering::lowerATOMIC_CMP_SWAP(SDValue Op,
                                                    SelectionDAG &DAG) const {
  auto *Node = cast<AtomicSDNode>(Op.getNode());
  SDValue ChainIn = Node->getOperand(0);
  SDValue Addr = Node->getOperand(1);
  SDValue CmpVal = Node->getOperand(2);
  SDValue SwapVal = Node->getOperand(3);
  MachineMemOperand *MMO = Node->getMemOperand();
  SDLoc DL(Node);

  if (Node->getMemoryVT() == MVT::i128) {
    // Use same code to handle both legal and non-legal i128 types.
    SmallVector<SDValue> Results;
    LowerOperationWrapper(Node, Results, DAG);
    return DAG.getMergeValues(Results, DL);
  }

  // We have native support for 32-bit and 64-bit compare and swap, but we
  // still need to expand extracting the "success" result from the CC.
  EVT NarrowVT = Node->getMemoryVT();
  EVT WideVT = NarrowVT == MVT::i64 ? MVT::i64 : MVT::i32;
  if (NarrowVT == WideVT) {
    SDVTList Tys = DAG.getVTList(WideVT, MVT::i32, MVT::Other);
    SDValue Ops[] = { ChainIn, Addr, CmpVal, SwapVal };
    SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAP,
                                               DL, Tys, Ops, NarrowVT, MMO);
    SDValue Success = emitSETCC(DAG, DL, AtomicOp.getValue(1),
                                SystemZ::CCMASK_CS, SystemZ::CCMASK_CS_EQ);

    DAG.ReplaceAllUsesOfValueWith(Op.getValue(0), AtomicOp.getValue(0));
    DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Success);
    DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), AtomicOp.getValue(2));
    return SDValue();
  }

  // Convert 8-bit and 16-bit compare and swap to a loop, implemented
  // via a fullword ATOMIC_CMP_SWAPW operation.
  int64_t BitSize = NarrowVT.getSizeInBits();

  SDValue AlignedAddr, BitShift, NegBitShift;
  getCSAddressAndShifts(Addr, DAG, DL, AlignedAddr, BitShift, NegBitShift);

  // Construct the ATOMIC_CMP_SWAPW node.
  SDVTList VTList = DAG.getVTList(WideVT, MVT::i32, MVT::Other);
  SDValue Ops[] = { ChainIn, AlignedAddr, CmpVal, SwapVal, BitShift,
                    NegBitShift, DAG.getConstant(BitSize, DL, WideVT) };
  SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAPW, DL,
                                             VTList, Ops, NarrowVT, MMO);
  SDValue Success = emitSETCC(DAG, DL, AtomicOp.getValue(1),
                              SystemZ::CCMASK_ICMP, SystemZ::CCMASK_CMP_EQ);

  // emitAtomicCmpSwapW() will zero extend the result (original value).
  SDValue OrigVal = DAG.getNode(ISD::AssertZext, DL, WideVT,
                                AtomicOp.getValue(0),
                                DAG.getValueType(NarrowVT));
  DAG.ReplaceAllUsesOfValueWith(Op.getValue(0), OrigVal);
  DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Success);
  DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), AtomicOp.getValue(2));
  return SDValue();
}

MachineMemOperand::Flags
SystemZTargetLowering::getTargetMMOFlags(const Instruction &I) const {
  // Because of how we convert atomic_load and atomic_store to normal loads and
  // stores in the DAG, we need to ensure that the MMOs are marked volatile
  // since DAGCombine hasn't been updated to account for atomic, but non
  // volatile loads.  (See D57601)
  if (auto *SI = dyn_cast<StoreInst>(&I))
    if (SI->isAtomic())
      return MachineMemOperand::MOVolatile;
  if (auto *LI = dyn_cast<LoadInst>(&I))
    if (LI->isAtomic())
      return MachineMemOperand::MOVolatile;
  if (auto *AI = dyn_cast<AtomicRMWInst>(&I))
    if (AI->isAtomic())
      return MachineMemOperand::MOVolatile;
  if (auto *AI = dyn_cast<AtomicCmpXchgInst>(&I))
    if (AI->isAtomic())
      return MachineMemOperand::MOVolatile;
  return MachineMemOperand::MONone;
}

SDValue SystemZTargetLowering::lowerSTACKSAVE(SDValue Op,
                                              SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  auto *Regs = Subtarget.getSpecialRegisters();
  if (MF.getFunction().getCallingConv() == CallingConv::GHC)
    report_fatal_error("Variable-sized stack allocations are not supported "
                       "in GHC calling convention");
  return DAG.getCopyFromReg(Op.getOperand(0), SDLoc(Op),
                            Regs->getStackPointerRegister(), Op.getValueType());
}

SDValue SystemZTargetLowering::lowerSTACKRESTORE(SDValue Op,
                                                 SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  auto *Regs = Subtarget.getSpecialRegisters();
  bool StoreBackchain = MF.getSubtarget<SystemZSubtarget>().hasBackChain();

  if (MF.getFunction().getCallingConv() == CallingConv::GHC)
    report_fatal_error("Variable-sized stack allocations are not supported "
                       "in GHC calling convention");

  SDValue Chain = Op.getOperand(0);
  SDValue NewSP = Op.getOperand(1);
  SDValue Backchain;
  SDLoc DL(Op);

  if (StoreBackchain) {
    SDValue OldSP = DAG.getCopyFromReg(
        Chain, DL, Regs->getStackPointerRegister(), MVT::i64);
    Backchain = DAG.getLoad(MVT::i64, DL, Chain,
                            getBackchainAddress(OldSP, DAG),
                            MachinePointerInfo());
  }

  Chain = DAG.getCopyToReg(Chain, DL, Regs->getStackPointerRegister(), NewSP);

  if (StoreBackchain)
    Chain = DAG.getStore(Chain, DL, Backchain,
                         getBackchainAddress(NewSP, DAG),
                         MachinePointerInfo());

  return Chain;
}

SDValue SystemZTargetLowering::lowerPREFETCH(SDValue Op,
                                             SelectionDAG &DAG) const {
  bool IsData = Op.getConstantOperandVal(4);
  if (!IsData)
    // Just preserve the chain.
    return Op.getOperand(0);

  SDLoc DL(Op);
  bool IsWrite = Op.getConstantOperandVal(2);
  unsigned Code = IsWrite ? SystemZ::PFD_WRITE : SystemZ::PFD_READ;
  auto *Node = cast<MemIntrinsicSDNode>(Op.getNode());
  SDValue Ops[] = {Op.getOperand(0), DAG.getTargetConstant(Code, DL, MVT::i32),
                   Op.getOperand(1)};
  return DAG.getMemIntrinsicNode(SystemZISD::PREFETCH, DL,
                                 Node->getVTList(), Ops,
                                 Node->getMemoryVT(), Node->getMemOperand());
}

// Convert condition code in CCReg to an i32 value.
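// IPM deposits the condition code in two bits near the top of the 32-bit
// result; the logical shift right by SystemZ::IPM_CC below moves those two
// bits down to bit positions 0 and 1.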
static SDValue getCCResult(SelectionDAG &DAG, SDValue CCReg) { SDLoc DL(CCReg); SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, CCReg); return DAG.getNode(ISD::SRL, DL, MVT::i32, IPM, DAG.getConstant(SystemZ::IPM_CC, DL, MVT::i32)); } SDValue SystemZTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const { unsigned Opcode, CCValid; if (isIntrinsicWithCCAndChain(Op, Opcode, CCValid)) { assert(Op->getNumValues() == 2 && "Expected only CC result and chain"); SDNode *Node = emitIntrinsicWithCCAndChain(DAG, Op, Opcode); SDValue CC = getCCResult(DAG, SDValue(Node, 0)); DAG.ReplaceAllUsesOfValueWith(SDValue(Op.getNode(), 0), CC); return SDValue(); } return SDValue(); } SDValue SystemZTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const { unsigned Opcode, CCValid; if (isIntrinsicWithCC(Op, Opcode, CCValid)) { SDNode *Node = emitIntrinsicWithCC(DAG, Op, Opcode); if (Op->getNumValues() == 1) return getCCResult(DAG, SDValue(Node, 0)); assert(Op->getNumValues() == 2 && "Expected a CC and non-CC result"); return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op), Op->getVTList(), SDValue(Node, 0), getCCResult(DAG, SDValue(Node, 1))); } unsigned Id = Op.getConstantOperandVal(0); switch (Id) { case Intrinsic::thread_pointer: return lowerThreadPointer(SDLoc(Op), DAG); case Intrinsic::s390_vpdi: return DAG.getNode(SystemZISD::PERMUTE_DWORDS, SDLoc(Op), Op.getValueType(), Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); case Intrinsic::s390_vperm: return DAG.getNode(SystemZISD::PERMUTE, SDLoc(Op), Op.getValueType(), Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); case Intrinsic::s390_vuphb: case Intrinsic::s390_vuphh: case Intrinsic::s390_vuphf: return DAG.getNode(SystemZISD::UNPACK_HIGH, SDLoc(Op), Op.getValueType(), Op.getOperand(1)); case Intrinsic::s390_vuplhb: case Intrinsic::s390_vuplhh: case Intrinsic::s390_vuplhf: return DAG.getNode(SystemZISD::UNPACKL_HIGH, SDLoc(Op), Op.getValueType(), Op.getOperand(1)); case Intrinsic::s390_vuplb: case Intrinsic::s390_vuplhw: case Intrinsic::s390_vuplf: return DAG.getNode(SystemZISD::UNPACK_LOW, SDLoc(Op), Op.getValueType(), Op.getOperand(1)); case Intrinsic::s390_vupllb: case Intrinsic::s390_vupllh: case Intrinsic::s390_vupllf: return DAG.getNode(SystemZISD::UNPACKL_LOW, SDLoc(Op), Op.getValueType(), Op.getOperand(1)); case Intrinsic::s390_vsumb: case Intrinsic::s390_vsumh: case Intrinsic::s390_vsumgh: case Intrinsic::s390_vsumgf: case Intrinsic::s390_vsumqf: case Intrinsic::s390_vsumqg: return DAG.getNode(SystemZISD::VSUM, SDLoc(Op), Op.getValueType(), Op.getOperand(1), Op.getOperand(2)); case Intrinsic::s390_vaq: return DAG.getNode(ISD::ADD, SDLoc(Op), Op.getValueType(), Op.getOperand(1), Op.getOperand(2)); case Intrinsic::s390_vaccb: case Intrinsic::s390_vacch: case Intrinsic::s390_vaccf: case Intrinsic::s390_vaccg: case Intrinsic::s390_vaccq: return DAG.getNode(SystemZISD::VACC, SDLoc(Op), Op.getValueType(), Op.getOperand(1), Op.getOperand(2)); case Intrinsic::s390_vacq: return DAG.getNode(SystemZISD::VAC, SDLoc(Op), Op.getValueType(), Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); case Intrinsic::s390_vacccq: return DAG.getNode(SystemZISD::VACCC, SDLoc(Op), Op.getValueType(), Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); case Intrinsic::s390_vsq: return DAG.getNode(ISD::SUB, SDLoc(Op), Op.getValueType(), Op.getOperand(1), Op.getOperand(2)); case Intrinsic::s390_vscbib: case Intrinsic::s390_vscbih: case Intrinsic::s390_vscbif: case Intrinsic::s390_vscbig: case Intrinsic::s390_vscbiq: 
return DAG.getNode(SystemZISD::VSCBI, SDLoc(Op), Op.getValueType(), Op.getOperand(1), Op.getOperand(2)); case Intrinsic::s390_vsbiq: return DAG.getNode(SystemZISD::VSBI, SDLoc(Op), Op.getValueType(), Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); case Intrinsic::s390_vsbcbiq: return DAG.getNode(SystemZISD::VSBCBI, SDLoc(Op), Op.getValueType(), Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); } return SDValue(); } namespace { // Says that SystemZISD operation Opcode can be used to perform the equivalent // of a VPERM with permute vector Bytes. If Opcode takes three operands, // Operand is the constant third operand, otherwise it is the number of // bytes in each element of the result. struct Permute { unsigned Opcode; unsigned Operand; unsigned char Bytes[SystemZ::VectorBytes]; }; } static const Permute PermuteForms[] = { // VMRHG { SystemZISD::MERGE_HIGH, 8, { 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23 } }, // VMRHF { SystemZISD::MERGE_HIGH, 4, { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } }, // VMRHH { SystemZISD::MERGE_HIGH, 2, { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } }, // VMRHB { SystemZISD::MERGE_HIGH, 1, { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } }, // VMRLG { SystemZISD::MERGE_LOW, 8, { 8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31 } }, // VMRLF { SystemZISD::MERGE_LOW, 4, { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } }, // VMRLH { SystemZISD::MERGE_LOW, 2, { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } }, // VMRLB { SystemZISD::MERGE_LOW, 1, { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } }, // VPKG { SystemZISD::PACK, 4, { 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 } }, // VPKF { SystemZISD::PACK, 2, { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } }, // VPKH { SystemZISD::PACK, 1, { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } }, // VPDI V1, V2, 4 (low half of V1, high half of V2) { SystemZISD::PERMUTE_DWORDS, 4, { 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 } }, // VPDI V1, V2, 1 (high half of V1, low half of V2) { SystemZISD::PERMUTE_DWORDS, 1, { 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 } } }; // Called after matching a vector shuffle against a particular pattern. // Both the original shuffle and the pattern have two vector operands. // OpNos[0] is the operand of the original shuffle that should be used for // operand 0 of the pattern, or -1 if operand 0 of the pattern can be anything. // OpNos[1] is the same for operand 1 of the pattern. Resolve these -1s and // set OpNo0 and OpNo1 to the shuffle operands that should actually be used // for operands 0 and 1 of the pattern. static bool chooseShuffleOpNos(int *OpNos, unsigned &OpNo0, unsigned &OpNo1) { if (OpNos[0] < 0) { if (OpNos[1] < 0) return false; OpNo0 = OpNo1 = OpNos[1]; } else if (OpNos[1] < 0) { OpNo0 = OpNo1 = OpNos[0]; } else { OpNo0 = OpNos[0]; OpNo1 = OpNos[1]; } return true; } // Bytes is a VPERM-like permute vector, except that -1 is used for // undefined bytes. Return true if the VPERM can be implemented using P. // When returning true set OpNo0 to the VPERM operand that should be // used for operand 0 of P and likewise OpNo1 for operand 1 of P. // // For example, if swapping the VPERM operands allows P to match, OpNo0 // will be 1 and OpNo1 will be 0. 
// If instead Bytes only refers to one operand, but rewriting it to use two
// duplicated operands allows it to match P, then OpNo0 and OpNo1 will be
// the same.
static bool matchPermute(const SmallVectorImpl<int> &Bytes, const Permute &P,
                         unsigned &OpNo0, unsigned &OpNo1) {
  int OpNos[] = { -1, -1 };
  for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) {
    int Elt = Bytes[I];
    if (Elt >= 0) {
      // Make sure that the two permute vectors use the same suboperand
      // byte number.  Only the operand numbers (the high bits) are
      // allowed to differ.
      if ((Elt ^ P.Bytes[I]) & (SystemZ::VectorBytes - 1))
        return false;
      int ModelOpNo = P.Bytes[I] / SystemZ::VectorBytes;
      int RealOpNo = unsigned(Elt) / SystemZ::VectorBytes;
      // Make sure that the operand mappings are consistent with previous
      // elements.
      if (OpNos[ModelOpNo] == 1 - RealOpNo)
        return false;
      OpNos[ModelOpNo] = RealOpNo;
    }
  }
  return chooseShuffleOpNos(OpNos, OpNo0, OpNo1);
}

// As above, but search for a matching permute.
static const Permute *matchPermute(const SmallVectorImpl<int> &Bytes,
                                   unsigned &OpNo0, unsigned &OpNo1) {
  for (auto &P : PermuteForms)
    if (matchPermute(Bytes, P, OpNo0, OpNo1))
      return &P;
  return nullptr;
}

// Bytes is a VPERM-like permute vector, except that -1 is used for
// undefined bytes.  This permute is an operand of an outer permute.
// See whether redistributing the -1 bytes gives a shuffle that can be
// implemented using P.  If so, set Transform to a VPERM-like permute vector
// that, when applied to the result of P, gives the original permute in Bytes.
static bool matchDoublePermute(const SmallVectorImpl<int> &Bytes,
                               const Permute &P,
                               SmallVectorImpl<int> &Transform) {
  unsigned To = 0;
  for (unsigned From = 0; From < SystemZ::VectorBytes; ++From) {
    int Elt = Bytes[From];
    if (Elt < 0)
      // Byte number From of the result is undefined.
      Transform[From] = -1;
    else {
      while (P.Bytes[To] != Elt) {
        To += 1;
        if (To == SystemZ::VectorBytes)
          return false;
      }
      Transform[From] = To;
    }
  }
  return true;
}

// As above, but search for a matching permute.
static const Permute *
matchDoublePermute(const SmallVectorImpl<int> &Bytes,
                   SmallVectorImpl<int> &Transform) {
  for (auto &P : PermuteForms)
    if (matchDoublePermute(Bytes, P, Transform))
      return &P;
  return nullptr;
}

// Convert the mask of the given shuffle op into a byte-level mask,
// as if it had type vNi8.
static bool getVPermMask(SDValue ShuffleOp,
                         SmallVectorImpl<int> &Bytes) {
  EVT VT = ShuffleOp.getValueType();
  unsigned NumElements = VT.getVectorNumElements();
  unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();

  if (auto *VSN = dyn_cast<ShuffleVectorSDNode>(ShuffleOp)) {
    Bytes.resize(NumElements * BytesPerElement, -1);
    for (unsigned I = 0; I < NumElements; ++I) {
      int Index = VSN->getMaskElt(I);
      if (Index >= 0)
        for (unsigned J = 0; J < BytesPerElement; ++J)
          Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J;
    }
    return true;
  }
  if (SystemZISD::SPLAT == ShuffleOp.getOpcode() &&
      isa<ConstantSDNode>(ShuffleOp.getOperand(1))) {
    unsigned Index = ShuffleOp.getConstantOperandVal(1);
    Bytes.resize(NumElements * BytesPerElement, -1);
    for (unsigned I = 0; I < NumElements; ++I)
      for (unsigned J = 0; J < BytesPerElement; ++J)
        Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J;
    return true;
  }
  return false;
}

// Bytes is a VPERM-like permute vector, except that -1 is used for
// undefined bytes.  See whether bytes [Start, Start + BytesPerElement) of
// the result come from a contiguous sequence of bytes from one input.
// Set Base to the selector for the first byte if so.
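// For instance, Bytes[Start..Start+3] == {20, 21, 22, 23} with
// BytesPerElement == 4 gives Base == 20, i.e. the field is bytes 4..7 of
// operand 1.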
static bool getShuffleInput(const SmallVectorImpl &Bytes, unsigned Start, unsigned BytesPerElement, int &Base) { Base = -1; for (unsigned I = 0; I < BytesPerElement; ++I) { if (Bytes[Start + I] >= 0) { unsigned Elem = Bytes[Start + I]; if (Base < 0) { Base = Elem - I; // Make sure the bytes would come from one input operand. if (unsigned(Base) % Bytes.size() + BytesPerElement > Bytes.size()) return false; } else if (unsigned(Base) != Elem - I) return false; } } return true; } // Bytes is a VPERM-like permute vector, except that -1 is used for // undefined bytes. Return true if it can be performed using VSLDB. // When returning true, set StartIndex to the shift amount and OpNo0 // and OpNo1 to the VPERM operands that should be used as the first // and second shift operand respectively. static bool isShlDoublePermute(const SmallVectorImpl &Bytes, unsigned &StartIndex, unsigned &OpNo0, unsigned &OpNo1) { int OpNos[] = { -1, -1 }; int Shift = -1; for (unsigned I = 0; I < 16; ++I) { int Index = Bytes[I]; if (Index >= 0) { int ExpectedShift = (Index - I) % SystemZ::VectorBytes; int ModelOpNo = unsigned(ExpectedShift + I) / SystemZ::VectorBytes; int RealOpNo = unsigned(Index) / SystemZ::VectorBytes; if (Shift < 0) Shift = ExpectedShift; else if (Shift != ExpectedShift) return false; // Make sure that the operand mappings are consistent with previous // elements. if (OpNos[ModelOpNo] == 1 - RealOpNo) return false; OpNos[ModelOpNo] = RealOpNo; } } StartIndex = Shift; return chooseShuffleOpNos(OpNos, OpNo0, OpNo1); } // Create a node that performs P on operands Op0 and Op1, casting the // operands to the appropriate type. The type of the result is determined by P. static SDValue getPermuteNode(SelectionDAG &DAG, const SDLoc &DL, const Permute &P, SDValue Op0, SDValue Op1) { // VPDI (PERMUTE_DWORDS) always operates on v2i64s. The input // elements of a PACK are twice as wide as the outputs. unsigned InBytes = (P.Opcode == SystemZISD::PERMUTE_DWORDS ? 8 : P.Opcode == SystemZISD::PACK ? P.Operand * 2 : P.Operand); // Cast both operands to the appropriate type. MVT InVT = MVT::getVectorVT(MVT::getIntegerVT(InBytes * 8), SystemZ::VectorBytes / InBytes); Op0 = DAG.getNode(ISD::BITCAST, DL, InVT, Op0); Op1 = DAG.getNode(ISD::BITCAST, DL, InVT, Op1); SDValue Op; if (P.Opcode == SystemZISD::PERMUTE_DWORDS) { SDValue Op2 = DAG.getTargetConstant(P.Operand, DL, MVT::i32); Op = DAG.getNode(SystemZISD::PERMUTE_DWORDS, DL, InVT, Op0, Op1, Op2); } else if (P.Opcode == SystemZISD::PACK) { MVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(P.Operand * 8), SystemZ::VectorBytes / P.Operand); Op = DAG.getNode(SystemZISD::PACK, DL, OutVT, Op0, Op1); } else { Op = DAG.getNode(P.Opcode, DL, InVT, Op0, Op1); } return Op; } static bool isZeroVector(SDValue N) { if (N->getOpcode() == ISD::BITCAST) N = N->getOperand(0); if (N->getOpcode() == ISD::SPLAT_VECTOR) if (auto *Op = dyn_cast(N->getOperand(0))) return Op->getZExtValue() == 0; return ISD::isBuildVectorAllZeros(N.getNode()); } // Return the index of the zero/undef vector, or UINT32_MAX if not found. static uint32_t findZeroVectorIdx(SDValue *Ops, unsigned Num) { for (unsigned I = 0; I < Num ; I++) if (isZeroVector(Ops[I])) return I; return UINT32_MAX; } // Bytes is a VPERM-like permute vector, except that -1 is used for // undefined bytes. Implement it on operands Ops[0] and Ops[1] using // VSLDB or VPERM. 
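// VSLDB is tried first because it encodes the byte offset as an immediate,
// whereas VPERM needs the selector vector to be materialized in an extra
// vector register.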
static SDValue getGeneralPermuteNode(SelectionDAG &DAG, const SDLoc &DL, SDValue *Ops, const SmallVectorImpl &Bytes) { for (unsigned I = 0; I < 2; ++I) Ops[I] = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Ops[I]); // First see whether VSLDB can be used. unsigned StartIndex, OpNo0, OpNo1; if (isShlDoublePermute(Bytes, StartIndex, OpNo0, OpNo1)) return DAG.getNode(SystemZISD::SHL_DOUBLE, DL, MVT::v16i8, Ops[OpNo0], Ops[OpNo1], DAG.getTargetConstant(StartIndex, DL, MVT::i32)); // Fall back on VPERM. Construct an SDNode for the permute vector. Try to // eliminate a zero vector by reusing any zero index in the permute vector. unsigned ZeroVecIdx = findZeroVectorIdx(&Ops[0], 2); if (ZeroVecIdx != UINT32_MAX) { bool MaskFirst = true; int ZeroIdx = -1; for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) { unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes; unsigned Byte = unsigned(Bytes[I]) % SystemZ::VectorBytes; if (OpNo == ZeroVecIdx && I == 0) { // If the first byte is zero, use mask as first operand. ZeroIdx = 0; break; } if (OpNo != ZeroVecIdx && Byte == 0) { // If mask contains a zero, use it by placing that vector first. ZeroIdx = I + SystemZ::VectorBytes; MaskFirst = false; break; } } if (ZeroIdx != -1) { SDValue IndexNodes[SystemZ::VectorBytes]; for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) { if (Bytes[I] >= 0) { unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes; unsigned Byte = unsigned(Bytes[I]) % SystemZ::VectorBytes; if (OpNo == ZeroVecIdx) IndexNodes[I] = DAG.getConstant(ZeroIdx, DL, MVT::i32); else { unsigned BIdx = MaskFirst ? Byte + SystemZ::VectorBytes : Byte; IndexNodes[I] = DAG.getConstant(BIdx, DL, MVT::i32); } } else IndexNodes[I] = DAG.getUNDEF(MVT::i32); } SDValue Mask = DAG.getBuildVector(MVT::v16i8, DL, IndexNodes); SDValue Src = ZeroVecIdx == 0 ? Ops[1] : Ops[0]; if (MaskFirst) return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Mask, Src, Mask); else return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Src, Mask, Mask); } } SDValue IndexNodes[SystemZ::VectorBytes]; for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) if (Bytes[I] >= 0) IndexNodes[I] = DAG.getConstant(Bytes[I], DL, MVT::i32); else IndexNodes[I] = DAG.getUNDEF(MVT::i32); SDValue Op2 = DAG.getBuildVector(MVT::v16i8, DL, IndexNodes); return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Ops[0], (!Ops[1].isUndef() ? Ops[1] : Ops[0]), Op2); } namespace { // Describes a general N-operand vector shuffle. struct GeneralShuffle { GeneralShuffle(EVT vt) : VT(vt), UnpackFromEltSize(UINT_MAX) {} void addUndef(); bool add(SDValue, unsigned); SDValue getNode(SelectionDAG &, const SDLoc &); void tryPrepareForUnpack(); bool unpackWasPrepared() { return UnpackFromEltSize <= 4; } SDValue insertUnpackIfPrepared(SelectionDAG &DAG, const SDLoc &DL, SDValue Op); // The operands of the shuffle. SmallVector Ops; // Index I is -1 if byte I of the result is undefined. Otherwise the // result comes from byte Bytes[I] % SystemZ::VectorBytes of operand // Bytes[I] / SystemZ::VectorBytes. SmallVector Bytes; // The type of the shuffle result. EVT VT; // Holds a value of 1, 2 or 4 if a final unpack has been prepared for. unsigned UnpackFromEltSize; }; } // Add an extra undefined element to the shuffle. void GeneralShuffle::addUndef() { unsigned BytesPerElement = VT.getVectorElementType().getStoreSize(); for (unsigned I = 0; I < BytesPerElement; ++I) Bytes.push_back(-1); } // Add an extra element to the shuffle, taking it from element Elem of Op. 
// A null Op indicates a vector input whose value will be calculated later; // there is at most one such input per shuffle and it always has the same // type as the result. Aborts and returns false if the source vector elements // of an EXTRACT_VECTOR_ELT are smaller than the destination elements. Per // LLVM they become implicitly extended, but this is rare and not optimized. bool GeneralShuffle::add(SDValue Op, unsigned Elem) { unsigned BytesPerElement = VT.getVectorElementType().getStoreSize(); // The source vector can have wider elements than the result, // either through an explicit TRUNCATE or because of type legalization. // We want the least significant part. EVT FromVT = Op.getNode() ? Op.getValueType() : VT; unsigned FromBytesPerElement = FromVT.getVectorElementType().getStoreSize(); // Return false if the source elements are smaller than their destination // elements. if (FromBytesPerElement < BytesPerElement) return false; unsigned Byte = ((Elem * FromBytesPerElement) % SystemZ::VectorBytes + (FromBytesPerElement - BytesPerElement)); // Look through things like shuffles and bitcasts. while (Op.getNode()) { if (Op.getOpcode() == ISD::BITCAST) Op = Op.getOperand(0); else if (Op.getOpcode() == ISD::VECTOR_SHUFFLE && Op.hasOneUse()) { // See whether the bytes we need come from a contiguous part of one // operand. SmallVector OpBytes; if (!getVPermMask(Op, OpBytes)) break; int NewByte; if (!getShuffleInput(OpBytes, Byte, BytesPerElement, NewByte)) break; if (NewByte < 0) { addUndef(); return true; } Op = Op.getOperand(unsigned(NewByte) / SystemZ::VectorBytes); Byte = unsigned(NewByte) % SystemZ::VectorBytes; } else if (Op.isUndef()) { addUndef(); return true; } else break; } // Make sure that the source of the extraction is in Ops. unsigned OpNo = 0; for (; OpNo < Ops.size(); ++OpNo) if (Ops[OpNo] == Op) break; if (OpNo == Ops.size()) Ops.push_back(Op); // Add the element to Bytes. unsigned Base = OpNo * SystemZ::VectorBytes + Byte; for (unsigned I = 0; I < BytesPerElement; ++I) Bytes.push_back(Base + I); return true; } // Return SDNodes for the completed shuffle. SDValue GeneralShuffle::getNode(SelectionDAG &DAG, const SDLoc &DL) { assert(Bytes.size() == SystemZ::VectorBytes && "Incomplete vector"); if (Ops.size() == 0) return DAG.getUNDEF(VT); // Use a single unpack if possible as the last operation. tryPrepareForUnpack(); // Make sure that there are at least two shuffle operands. if (Ops.size() == 1) Ops.push_back(DAG.getUNDEF(MVT::v16i8)); // Create a tree of shuffles, deferring root node until after the loop. // Try to redistribute the undefined elements of non-root nodes so that // the non-root shuffles match something like a pack or merge, then adjust // the parent node's permute vector to compensate for the new order. // Among other things, this copes with vectors like <2 x i16> that were // padded with undefined elements during type legalization. // // In the best case this redistribution will lead to the whole tree // using packs and merges. It should rarely be a loss in other cases. unsigned Stride = 1; for (; Stride * 2 < Ops.size(); Stride *= 2) { for (unsigned I = 0; I < Ops.size() - Stride; I += Stride * 2) { SDValue SubOps[] = { Ops[I], Ops[I + Stride] }; // Create a mask for just these two operands. 
SmallVector NewBytes(SystemZ::VectorBytes); for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) { unsigned OpNo = unsigned(Bytes[J]) / SystemZ::VectorBytes; unsigned Byte = unsigned(Bytes[J]) % SystemZ::VectorBytes; if (OpNo == I) NewBytes[J] = Byte; else if (OpNo == I + Stride) NewBytes[J] = SystemZ::VectorBytes + Byte; else NewBytes[J] = -1; } // See if it would be better to reorganize NewMask to avoid using VPERM. SmallVector NewBytesMap(SystemZ::VectorBytes); if (const Permute *P = matchDoublePermute(NewBytes, NewBytesMap)) { Ops[I] = getPermuteNode(DAG, DL, *P, SubOps[0], SubOps[1]); // Applying NewBytesMap to Ops[I] gets back to NewBytes. for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) { if (NewBytes[J] >= 0) { assert(unsigned(NewBytesMap[J]) < SystemZ::VectorBytes && "Invalid double permute"); Bytes[J] = I * SystemZ::VectorBytes + NewBytesMap[J]; } else assert(NewBytesMap[J] < 0 && "Invalid double permute"); } } else { // Just use NewBytes on the operands. Ops[I] = getGeneralPermuteNode(DAG, DL, SubOps, NewBytes); for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) if (NewBytes[J] >= 0) Bytes[J] = I * SystemZ::VectorBytes + J; } } } // Now we just have 2 inputs. Put the second operand in Ops[1]. if (Stride > 1) { Ops[1] = Ops[Stride]; for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) if (Bytes[I] >= int(SystemZ::VectorBytes)) Bytes[I] -= (Stride - 1) * SystemZ::VectorBytes; } // Look for an instruction that can do the permute without resorting // to VPERM. unsigned OpNo0, OpNo1; SDValue Op; if (unpackWasPrepared() && Ops[1].isUndef()) Op = Ops[0]; else if (const Permute *P = matchPermute(Bytes, OpNo0, OpNo1)) Op = getPermuteNode(DAG, DL, *P, Ops[OpNo0], Ops[OpNo1]); else Op = getGeneralPermuteNode(DAG, DL, &Ops[0], Bytes); Op = insertUnpackIfPrepared(DAG, DL, Op); return DAG.getNode(ISD::BITCAST, DL, VT, Op); } #ifndef NDEBUG static void dumpBytes(const SmallVectorImpl &Bytes, std::string Msg) { dbgs() << Msg.c_str() << " { "; for (unsigned i = 0; i < Bytes.size(); i++) dbgs() << Bytes[i] << " "; dbgs() << "}\n"; } #endif // If the Bytes vector matches an unpack operation, prepare to do the unpack // after all else by removing the zero vector and the effect of the unpack on // Bytes. void GeneralShuffle::tryPrepareForUnpack() { uint32_t ZeroVecOpNo = findZeroVectorIdx(&Ops[0], Ops.size()); if (ZeroVecOpNo == UINT32_MAX || Ops.size() == 1) return; // Only do this if removing the zero vector reduces the depth, otherwise // the critical path will increase with the final unpack. if (Ops.size() > 2 && Log2_32_Ceil(Ops.size()) == Log2_32_Ceil(Ops.size() - 1)) return; // Find an unpack that would allow removing the zero vector from Ops. UnpackFromEltSize = 1; for (; UnpackFromEltSize <= 4; UnpackFromEltSize *= 2) { bool MatchUnpack = true; SmallVector SrcBytes; for (unsigned Elt = 0; Elt < SystemZ::VectorBytes; Elt++) { unsigned ToEltSize = UnpackFromEltSize * 2; bool IsZextByte = (Elt % ToEltSize) < UnpackFromEltSize; if (!IsZextByte) SrcBytes.push_back(Bytes[Elt]); if (Bytes[Elt] != -1) { unsigned OpNo = unsigned(Bytes[Elt]) / SystemZ::VectorBytes; if (IsZextByte != (OpNo == ZeroVecOpNo)) { MatchUnpack = false; break; } } } if (MatchUnpack) { if (Ops.size() == 2) { // Don't use unpack if a single source operand needs rearrangement. 
for (unsigned i = 0; i < SystemZ::VectorBytes / 2; i++) if (SrcBytes[i] != -1 && SrcBytes[i] % 16 != int(i)) { UnpackFromEltSize = UINT_MAX; return; } } break; } } if (UnpackFromEltSize > 4) return; LLVM_DEBUG(dbgs() << "Preparing for final unpack of element size " << UnpackFromEltSize << ". Zero vector is Op#" << ZeroVecOpNo << ".\n"; dumpBytes(Bytes, "Original Bytes vector:");); // Apply the unpack in reverse to the Bytes array. unsigned B = 0; for (unsigned Elt = 0; Elt < SystemZ::VectorBytes;) { Elt += UnpackFromEltSize; for (unsigned i = 0; i < UnpackFromEltSize; i++, Elt++, B++) Bytes[B] = Bytes[Elt]; } while (B < SystemZ::VectorBytes) Bytes[B++] = -1; // Remove the zero vector from Ops Ops.erase(&Ops[ZeroVecOpNo]); for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) if (Bytes[I] >= 0) { unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes; if (OpNo > ZeroVecOpNo) Bytes[I] -= SystemZ::VectorBytes; } LLVM_DEBUG(dumpBytes(Bytes, "Resulting Bytes vector, zero vector removed:"); dbgs() << "\n";); } SDValue GeneralShuffle::insertUnpackIfPrepared(SelectionDAG &DAG, const SDLoc &DL, SDValue Op) { if (!unpackWasPrepared()) return Op; unsigned InBits = UnpackFromEltSize * 8; EVT InVT = MVT::getVectorVT(MVT::getIntegerVT(InBits), SystemZ::VectorBits / InBits); SDValue PackedOp = DAG.getNode(ISD::BITCAST, DL, InVT, Op); unsigned OutBits = InBits * 2; EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(OutBits), SystemZ::VectorBits / OutBits); return DAG.getNode(SystemZISD::UNPACKL_HIGH, DL, OutVT, PackedOp); } // Return true if the given BUILD_VECTOR is a scalar-to-vector conversion. static bool isScalarToVector(SDValue Op) { for (unsigned I = 1, E = Op.getNumOperands(); I != E; ++I) if (!Op.getOperand(I).isUndef()) return false; return true; } // Return a vector of type VT that contains Value in the first element. // The other elements don't matter. static SDValue buildScalarToVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT, SDValue Value) { // If we have a constant, replicate it to all elements and let the // BUILD_VECTOR lowering take care of it. if (Value.getOpcode() == ISD::Constant || Value.getOpcode() == ISD::ConstantFP) { SmallVector Ops(VT.getVectorNumElements(), Value); return DAG.getBuildVector(VT, DL, Ops); } if (Value.isUndef()) return DAG.getUNDEF(VT); return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Value); } // Return a vector of type VT in which Op0 is in element 0 and Op1 is in // element 1. Used for cases in which replication is cheap. static SDValue buildMergeScalars(SelectionDAG &DAG, const SDLoc &DL, EVT VT, SDValue Op0, SDValue Op1) { if (Op0.isUndef()) { if (Op1.isUndef()) return DAG.getUNDEF(VT); return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op1); } if (Op1.isUndef()) return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0); return DAG.getNode(SystemZISD::MERGE_HIGH, DL, VT, buildScalarToVector(DAG, DL, VT, Op0), buildScalarToVector(DAG, DL, VT, Op1)); } // Extend GPR scalars Op0 and Op1 to doublewords and return a v2i64 // vector for them. static SDValue joinDwords(SelectionDAG &DAG, const SDLoc &DL, SDValue Op0, SDValue Op1) { if (Op0.isUndef() && Op1.isUndef()) return DAG.getUNDEF(MVT::v2i64); // If one of the two inputs is undefined then replicate the other one, // in order to avoid using another register unnecessarily. 
if (Op0.isUndef()) Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1); else if (Op1.isUndef()) Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0); else { Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0); Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1); } return DAG.getNode(SystemZISD::JOIN_DWORDS, DL, MVT::v2i64, Op0, Op1); } // If a BUILD_VECTOR contains some EXTRACT_VECTOR_ELTs, it's usually // better to use VECTOR_SHUFFLEs on them, only using BUILD_VECTOR for // the non-EXTRACT_VECTOR_ELT elements. See if the given BUILD_VECTOR // would benefit from this representation and return it if so. static SDValue tryBuildVectorShuffle(SelectionDAG &DAG, BuildVectorSDNode *BVN) { EVT VT = BVN->getValueType(0); unsigned NumElements = VT.getVectorNumElements(); // Represent the BUILD_VECTOR as an N-operand VECTOR_SHUFFLE-like operation // on byte vectors. If there are non-EXTRACT_VECTOR_ELT elements that still // need a BUILD_VECTOR, add an additional placeholder operand for that // BUILD_VECTOR and store its operands in ResidueOps. GeneralShuffle GS(VT); SmallVector ResidueOps; bool FoundOne = false; for (unsigned I = 0; I < NumElements; ++I) { SDValue Op = BVN->getOperand(I); if (Op.getOpcode() == ISD::TRUNCATE) Op = Op.getOperand(0); if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT && Op.getOperand(1).getOpcode() == ISD::Constant) { unsigned Elem = Op.getConstantOperandVal(1); if (!GS.add(Op.getOperand(0), Elem)) return SDValue(); FoundOne = true; } else if (Op.isUndef()) { GS.addUndef(); } else { if (!GS.add(SDValue(), ResidueOps.size())) return SDValue(); ResidueOps.push_back(BVN->getOperand(I)); } } // Nothing to do if there are no EXTRACT_VECTOR_ELTs. if (!FoundOne) return SDValue(); // Create the BUILD_VECTOR for the remaining elements, if any. if (!ResidueOps.empty()) { while (ResidueOps.size() < NumElements) ResidueOps.push_back(DAG.getUNDEF(ResidueOps[0].getValueType())); for (auto &Op : GS.Ops) { if (!Op.getNode()) { Op = DAG.getBuildVector(VT, SDLoc(BVN), ResidueOps); break; } } } return GS.getNode(DAG, SDLoc(BVN)); } bool SystemZTargetLowering::isVectorElementLoad(SDValue Op) const { if (Op.getOpcode() == ISD::LOAD && cast(Op)->isUnindexed()) return true; if (auto *AL = dyn_cast(Op)) if (AL->getOpcode() == ISD::ATOMIC_LOAD) return true; if (Subtarget.hasVectorEnhancements2() && Op.getOpcode() == SystemZISD::LRV) return true; return false; } // Combine GPR scalar values Elems into a vector of type VT. SDValue SystemZTargetLowering::buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT, SmallVectorImpl &Elems) const { // See whether there is a single replicated value. SDValue Single; unsigned int NumElements = Elems.size(); unsigned int Count = 0; for (auto Elem : Elems) { if (!Elem.isUndef()) { if (!Single.getNode()) Single = Elem; else if (Elem != Single) { Single = SDValue(); break; } Count += 1; } } // There are three cases here: // // - if the only defined element is a loaded one, the best sequence // is a replicating load. // // - otherwise, if the only defined element is an i64 value, we will // end up with the same VLVGP sequence regardless of whether we short-cut // for replication or fall through to the later code. // // - otherwise, if the only defined element is an i32 or smaller value, // we would need 2 instructions to replicate it: VLVGP followed by VREPx. // This is only a win if the single defined element is used more than once. // In other cases we're better off using a single VLVGx. 
if (Single.getNode() && (Count > 1 || isVectorElementLoad(Single))) return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Single); // If all elements are loads, use VLREP/VLEs (below). bool AllLoads = true; for (auto Elem : Elems) if (!isVectorElementLoad(Elem)) { AllLoads = false; break; } // The best way of building a v2i64 from two i64s is to use VLVGP. if (VT == MVT::v2i64 && !AllLoads) return joinDwords(DAG, DL, Elems[0], Elems[1]); // Use a 64-bit merge high to combine two doubles. if (VT == MVT::v2f64 && !AllLoads) return buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]); // Build v4f32 values directly from the FPRs: // // // V V VMRHF // // V VMRHG // if (VT == MVT::v4f32 && !AllLoads) { SDValue Op01 = buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]); SDValue Op23 = buildMergeScalars(DAG, DL, VT, Elems[2], Elems[3]); // Avoid unnecessary undefs by reusing the other operand. if (Op01.isUndef()) Op01 = Op23; else if (Op23.isUndef()) Op23 = Op01; // Merging identical replications is a no-op. if (Op01.getOpcode() == SystemZISD::REPLICATE && Op01 == Op23) return Op01; Op01 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op01); Op23 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op23); SDValue Op = DAG.getNode(SystemZISD::MERGE_HIGH, DL, MVT::v2i64, Op01, Op23); return DAG.getNode(ISD::BITCAST, DL, VT, Op); } // Collect the constant terms. SmallVector Constants(NumElements, SDValue()); SmallVector Done(NumElements, false); unsigned NumConstants = 0; for (unsigned I = 0; I < NumElements; ++I) { SDValue Elem = Elems[I]; if (Elem.getOpcode() == ISD::Constant || Elem.getOpcode() == ISD::ConstantFP) { NumConstants += 1; Constants[I] = Elem; Done[I] = true; } } // If there was at least one constant, fill in the other elements of // Constants with undefs to get a full vector constant and use that // as the starting point. SDValue Result; SDValue ReplicatedVal; if (NumConstants > 0) { for (unsigned I = 0; I < NumElements; ++I) if (!Constants[I].getNode()) Constants[I] = DAG.getUNDEF(Elems[I].getValueType()); Result = DAG.getBuildVector(VT, DL, Constants); } else { // Otherwise try to use VLREP or VLVGP to start the sequence in order to // avoid a false dependency on any previous contents of the vector // register. // Use a VLREP if at least one element is a load. Make sure to replicate // the load with the most elements having its value. std::map UseCounts; SDNode *LoadMaxUses = nullptr; for (unsigned I = 0; I < NumElements; ++I) if (isVectorElementLoad(Elems[I])) { SDNode *Ld = Elems[I].getNode(); UseCounts[Ld]++; if (LoadMaxUses == nullptr || UseCounts[LoadMaxUses] < UseCounts[Ld]) LoadMaxUses = Ld; } if (LoadMaxUses != nullptr) { ReplicatedVal = SDValue(LoadMaxUses, 0); Result = DAG.getNode(SystemZISD::REPLICATE, DL, VT, ReplicatedVal); } else { // Try to use VLVGP. unsigned I1 = NumElements / 2 - 1; unsigned I2 = NumElements - 1; bool Def1 = !Elems[I1].isUndef(); bool Def2 = !Elems[I2].isUndef(); if (Def1 || Def2) { SDValue Elem1 = Elems[Def1 ? I1 : I2]; SDValue Elem2 = Elems[Def2 ? I2 : I1]; Result = DAG.getNode(ISD::BITCAST, DL, VT, joinDwords(DAG, DL, Elem1, Elem2)); Done[I1] = true; Done[I2] = true; } else Result = DAG.getUNDEF(VT); } } // Use VLVGx to insert the other elements. 
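// Elements equal to ReplicatedVal are already correct from the initial
// VLREP, so only the remaining defined elements are inserted here.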
for (unsigned I = 0; I < NumElements; ++I) if (!Done[I] && !Elems[I].isUndef() && Elems[I] != ReplicatedVal) Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Result, Elems[I], DAG.getConstant(I, DL, MVT::i32)); return Result; } SDValue SystemZTargetLowering::lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { auto *BVN = cast(Op.getNode()); SDLoc DL(Op); EVT VT = Op.getValueType(); if (BVN->isConstant()) { if (SystemZVectorConstantInfo(BVN).isVectorConstantLegal(Subtarget)) return Op; // Fall back to loading it from memory. return SDValue(); } // See if we should use shuffles to construct the vector from other vectors. if (SDValue Res = tryBuildVectorShuffle(DAG, BVN)) return Res; // Detect SCALAR_TO_VECTOR conversions. if (isOperationLegal(ISD::SCALAR_TO_VECTOR, VT) && isScalarToVector(Op)) return buildScalarToVector(DAG, DL, VT, Op.getOperand(0)); // Otherwise use buildVector to build the vector up from GPRs. unsigned NumElements = Op.getNumOperands(); SmallVector Ops(NumElements); for (unsigned I = 0; I < NumElements; ++I) Ops[I] = Op.getOperand(I); return buildVector(DAG, DL, VT, Ops); } SDValue SystemZTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { auto *VSN = cast(Op.getNode()); SDLoc DL(Op); EVT VT = Op.getValueType(); unsigned NumElements = VT.getVectorNumElements(); if (VSN->isSplat()) { SDValue Op0 = Op.getOperand(0); unsigned Index = VSN->getSplatIndex(); assert(Index < VT.getVectorNumElements() && "Splat index should be defined and in first operand"); // See whether the value we're splatting is directly available as a scalar. if ((Index == 0 && Op0.getOpcode() == ISD::SCALAR_TO_VECTOR) || Op0.getOpcode() == ISD::BUILD_VECTOR) return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0.getOperand(Index)); // Otherwise keep it as a vector-to-vector operation. return DAG.getNode(SystemZISD::SPLAT, DL, VT, Op.getOperand(0), DAG.getTargetConstant(Index, DL, MVT::i32)); } GeneralShuffle GS(VT); for (unsigned I = 0; I < NumElements; ++I) { int Elt = VSN->getMaskElt(I); if (Elt < 0) GS.addUndef(); else if (!GS.add(Op.getOperand(unsigned(Elt) / NumElements), unsigned(Elt) % NumElements)) return SDValue(); } return GS.getNode(DAG, SDLoc(VSN)); } SDValue SystemZTargetLowering::lowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); // Just insert the scalar into element 0 of an undefined vector. return DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, Op.getValueType(), DAG.getUNDEF(Op.getValueType()), Op.getOperand(0), DAG.getConstant(0, DL, MVT::i32)); } SDValue SystemZTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const { // Handle insertions of floating-point values. SDLoc DL(Op); SDValue Op0 = Op.getOperand(0); SDValue Op1 = Op.getOperand(1); SDValue Op2 = Op.getOperand(2); EVT VT = Op.getValueType(); // Insertions into constant indices of a v2f64 can be done using VPDI. // However, if the inserted value is a bitcast or a constant then it's // better to use GPRs, as below. if (VT == MVT::v2f64 && Op1.getOpcode() != ISD::BITCAST && Op1.getOpcode() != ISD::ConstantFP && Op2.getOpcode() == ISD::Constant) { uint64_t Index = Op2->getAsZExtVal(); unsigned Mask = VT.getVectorNumElements() - 1; if (Index <= Mask) return Op; } // Otherwise bitcast to the equivalent integer form and insert via a GPR. 
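  //
  // For example (hedged sketch): inserting an f32 value F at index Idx into a
  // v4f32 vector V becomes
  //   (v4f32 bitcast (insert_vector_elt (v4i32 bitcast V), (i32 bitcast F), Idx))
  // so the insertion itself is done with VLVGF from a GPR.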
MVT IntVT = MVT::getIntegerVT(VT.getScalarSizeInBits()); MVT IntVecVT = MVT::getVectorVT(IntVT, VT.getVectorNumElements()); SDValue Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntVecVT, DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0), DAG.getNode(ISD::BITCAST, DL, IntVT, Op1), Op2); return DAG.getNode(ISD::BITCAST, DL, VT, Res); } SDValue SystemZTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const { // Handle extractions of floating-point values. SDLoc DL(Op); SDValue Op0 = Op.getOperand(0); SDValue Op1 = Op.getOperand(1); EVT VT = Op.getValueType(); EVT VecVT = Op0.getValueType(); // Extractions of constant indices can be done directly. if (auto *CIndexN = dyn_cast(Op1)) { uint64_t Index = CIndexN->getZExtValue(); unsigned Mask = VecVT.getVectorNumElements() - 1; if (Index <= Mask) return Op; } // Otherwise bitcast to the equivalent integer form and extract via a GPR. MVT IntVT = MVT::getIntegerVT(VT.getSizeInBits()); MVT IntVecVT = MVT::getVectorVT(IntVT, VecVT.getVectorNumElements()); SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, IntVT, DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0), Op1); return DAG.getNode(ISD::BITCAST, DL, VT, Res); } SDValue SystemZTargetLowering:: lowerSIGN_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const { SDValue PackedOp = Op.getOperand(0); EVT OutVT = Op.getValueType(); EVT InVT = PackedOp.getValueType(); unsigned ToBits = OutVT.getScalarSizeInBits(); unsigned FromBits = InVT.getScalarSizeInBits(); do { FromBits *= 2; EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(FromBits), SystemZ::VectorBits / FromBits); PackedOp = DAG.getNode(SystemZISD::UNPACK_HIGH, SDLoc(PackedOp), OutVT, PackedOp); } while (FromBits != ToBits); return PackedOp; } // Lower a ZERO_EXTEND_VECTOR_INREG to a vector shuffle with a zero vector. SDValue SystemZTargetLowering:: lowerZERO_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const { SDValue PackedOp = Op.getOperand(0); SDLoc DL(Op); EVT OutVT = Op.getValueType(); EVT InVT = PackedOp.getValueType(); unsigned InNumElts = InVT.getVectorNumElements(); unsigned OutNumElts = OutVT.getVectorNumElements(); unsigned NumInPerOut = InNumElts / OutNumElts; SDValue ZeroVec = DAG.getSplatVector(InVT, DL, DAG.getConstant(0, DL, InVT.getScalarType())); SmallVector Mask(InNumElts); unsigned ZeroVecElt = InNumElts; for (unsigned PackedElt = 0; PackedElt < OutNumElts; PackedElt++) { unsigned MaskElt = PackedElt * NumInPerOut; unsigned End = MaskElt + NumInPerOut - 1; for (; MaskElt < End; MaskElt++) Mask[MaskElt] = ZeroVecElt++; Mask[MaskElt] = PackedElt; } SDValue Shuf = DAG.getVectorShuffle(InVT, DL, PackedOp, ZeroVec, Mask); return DAG.getNode(ISD::BITCAST, DL, OutVT, Shuf); } SDValue SystemZTargetLowering::lowerShift(SDValue Op, SelectionDAG &DAG, unsigned ByScalar) const { // Look for cases where a vector shift can use the *_BY_SCALAR form. SDValue Op0 = Op.getOperand(0); SDValue Op1 = Op.getOperand(1); SDLoc DL(Op); EVT VT = Op.getValueType(); unsigned ElemBitSize = VT.getScalarSizeInBits(); // See whether the shift vector is a splat represented as BUILD_VECTOR. if (auto *BVN = dyn_cast(Op1)) { APInt SplatBits, SplatUndef; unsigned SplatBitSize; bool HasAnyUndefs; // Check for constant splats. Use ElemBitSize as the minimum element // width and reject splats that need wider elements. 
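  //
  // Illustrative case: (shl v8i16 X, (BUILD_VECTOR splat 3)) is recognized
  // here and becomes (VSHL_BY_SCALAR X, 3), i.e. a single VESLH, rather than
  // a shift that carries a full vector shift-amount operand.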
if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs, ElemBitSize, true) && SplatBitSize == ElemBitSize) { SDValue Shift = DAG.getConstant(SplatBits.getZExtValue() & 0xfff, DL, MVT::i32); return DAG.getNode(ByScalar, DL, VT, Op0, Shift); } // Check for variable splats. BitVector UndefElements; SDValue Splat = BVN->getSplatValue(&UndefElements); if (Splat) { // Since i32 is the smallest legal type, we either need a no-op // or a truncation. SDValue Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Splat); return DAG.getNode(ByScalar, DL, VT, Op0, Shift); } } // See whether the shift vector is a splat represented as SHUFFLE_VECTOR, // and the shift amount is directly available in a GPR. if (auto *VSN = dyn_cast(Op1)) { if (VSN->isSplat()) { SDValue VSNOp0 = VSN->getOperand(0); unsigned Index = VSN->getSplatIndex(); assert(Index < VT.getVectorNumElements() && "Splat index should be defined and in first operand"); if ((Index == 0 && VSNOp0.getOpcode() == ISD::SCALAR_TO_VECTOR) || VSNOp0.getOpcode() == ISD::BUILD_VECTOR) { // Since i32 is the smallest legal type, we either need a no-op // or a truncation. SDValue Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, VSNOp0.getOperand(Index)); return DAG.getNode(ByScalar, DL, VT, Op0, Shift); } } } // Otherwise just treat the current form as legal. return Op; } SDValue SystemZTargetLowering::lowerIS_FPCLASS(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); MVT ResultVT = Op.getSimpleValueType(); SDValue Arg = Op.getOperand(0); unsigned Check = Op.getConstantOperandVal(1); unsigned TDCMask = 0; if (Check & fcSNan) TDCMask |= SystemZ::TDCMASK_SNAN_PLUS | SystemZ::TDCMASK_SNAN_MINUS; if (Check & fcQNan) TDCMask |= SystemZ::TDCMASK_QNAN_PLUS | SystemZ::TDCMASK_QNAN_MINUS; if (Check & fcPosInf) TDCMask |= SystemZ::TDCMASK_INFINITY_PLUS; if (Check & fcNegInf) TDCMask |= SystemZ::TDCMASK_INFINITY_MINUS; if (Check & fcPosNormal) TDCMask |= SystemZ::TDCMASK_NORMAL_PLUS; if (Check & fcNegNormal) TDCMask |= SystemZ::TDCMASK_NORMAL_MINUS; if (Check & fcPosSubnormal) TDCMask |= SystemZ::TDCMASK_SUBNORMAL_PLUS; if (Check & fcNegSubnormal) TDCMask |= SystemZ::TDCMASK_SUBNORMAL_MINUS; if (Check & fcPosZero) TDCMask |= SystemZ::TDCMASK_ZERO_PLUS; if (Check & fcNegZero) TDCMask |= SystemZ::TDCMASK_ZERO_MINUS; SDValue TDCMaskV = DAG.getConstant(TDCMask, DL, MVT::i64); SDValue Intr = DAG.getNode(SystemZISD::TDC, DL, ResultVT, Arg, TDCMaskV); return getCCResult(DAG, Intr); } SDValue SystemZTargetLowering::lowerREADCYCLECOUNTER(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); SDValue Chain = Op.getOperand(0); // STCKF only supports a memory operand, so we have to use a temporary. SDValue StackPtr = DAG.CreateStackTemporary(MVT::i64); int SPFI = cast(StackPtr.getNode())->getIndex(); MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI); // Use STCFK to store the TOD clock into the temporary. SDValue StoreOps[] = {Chain, StackPtr}; Chain = DAG.getMemIntrinsicNode( SystemZISD::STCKF, DL, DAG.getVTList(MVT::Other), StoreOps, MVT::i64, MPI, MaybeAlign(), MachineMemOperand::MOStore); // And read it back from there. 
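  // The resulting sequence has roughly this shape (illustrative only, exact
  // addressing depends on frame lowering):
  //   STCKF <stack slot>          ; store the TOD clock into the temporary
  //   LG    <reg>, <stack slot>   ; reload it as the i64 result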
return DAG.getLoad(MVT::i64, DL, Chain, StackPtr, MPI); } SDValue SystemZTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { case ISD::FRAMEADDR: return lowerFRAMEADDR(Op, DAG); case ISD::RETURNADDR: return lowerRETURNADDR(Op, DAG); case ISD::BR_CC: return lowerBR_CC(Op, DAG); case ISD::SELECT_CC: return lowerSELECT_CC(Op, DAG); case ISD::SETCC: return lowerSETCC(Op, DAG); case ISD::STRICT_FSETCC: return lowerSTRICT_FSETCC(Op, DAG, false); case ISD::STRICT_FSETCCS: return lowerSTRICT_FSETCC(Op, DAG, true); case ISD::GlobalAddress: return lowerGlobalAddress(cast(Op), DAG); case ISD::GlobalTLSAddress: return lowerGlobalTLSAddress(cast(Op), DAG); case ISD::BlockAddress: return lowerBlockAddress(cast(Op), DAG); case ISD::JumpTable: return lowerJumpTable(cast(Op), DAG); case ISD::ConstantPool: return lowerConstantPool(cast(Op), DAG); case ISD::BITCAST: return lowerBITCAST(Op, DAG); case ISD::VASTART: return lowerVASTART(Op, DAG); case ISD::VACOPY: return lowerVACOPY(Op, DAG); case ISD::DYNAMIC_STACKALLOC: return lowerDYNAMIC_STACKALLOC(Op, DAG); case ISD::GET_DYNAMIC_AREA_OFFSET: return lowerGET_DYNAMIC_AREA_OFFSET(Op, DAG); case ISD::SMUL_LOHI: return lowerSMUL_LOHI(Op, DAG); case ISD::UMUL_LOHI: return lowerUMUL_LOHI(Op, DAG); case ISD::SDIVREM: return lowerSDIVREM(Op, DAG); case ISD::UDIVREM: return lowerUDIVREM(Op, DAG); case ISD::SADDO: case ISD::SSUBO: case ISD::UADDO: case ISD::USUBO: return lowerXALUO(Op, DAG); case ISD::UADDO_CARRY: case ISD::USUBO_CARRY: return lowerUADDSUBO_CARRY(Op, DAG); case ISD::OR: return lowerOR(Op, DAG); case ISD::CTPOP: return lowerCTPOP(Op, DAG); case ISD::VECREDUCE_ADD: return lowerVECREDUCE_ADD(Op, DAG); case ISD::ATOMIC_FENCE: return lowerATOMIC_FENCE(Op, DAG); case ISD::ATOMIC_SWAP: return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_SWAPW); case ISD::ATOMIC_STORE: case ISD::ATOMIC_LOAD: return lowerATOMIC_LDST_I128(Op, DAG); case ISD::ATOMIC_LOAD_ADD: return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_ADD); case ISD::ATOMIC_LOAD_SUB: return lowerATOMIC_LOAD_SUB(Op, DAG); case ISD::ATOMIC_LOAD_AND: return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_AND); case ISD::ATOMIC_LOAD_OR: return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_OR); case ISD::ATOMIC_LOAD_XOR: return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_XOR); case ISD::ATOMIC_LOAD_NAND: return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_NAND); case ISD::ATOMIC_LOAD_MIN: return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_MIN); case ISD::ATOMIC_LOAD_MAX: return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_MAX); case ISD::ATOMIC_LOAD_UMIN: return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_UMIN); case ISD::ATOMIC_LOAD_UMAX: return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_UMAX); case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: return lowerATOMIC_CMP_SWAP(Op, DAG); case ISD::STACKSAVE: return lowerSTACKSAVE(Op, DAG); case ISD::STACKRESTORE: return lowerSTACKRESTORE(Op, DAG); case ISD::PREFETCH: return lowerPREFETCH(Op, DAG); case ISD::INTRINSIC_W_CHAIN: return lowerINTRINSIC_W_CHAIN(Op, DAG); case ISD::INTRINSIC_WO_CHAIN: return lowerINTRINSIC_WO_CHAIN(Op, DAG); case ISD::BUILD_VECTOR: return lowerBUILD_VECTOR(Op, DAG); case ISD::VECTOR_SHUFFLE: return lowerVECTOR_SHUFFLE(Op, DAG); case ISD::SCALAR_TO_VECTOR: return lowerSCALAR_TO_VECTOR(Op, DAG); case ISD::INSERT_VECTOR_ELT: return lowerINSERT_VECTOR_ELT(Op, DAG); case ISD::EXTRACT_VECTOR_ELT: return 
lowerEXTRACT_VECTOR_ELT(Op, DAG); case ISD::SIGN_EXTEND_VECTOR_INREG: return lowerSIGN_EXTEND_VECTOR_INREG(Op, DAG); case ISD::ZERO_EXTEND_VECTOR_INREG: return lowerZERO_EXTEND_VECTOR_INREG(Op, DAG); case ISD::SHL: return lowerShift(Op, DAG, SystemZISD::VSHL_BY_SCALAR); case ISD::SRL: return lowerShift(Op, DAG, SystemZISD::VSRL_BY_SCALAR); case ISD::SRA: return lowerShift(Op, DAG, SystemZISD::VSRA_BY_SCALAR); case ISD::ROTL: return lowerShift(Op, DAG, SystemZISD::VROTL_BY_SCALAR); case ISD::IS_FPCLASS: return lowerIS_FPCLASS(Op, DAG); case ISD::GET_ROUNDING: return lowerGET_ROUNDING(Op, DAG); case ISD::READCYCLECOUNTER: return lowerREADCYCLECOUNTER(Op, DAG); default: llvm_unreachable("Unexpected node to lower"); } } static SDValue expandBitCastI128ToF128(SelectionDAG &DAG, SDValue Src, const SDLoc &SL) { // If i128 is legal, just use a normal bitcast. if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i128)) return DAG.getBitcast(MVT::f128, Src); // Otherwise, f128 must live in FP128, so do a partwise move. assert(DAG.getTargetLoweringInfo().getRepRegClassFor(MVT::f128) == &SystemZ::FP128BitRegClass); SDValue Hi, Lo; std::tie(Lo, Hi) = DAG.SplitScalar(Src, SL, MVT::i64, MVT::i64); Hi = DAG.getBitcast(MVT::f64, Hi); Lo = DAG.getBitcast(MVT::f64, Lo); SDNode *Pair = DAG.getMachineNode( SystemZ::REG_SEQUENCE, SL, MVT::f128, {DAG.getTargetConstant(SystemZ::FP128BitRegClassID, SL, MVT::i32), Lo, DAG.getTargetConstant(SystemZ::subreg_l64, SL, MVT::i32), Hi, DAG.getTargetConstant(SystemZ::subreg_h64, SL, MVT::i32)}); return SDValue(Pair, 0); } static SDValue expandBitCastF128ToI128(SelectionDAG &DAG, SDValue Src, const SDLoc &SL) { // If i128 is legal, just use a normal bitcast. if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i128)) return DAG.getBitcast(MVT::i128, Src); // Otherwise, f128 must live in FP128, so do a partwise move. assert(DAG.getTargetLoweringInfo().getRepRegClassFor(MVT::f128) == &SystemZ::FP128BitRegClass); SDValue LoFP = DAG.getTargetExtractSubreg(SystemZ::subreg_l64, SL, MVT::f64, Src); SDValue HiFP = DAG.getTargetExtractSubreg(SystemZ::subreg_h64, SL, MVT::f64, Src); SDValue Lo = DAG.getNode(ISD::BITCAST, SL, MVT::i64, LoFP); SDValue Hi = DAG.getNode(ISD::BITCAST, SL, MVT::i64, HiFP); return DAG.getNode(ISD::BUILD_PAIR, SL, MVT::i128, Lo, Hi); } // Lower operations with invalid operand or result types (currently used // only for 128-bit integer types). 
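// For example (sketch only): an i128 ATOMIC_LOAD is re-emitted below as an
// ATOMIC_LOAD_128 memory intrinsic producing an untyped GR128 pair, which
// lowerGR128ToI128 turns back into the i128 value the rest of the DAG
// expects; an f128 result additionally goes through expandBitCastI128ToF128.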
void SystemZTargetLowering::LowerOperationWrapper(SDNode *N, SmallVectorImpl &Results, SelectionDAG &DAG) const { switch (N->getOpcode()) { case ISD::ATOMIC_LOAD: { SDLoc DL(N); SDVTList Tys = DAG.getVTList(MVT::Untyped, MVT::Other); SDValue Ops[] = { N->getOperand(0), N->getOperand(1) }; MachineMemOperand *MMO = cast(N)->getMemOperand(); SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_LOAD_128, DL, Tys, Ops, MVT::i128, MMO); SDValue Lowered = lowerGR128ToI128(DAG, Res); if (N->getValueType(0) == MVT::f128) Lowered = expandBitCastI128ToF128(DAG, Lowered, DL); Results.push_back(Lowered); Results.push_back(Res.getValue(1)); break; } case ISD::ATOMIC_STORE: { SDLoc DL(N); SDVTList Tys = DAG.getVTList(MVT::Other); SDValue Val = N->getOperand(1); if (Val.getValueType() == MVT::f128) Val = expandBitCastF128ToI128(DAG, Val, DL); Val = lowerI128ToGR128(DAG, Val); SDValue Ops[] = {N->getOperand(0), Val, N->getOperand(2)}; MachineMemOperand *MMO = cast(N)->getMemOperand(); SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_STORE_128, DL, Tys, Ops, MVT::i128, MMO); // We have to enforce sequential consistency by performing a // serialization operation after the store. if (cast(N)->getSuccessOrdering() == AtomicOrdering::SequentiallyConsistent) Res = SDValue(DAG.getMachineNode(SystemZ::Serialize, DL, MVT::Other, Res), 0); Results.push_back(Res); break; } case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: { SDLoc DL(N); SDVTList Tys = DAG.getVTList(MVT::Untyped, MVT::i32, MVT::Other); SDValue Ops[] = { N->getOperand(0), N->getOperand(1), lowerI128ToGR128(DAG, N->getOperand(2)), lowerI128ToGR128(DAG, N->getOperand(3)) }; MachineMemOperand *MMO = cast(N)->getMemOperand(); SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAP_128, DL, Tys, Ops, MVT::i128, MMO); SDValue Success = emitSETCC(DAG, DL, Res.getValue(1), SystemZ::CCMASK_CS, SystemZ::CCMASK_CS_EQ); Success = DAG.getZExtOrTrunc(Success, DL, N->getValueType(1)); Results.push_back(lowerGR128ToI128(DAG, Res)); Results.push_back(Success); Results.push_back(Res.getValue(2)); break; } case ISD::BITCAST: { SDValue Src = N->getOperand(0); if (N->getValueType(0) == MVT::i128 && Src.getValueType() == MVT::f128 && !useSoftFloat()) { SDLoc DL(N); Results.push_back(expandBitCastF128ToI128(DAG, Src, DL)); } break; } default: llvm_unreachable("Unexpected node to lower"); } } void SystemZTargetLowering::ReplaceNodeResults(SDNode *N, SmallVectorImpl &Results, SelectionDAG &DAG) const { return LowerOperationWrapper(N, Results, DAG); } const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const { #define OPCODE(NAME) case SystemZISD::NAME: return "SystemZISD::" #NAME switch ((SystemZISD::NodeType)Opcode) { case SystemZISD::FIRST_NUMBER: break; OPCODE(RET_GLUE); OPCODE(CALL); OPCODE(SIBCALL); OPCODE(TLS_GDCALL); OPCODE(TLS_LDCALL); OPCODE(PCREL_WRAPPER); OPCODE(PCREL_OFFSET); OPCODE(ICMP); OPCODE(FCMP); OPCODE(STRICT_FCMP); OPCODE(STRICT_FCMPS); OPCODE(TM); OPCODE(BR_CCMASK); OPCODE(SELECT_CCMASK); OPCODE(ADJDYNALLOC); OPCODE(PROBED_ALLOCA); OPCODE(POPCNT); OPCODE(SMUL_LOHI); OPCODE(UMUL_LOHI); OPCODE(SDIVREM); OPCODE(UDIVREM); OPCODE(SADDO); OPCODE(SSUBO); OPCODE(UADDO); OPCODE(USUBO); OPCODE(ADDCARRY); OPCODE(SUBCARRY); OPCODE(GET_CCMASK); OPCODE(MVC); OPCODE(NC); OPCODE(OC); OPCODE(XC); OPCODE(CLC); OPCODE(MEMSET_MVC); OPCODE(STPCPY); OPCODE(STRCMP); OPCODE(SEARCH_STRING); OPCODE(IPM); OPCODE(TBEGIN); OPCODE(TBEGIN_NOFLOAT); OPCODE(TEND); OPCODE(BYTE_MASK); OPCODE(ROTATE_MASK); OPCODE(REPLICATE); OPCODE(JOIN_DWORDS); OPCODE(SPLAT); 
OPCODE(MERGE_HIGH); OPCODE(MERGE_LOW); OPCODE(SHL_DOUBLE); OPCODE(PERMUTE_DWORDS); OPCODE(PERMUTE); OPCODE(PACK); OPCODE(PACKS_CC); OPCODE(PACKLS_CC); OPCODE(UNPACK_HIGH); OPCODE(UNPACKL_HIGH); OPCODE(UNPACK_LOW); OPCODE(UNPACKL_LOW); OPCODE(VSHL_BY_SCALAR); OPCODE(VSRL_BY_SCALAR); OPCODE(VSRA_BY_SCALAR); OPCODE(VROTL_BY_SCALAR); OPCODE(VSUM); OPCODE(VACC); OPCODE(VSCBI); OPCODE(VAC); OPCODE(VSBI); OPCODE(VACCC); OPCODE(VSBCBI); OPCODE(VICMPE); OPCODE(VICMPH); OPCODE(VICMPHL); OPCODE(VICMPES); OPCODE(VICMPHS); OPCODE(VICMPHLS); OPCODE(VFCMPE); OPCODE(STRICT_VFCMPE); OPCODE(STRICT_VFCMPES); OPCODE(VFCMPH); OPCODE(STRICT_VFCMPH); OPCODE(STRICT_VFCMPHS); OPCODE(VFCMPHE); OPCODE(STRICT_VFCMPHE); OPCODE(STRICT_VFCMPHES); OPCODE(VFCMPES); OPCODE(VFCMPHS); OPCODE(VFCMPHES); OPCODE(VFTCI); OPCODE(VEXTEND); OPCODE(STRICT_VEXTEND); OPCODE(VROUND); OPCODE(STRICT_VROUND); OPCODE(VTM); OPCODE(SCMP128HI); OPCODE(UCMP128HI); OPCODE(VFAE_CC); OPCODE(VFAEZ_CC); OPCODE(VFEE_CC); OPCODE(VFEEZ_CC); OPCODE(VFENE_CC); OPCODE(VFENEZ_CC); OPCODE(VISTR_CC); OPCODE(VSTRC_CC); OPCODE(VSTRCZ_CC); OPCODE(VSTRS_CC); OPCODE(VSTRSZ_CC); OPCODE(TDC); OPCODE(ATOMIC_SWAPW); OPCODE(ATOMIC_LOADW_ADD); OPCODE(ATOMIC_LOADW_SUB); OPCODE(ATOMIC_LOADW_AND); OPCODE(ATOMIC_LOADW_OR); OPCODE(ATOMIC_LOADW_XOR); OPCODE(ATOMIC_LOADW_NAND); OPCODE(ATOMIC_LOADW_MIN); OPCODE(ATOMIC_LOADW_MAX); OPCODE(ATOMIC_LOADW_UMIN); OPCODE(ATOMIC_LOADW_UMAX); OPCODE(ATOMIC_CMP_SWAPW); OPCODE(ATOMIC_CMP_SWAP); OPCODE(ATOMIC_LOAD_128); OPCODE(ATOMIC_STORE_128); OPCODE(ATOMIC_CMP_SWAP_128); OPCODE(LRV); OPCODE(STRV); OPCODE(VLER); OPCODE(VSTER); OPCODE(STCKF); OPCODE(PREFETCH); OPCODE(ADA_ENTRY); } return nullptr; #undef OPCODE } // Return true if VT is a vector whose elements are a whole number of bytes // in width. Also check for presence of vector support. bool SystemZTargetLowering::canTreatAsByteVector(EVT VT) const { if (!Subtarget.hasVector()) return false; return VT.isVector() && VT.getScalarSizeInBits() % 8 == 0 && VT.isSimple(); } // Try to simplify an EXTRACT_VECTOR_ELT from a vector of type VecVT // producing a result of type ResVT. Op is a possibly bitcast version // of the input vector and Index is the index (based on type VecVT) that // should be extracted. Return the new extraction if a simplification // was possible or if Force is true. SDValue SystemZTargetLowering::combineExtract(const SDLoc &DL, EVT ResVT, EVT VecVT, SDValue Op, unsigned Index, DAGCombinerInfo &DCI, bool Force) const { SelectionDAG &DAG = DCI.DAG; // The number of bytes being extracted. unsigned BytesPerElement = VecVT.getVectorElementType().getStoreSize(); for (;;) { unsigned Opcode = Op.getOpcode(); if (Opcode == ISD::BITCAST) // Look through bitcasts. Op = Op.getOperand(0); else if ((Opcode == ISD::VECTOR_SHUFFLE || Opcode == SystemZISD::SPLAT) && canTreatAsByteVector(Op.getValueType())) { // Get a VPERM-like permute mask and see whether the bytes covered // by the extracted element are a contiguous sequence from one // source operand. SmallVector Bytes; if (!getVPermMask(Op, Bytes)) break; int First; if (!getShuffleInput(Bytes, Index * BytesPerElement, BytesPerElement, First)) break; if (First < 0) return DAG.getUNDEF(ResVT); // Make sure the contiguous sequence starts at a multiple of the // original element size. unsigned Byte = unsigned(First) % Bytes.size(); if (Byte % BytesPerElement != 0) break; // We can get the extracted value directly from an input. 
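      //
      // For instance (illustrative): extracting byte 3 of
      //   (vector_shuffle v16i8 A, B, <16,17,...,31>)
      // maps every result byte onto operand B, so the loop continues with
      // Op = B and Index = 3 instead of materializing the shuffle.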
Index = Byte / BytesPerElement; Op = Op.getOperand(unsigned(First) / Bytes.size()); Force = true; } else if (Opcode == ISD::BUILD_VECTOR && canTreatAsByteVector(Op.getValueType())) { // We can only optimize this case if the BUILD_VECTOR elements are // at least as wide as the extracted value. EVT OpVT = Op.getValueType(); unsigned OpBytesPerElement = OpVT.getVectorElementType().getStoreSize(); if (OpBytesPerElement < BytesPerElement) break; // Make sure that the least-significant bit of the extracted value // is the least significant bit of an input. unsigned End = (Index + 1) * BytesPerElement; if (End % OpBytesPerElement != 0) break; // We're extracting the low part of one operand of the BUILD_VECTOR. Op = Op.getOperand(End / OpBytesPerElement - 1); if (!Op.getValueType().isInteger()) { EVT VT = MVT::getIntegerVT(Op.getValueSizeInBits()); Op = DAG.getNode(ISD::BITCAST, DL, VT, Op); DCI.AddToWorklist(Op.getNode()); } EVT VT = MVT::getIntegerVT(ResVT.getSizeInBits()); Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op); if (VT != ResVT) { DCI.AddToWorklist(Op.getNode()); Op = DAG.getNode(ISD::BITCAST, DL, ResVT, Op); } return Op; } else if ((Opcode == ISD::SIGN_EXTEND_VECTOR_INREG || Opcode == ISD::ZERO_EXTEND_VECTOR_INREG || Opcode == ISD::ANY_EXTEND_VECTOR_INREG) && canTreatAsByteVector(Op.getValueType()) && canTreatAsByteVector(Op.getOperand(0).getValueType())) { // Make sure that only the unextended bits are significant. EVT ExtVT = Op.getValueType(); EVT OpVT = Op.getOperand(0).getValueType(); unsigned ExtBytesPerElement = ExtVT.getVectorElementType().getStoreSize(); unsigned OpBytesPerElement = OpVT.getVectorElementType().getStoreSize(); unsigned Byte = Index * BytesPerElement; unsigned SubByte = Byte % ExtBytesPerElement; unsigned MinSubByte = ExtBytesPerElement - OpBytesPerElement; if (SubByte < MinSubByte || SubByte + BytesPerElement > ExtBytesPerElement) break; // Get the byte offset of the unextended element Byte = Byte / ExtBytesPerElement * OpBytesPerElement; // ...then add the byte offset relative to that element. Byte += SubByte - MinSubByte; if (Byte % BytesPerElement != 0) break; Op = Op.getOperand(0); Index = Byte / BytesPerElement; Force = true; } else break; } if (Force) { if (Op.getValueType() != VecVT) { Op = DAG.getNode(ISD::BITCAST, DL, VecVT, Op); DCI.AddToWorklist(Op.getNode()); } return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Op, DAG.getConstant(Index, DL, MVT::i32)); } return SDValue(); } // Optimize vector operations in scalar value Op on the basis that Op // is truncated to TruncVT. SDValue SystemZTargetLowering::combineTruncateExtract( const SDLoc &DL, EVT TruncVT, SDValue Op, DAGCombinerInfo &DCI) const { // If we have (trunc (extract_vector_elt X, Y)), try to turn it into // (extract_vector_elt (bitcast X), Y'), where (bitcast X) has elements // of type TruncVT. if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT && TruncVT.getSizeInBits() % 8 == 0) { SDValue Vec = Op.getOperand(0); EVT VecVT = Vec.getValueType(); if (canTreatAsByteVector(VecVT)) { if (auto *IndexN = dyn_cast(Op.getOperand(1))) { unsigned BytesPerElement = VecVT.getVectorElementType().getStoreSize(); unsigned TruncBytes = TruncVT.getStoreSize(); if (BytesPerElement % TruncBytes == 0) { // Calculate the value of Y' in the above description. We are // splitting the original elements into Scale equal-sized pieces // and for truncation purposes want the last (least-significant) // of these pieces for IndexN. 
This is easiest to do by calculating // the start index of the following element and then subtracting 1. unsigned Scale = BytesPerElement / TruncBytes; unsigned NewIndex = (IndexN->getZExtValue() + 1) * Scale - 1; // Defer the creation of the bitcast from X to combineExtract, // which might be able to optimize the extraction. - VecVT = MVT::getVectorVT(MVT::getIntegerVT(TruncBytes * 8), + VecVT = EVT::getVectorVT(*DCI.DAG.getContext(), + MVT::getIntegerVT(TruncBytes * 8), VecVT.getStoreSize() / TruncBytes); EVT ResVT = (TruncBytes < 4 ? MVT::i32 : TruncVT); return combineExtract(DL, ResVT, VecVT, Vec, NewIndex, DCI, true); } } } } return SDValue(); } SDValue SystemZTargetLowering::combineZERO_EXTEND( SDNode *N, DAGCombinerInfo &DCI) const { // Convert (zext (select_ccmask C1, C2)) into (select_ccmask C1', C2') SelectionDAG &DAG = DCI.DAG; SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); if (N0.getOpcode() == SystemZISD::SELECT_CCMASK) { auto *TrueOp = dyn_cast(N0.getOperand(0)); auto *FalseOp = dyn_cast(N0.getOperand(1)); if (TrueOp && FalseOp) { SDLoc DL(N0); SDValue Ops[] = { DAG.getConstant(TrueOp->getZExtValue(), DL, VT), DAG.getConstant(FalseOp->getZExtValue(), DL, VT), N0.getOperand(2), N0.getOperand(3), N0.getOperand(4) }; SDValue NewSelect = DAG.getNode(SystemZISD::SELECT_CCMASK, DL, VT, Ops); // If N0 has multiple uses, change other uses as well. if (!N0.hasOneUse()) { SDValue TruncSelect = DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), NewSelect); DCI.CombineTo(N0.getNode(), TruncSelect); } return NewSelect; } } // Convert (zext (xor (trunc X), C)) into (xor (trunc X), C') if the size // of the result is smaller than the size of X and all the truncated bits // of X are already zero. if (N0.getOpcode() == ISD::XOR && N0.hasOneUse() && N0.getOperand(0).hasOneUse() && N0.getOperand(0).getOpcode() == ISD::TRUNCATE && N0.getOperand(1).getOpcode() == ISD::Constant) { SDValue X = N0.getOperand(0).getOperand(0); if (VT.isScalarInteger() && VT.getSizeInBits() < X.getValueSizeInBits()) { KnownBits Known = DAG.computeKnownBits(X); APInt TruncatedBits = APInt::getBitsSet(X.getValueSizeInBits(), N0.getValueSizeInBits(), VT.getSizeInBits()); if (TruncatedBits.isSubsetOf(Known.Zero)) { X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X); APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits()); return DAG.getNode(ISD::XOR, SDLoc(N0), VT, X, DAG.getConstant(Mask, SDLoc(N0), VT)); } } } return SDValue(); } SDValue SystemZTargetLowering::combineSIGN_EXTEND_INREG( SDNode *N, DAGCombinerInfo &DCI) const { // Convert (sext_in_reg (setcc LHS, RHS, COND), i1) // and (sext_in_reg (any_extend (setcc LHS, RHS, COND)), i1) // into (select_cc LHS, RHS, -1, 0, COND) SelectionDAG &DAG = DCI.DAG; SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); EVT EVT = cast(N->getOperand(1))->getVT(); if (N0.hasOneUse() && N0.getOpcode() == ISD::ANY_EXTEND) N0 = N0.getOperand(0); if (EVT == MVT::i1 && N0.hasOneUse() && N0.getOpcode() == ISD::SETCC) { SDLoc DL(N0); SDValue Ops[] = { N0.getOperand(0), N0.getOperand(1), DAG.getAllOnesConstant(DL, VT), DAG.getConstant(0, DL, VT), N0.getOperand(2) }; return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops); } return SDValue(); } SDValue SystemZTargetLowering::combineSIGN_EXTEND( SDNode *N, DAGCombinerInfo &DCI) const { // Convert (sext (ashr (shl X, C1), C2)) to // (ashr (shl (anyext X), C1'), C2')), since wider shifts are as // cheap as narrower ones. 
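  //
  // Concrete example (made up for illustration):
  //   i64 (sext (i32 ashr (shl X, 24), 24))
  // becomes (ashr (shl (anyext X to i64), 56), 56), with Extra = 64 - 32 = 32
  // added to both shift amounts.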
SelectionDAG &DAG = DCI.DAG; SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); if (N0.hasOneUse() && N0.getOpcode() == ISD::SRA) { auto *SraAmt = dyn_cast(N0.getOperand(1)); SDValue Inner = N0.getOperand(0); if (SraAmt && Inner.hasOneUse() && Inner.getOpcode() == ISD::SHL) { if (auto *ShlAmt = dyn_cast(Inner.getOperand(1))) { unsigned Extra = (VT.getSizeInBits() - N0.getValueSizeInBits()); unsigned NewShlAmt = ShlAmt->getZExtValue() + Extra; unsigned NewSraAmt = SraAmt->getZExtValue() + Extra; EVT ShiftVT = N0.getOperand(1).getValueType(); SDValue Ext = DAG.getNode(ISD::ANY_EXTEND, SDLoc(Inner), VT, Inner.getOperand(0)); SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(Inner), VT, Ext, DAG.getConstant(NewShlAmt, SDLoc(Inner), ShiftVT)); return DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, DAG.getConstant(NewSraAmt, SDLoc(N0), ShiftVT)); } } } return SDValue(); } SDValue SystemZTargetLowering::combineMERGE( SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; unsigned Opcode = N->getOpcode(); SDValue Op0 = N->getOperand(0); SDValue Op1 = N->getOperand(1); if (Op0.getOpcode() == ISD::BITCAST) Op0 = Op0.getOperand(0); if (ISD::isBuildVectorAllZeros(Op0.getNode())) { // (z_merge_* 0, 0) -> 0. This is mostly useful for using VLLEZF // for v4f32. if (Op1 == N->getOperand(0)) return Op1; // (z_merge_? 0, X) -> (z_unpackl_? 0, X). EVT VT = Op1.getValueType(); unsigned ElemBytes = VT.getVectorElementType().getStoreSize(); if (ElemBytes <= 4) { Opcode = (Opcode == SystemZISD::MERGE_HIGH ? SystemZISD::UNPACKL_HIGH : SystemZISD::UNPACKL_LOW); EVT InVT = VT.changeVectorElementTypeToInteger(); EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(ElemBytes * 16), SystemZ::VectorBytes / ElemBytes / 2); if (VT != InVT) { Op1 = DAG.getNode(ISD::BITCAST, SDLoc(N), InVT, Op1); DCI.AddToWorklist(Op1.getNode()); } SDValue Op = DAG.getNode(Opcode, SDLoc(N), OutVT, Op1); DCI.AddToWorklist(Op.getNode()); return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op); } } return SDValue(); } static bool isI128MovedToParts(LoadSDNode *LD, SDNode *&LoPart, SDNode *&HiPart) { LoPart = HiPart = nullptr; // Scan through all users. for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end(); UI != UIEnd; ++UI) { // Skip the uses of the chain. if (UI.getUse().getResNo() != 0) continue; // Verify every user is a TRUNCATE to i64 of the low or high half. SDNode *User = *UI; bool IsLoPart = true; if (User->getOpcode() == ISD::SRL && User->getOperand(1).getOpcode() == ISD::Constant && User->getConstantOperandVal(1) == 64 && User->hasOneUse()) { User = *User->use_begin(); IsLoPart = false; } if (User->getOpcode() != ISD::TRUNCATE || User->getValueType(0) != MVT::i64) return false; if (IsLoPart) { if (LoPart) return false; LoPart = User; } else { if (HiPart) return false; HiPart = User; } } return true; } static bool isF128MovedToParts(LoadSDNode *LD, SDNode *&LoPart, SDNode *&HiPart) { LoPart = HiPart = nullptr; // Scan through all users. for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end(); UI != UIEnd; ++UI) { // Skip the uses of the chain. if (UI.getUse().getResNo() != 0) continue; // Verify every user is an EXTRACT_SUBREG of the low or high half. 
SDNode *User = *UI; if (!User->hasOneUse() || !User->isMachineOpcode() || User->getMachineOpcode() != TargetOpcode::EXTRACT_SUBREG) return false; switch (User->getConstantOperandVal(1)) { case SystemZ::subreg_l64: if (LoPart) return false; LoPart = User; break; case SystemZ::subreg_h64: if (HiPart) return false; HiPart = User; break; default: return false; } } return true; } SDValue SystemZTargetLowering::combineLOAD( SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; EVT LdVT = N->getValueType(0); SDLoc DL(N); // Replace a 128-bit load that is used solely to move its value into GPRs // by separate loads of both halves. LoadSDNode *LD = cast(N); if (LD->isSimple() && ISD::isNormalLoad(LD)) { SDNode *LoPart, *HiPart; if ((LdVT == MVT::i128 && isI128MovedToParts(LD, LoPart, HiPart)) || (LdVT == MVT::f128 && isF128MovedToParts(LD, LoPart, HiPart))) { // Rewrite each extraction as an independent load. SmallVector ArgChains; if (HiPart) { SDValue EltLoad = DAG.getLoad( HiPart->getValueType(0), DL, LD->getChain(), LD->getBasePtr(), LD->getPointerInfo(), LD->getOriginalAlign(), LD->getMemOperand()->getFlags(), LD->getAAInfo()); DCI.CombineTo(HiPart, EltLoad, true); ArgChains.push_back(EltLoad.getValue(1)); } if (LoPart) { SDValue EltLoad = DAG.getLoad( LoPart->getValueType(0), DL, LD->getChain(), DAG.getObjectPtrOffset(DL, LD->getBasePtr(), TypeSize::getFixed(8)), LD->getPointerInfo().getWithOffset(8), LD->getOriginalAlign(), LD->getMemOperand()->getFlags(), LD->getAAInfo()); DCI.CombineTo(LoPart, EltLoad, true); ArgChains.push_back(EltLoad.getValue(1)); } // Collect all chains via TokenFactor. SDValue Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, ArgChains); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain); DCI.AddToWorklist(Chain.getNode()); return SDValue(N, 0); } } if (LdVT.isVector() || LdVT.isInteger()) return SDValue(); // Transform a scalar load that is REPLICATEd as well as having other // use(s) to the form where the other use(s) use the first element of the // REPLICATE instead of the load. Otherwise instruction selection will not // produce a VLREP. Avoid extracting to a GPR, so only do this for floating // point loads. SDValue Replicate; SmallVector OtherUses; for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end(); UI != UE; ++UI) { if (UI->getOpcode() == SystemZISD::REPLICATE) { if (Replicate) return SDValue(); // Should never happen Replicate = SDValue(*UI, 0); } else if (UI.getUse().getResNo() == 0) OtherUses.push_back(*UI); } if (!Replicate || OtherUses.empty()) return SDValue(); SDValue Extract0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, LdVT, Replicate, DAG.getConstant(0, DL, MVT::i32)); // Update uses of the loaded Value while preserving old chains. for (SDNode *U : OtherUses) { SmallVector Ops; for (SDValue Op : U->ops()) Ops.push_back((Op.getNode() == N && Op.getResNo() == 0) ? 
Extract0 : Op); DAG.UpdateNodeOperands(U, Ops); } return SDValue(N, 0); } bool SystemZTargetLowering::canLoadStoreByteSwapped(EVT VT) const { if (VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64) return true; if (Subtarget.hasVectorEnhancements2()) if (VT == MVT::v8i16 || VT == MVT::v4i32 || VT == MVT::v2i64 || VT == MVT::i128) return true; return false; } static bool isVectorElementSwap(ArrayRef M, EVT VT) { if (!VT.isVector() || !VT.isSimple() || VT.getSizeInBits() != 128 || VT.getScalarSizeInBits() % 8 != 0) return false; unsigned NumElts = VT.getVectorNumElements(); for (unsigned i = 0; i < NumElts; ++i) { if (M[i] < 0) continue; // ignore UNDEF indices if ((unsigned) M[i] != NumElts - 1 - i) return false; } return true; } static bool isOnlyUsedByStores(SDValue StoredVal, SelectionDAG &DAG) { for (auto *U : StoredVal->uses()) { if (StoreSDNode *ST = dyn_cast(U)) { EVT CurrMemVT = ST->getMemoryVT().getScalarType(); if (CurrMemVT.isRound() && CurrMemVT.getStoreSize() <= 16) continue; } else if (isa(U)) { SDValue BuildVector = SDValue(U, 0); if (DAG.isSplatValue(BuildVector, true/*AllowUndefs*/) && isOnlyUsedByStores(BuildVector, DAG)) continue; } return false; } return true; } static bool isI128MovedFromParts(SDValue Val, SDValue &LoPart, SDValue &HiPart) { if (Val.getOpcode() != ISD::OR || !Val.getNode()->hasOneUse()) return false; SDValue Op0 = Val.getOperand(0); SDValue Op1 = Val.getOperand(1); if (Op0.getOpcode() == ISD::SHL) std::swap(Op0, Op1); if (Op1.getOpcode() != ISD::SHL || !Op1.getNode()->hasOneUse() || Op1.getOperand(1).getOpcode() != ISD::Constant || Op1.getConstantOperandVal(1) != 64) return false; Op1 = Op1.getOperand(0); if (Op0.getOpcode() != ISD::ZERO_EXTEND || !Op0.getNode()->hasOneUse() || Op0.getOperand(0).getValueType() != MVT::i64) return false; if (Op1.getOpcode() != ISD::ANY_EXTEND || !Op1.getNode()->hasOneUse() || Op1.getOperand(0).getValueType() != MVT::i64) return false; LoPart = Op0.getOperand(0); HiPart = Op1.getOperand(0); return true; } static bool isF128MovedFromParts(SDValue Val, SDValue &LoPart, SDValue &HiPart) { if (!Val.getNode()->hasOneUse() || !Val.isMachineOpcode() || Val.getMachineOpcode() != TargetOpcode::REG_SEQUENCE) return false; if (Val->getNumOperands() != 5 || Val->getOperand(0)->getAsZExtVal() != SystemZ::FP128BitRegClassID || Val->getOperand(2)->getAsZExtVal() != SystemZ::subreg_l64 || Val->getOperand(4)->getAsZExtVal() != SystemZ::subreg_h64) return false; LoPart = Val->getOperand(1); HiPart = Val->getOperand(3); return true; } SDValue SystemZTargetLowering::combineSTORE( SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; auto *SN = cast(N); auto &Op1 = N->getOperand(1); EVT MemVT = SN->getMemoryVT(); // If we have (truncstoreiN (extract_vector_elt X, Y), Z) then it is better // for the extraction to be done on a vMiN value, so that we can use VSTE. // If X has wider elements then convert it to: // (truncstoreiN (extract_vector_elt (bitcast X), Y2), Z). if (MemVT.isInteger() && SN->isTruncatingStore()) { if (SDValue Value = combineTruncateExtract(SDLoc(N), MemVT, SN->getValue(), DCI)) { DCI.AddToWorklist(Value.getNode()); // Rewrite the store with the new form of stored value. 
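      //
      // e.g. (hedged example): (truncstorei16 (i32 extract_vector_elt
      // (v4i32 X), 1)) is rewritten so the extraction happens on the bitcast
      // v8i16 form at element 3, which lets a single VSTEH store the
      // halfword straight from the vector register.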
return DAG.getTruncStore(SN->getChain(), SDLoc(SN), Value, SN->getBasePtr(), SN->getMemoryVT(), SN->getMemOperand()); } } // Combine STORE (BSWAP) into STRVH/STRV/STRVG/VSTBR if (!SN->isTruncatingStore() && Op1.getOpcode() == ISD::BSWAP && Op1.getNode()->hasOneUse() && canLoadStoreByteSwapped(Op1.getValueType())) { SDValue BSwapOp = Op1.getOperand(0); if (BSwapOp.getValueType() == MVT::i16) BSwapOp = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), MVT::i32, BSwapOp); SDValue Ops[] = { N->getOperand(0), BSwapOp, N->getOperand(2) }; return DAG.getMemIntrinsicNode(SystemZISD::STRV, SDLoc(N), DAG.getVTList(MVT::Other), Ops, MemVT, SN->getMemOperand()); } // Combine STORE (element-swap) into VSTER if (!SN->isTruncatingStore() && Op1.getOpcode() == ISD::VECTOR_SHUFFLE && Op1.getNode()->hasOneUse() && Subtarget.hasVectorEnhancements2()) { ShuffleVectorSDNode *SVN = cast(Op1.getNode()); ArrayRef ShuffleMask = SVN->getMask(); if (isVectorElementSwap(ShuffleMask, Op1.getValueType())) { SDValue Ops[] = { N->getOperand(0), Op1.getOperand(0), N->getOperand(2) }; return DAG.getMemIntrinsicNode(SystemZISD::VSTER, SDLoc(N), DAG.getVTList(MVT::Other), Ops, MemVT, SN->getMemOperand()); } } // Combine STORE (READCYCLECOUNTER) into STCKF. if (!SN->isTruncatingStore() && Op1.getOpcode() == ISD::READCYCLECOUNTER && Op1.hasOneUse() && N->getOperand(0).reachesChainWithoutSideEffects(SDValue(Op1.getNode(), 1))) { SDValue Ops[] = { Op1.getOperand(0), N->getOperand(2) }; return DAG.getMemIntrinsicNode(SystemZISD::STCKF, SDLoc(N), DAG.getVTList(MVT::Other), Ops, MemVT, SN->getMemOperand()); } // Transform a store of a 128-bit value moved from parts into two stores. if (SN->isSimple() && ISD::isNormalStore(SN)) { SDValue LoPart, HiPart; if ((MemVT == MVT::i128 && isI128MovedFromParts(Op1, LoPart, HiPart)) || (MemVT == MVT::f128 && isF128MovedFromParts(Op1, LoPart, HiPart))) { SDLoc DL(SN); SDValue Chain0 = DAG.getStore(SN->getChain(), DL, HiPart, SN->getBasePtr(), SN->getPointerInfo(), SN->getOriginalAlign(), SN->getMemOperand()->getFlags(), SN->getAAInfo()); SDValue Chain1 = DAG.getStore(SN->getChain(), DL, LoPart, DAG.getObjectPtrOffset(DL, SN->getBasePtr(), TypeSize::getFixed(8)), SN->getPointerInfo().getWithOffset(8), SN->getOriginalAlign(), SN->getMemOperand()->getFlags(), SN->getAAInfo()); return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chain0, Chain1); } } // Replicate a reg or immediate with VREP instead of scalar multiply or // immediate load. It seems best to do this during the first DAGCombine as // it is straight-forward to handle the zero-extend node in the initial // DAG, and also not worry about the keeping the new MemVT legal (e.g. when // extracting an i16 element from a v16i8 vector). if (Subtarget.hasVector() && DCI.Level == BeforeLegalizeTypes && isOnlyUsedByStores(Op1, DAG)) { SDValue Word = SDValue(); EVT WordVT; // Find a replicated immediate and return it if found in Word and its // type in WordVT. auto FindReplicatedImm = [&](ConstantSDNode *C, unsigned TotBytes) { // Some constants are better handled with a scalar store. if (C->getAPIntValue().getBitWidth() > 64 || C->isAllOnes() || isInt<16>(C->getSExtValue()) || MemVT.getStoreSize() <= 2) return; SystemZVectorConstantInfo VCI(APInt(TotBytes * 8, C->getZExtValue())); if (VCI.isVectorConstantLegal(Subtarget) && VCI.Opcode == SystemZISD::REPLICATE) { Word = DAG.getConstant(VCI.OpVals[0], SDLoc(SN), MVT::i32); WordVT = VCI.VecVT.getScalarType(); } }; // Find a replicated register and return it if found in Word and its type // in WordVT. 
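  //
  // Sketch of the pattern matched below (invented operands): a store of
  //   (mul (zext i16 %x to i64), 0x0001000100010001)
  // is a 64-bit value whose four halfwords all equal %x, so Word becomes %x
  // with WordVT == i16 and the store is emitted by splatting %x into a
  // vector register instead of materializing the scalar multiply.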
auto FindReplicatedReg = [&](SDValue MulOp) { EVT MulVT = MulOp.getValueType(); if (MulOp->getOpcode() == ISD::MUL && (MulVT == MVT::i16 || MulVT == MVT::i32 || MulVT == MVT::i64)) { // Find a zero extended value and its type. SDValue LHS = MulOp->getOperand(0); if (LHS->getOpcode() == ISD::ZERO_EXTEND) WordVT = LHS->getOperand(0).getValueType(); else if (LHS->getOpcode() == ISD::AssertZext) WordVT = cast(LHS->getOperand(1))->getVT(); else return; // Find a replicating constant, e.g. 0x00010001. if (auto *C = dyn_cast(MulOp->getOperand(1))) { SystemZVectorConstantInfo VCI( APInt(MulVT.getSizeInBits(), C->getZExtValue())); if (VCI.isVectorConstantLegal(Subtarget) && VCI.Opcode == SystemZISD::REPLICATE && VCI.OpVals[0] == 1 && WordVT == VCI.VecVT.getScalarType()) Word = DAG.getZExtOrTrunc(LHS->getOperand(0), SDLoc(SN), WordVT); } } }; if (isa(Op1) && DAG.isSplatValue(Op1, true/*AllowUndefs*/)) { SDValue SplatVal = Op1->getOperand(0); if (auto *C = dyn_cast(SplatVal)) FindReplicatedImm(C, SplatVal.getValueType().getStoreSize()); else FindReplicatedReg(SplatVal); } else { if (auto *C = dyn_cast(Op1)) FindReplicatedImm(C, MemVT.getStoreSize()); else FindReplicatedReg(Op1); } if (Word != SDValue()) { assert(MemVT.getSizeInBits() % WordVT.getSizeInBits() == 0 && "Bad type handling"); unsigned NumElts = MemVT.getSizeInBits() / WordVT.getSizeInBits(); EVT SplatVT = EVT::getVectorVT(*DAG.getContext(), WordVT, NumElts); SDValue SplatVal = DAG.getSplatVector(SplatVT, SDLoc(SN), Word); return DAG.getStore(SN->getChain(), SDLoc(SN), SplatVal, SN->getBasePtr(), SN->getMemOperand()); } } return SDValue(); } SDValue SystemZTargetLowering::combineVECTOR_SHUFFLE( SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; // Combine element-swap (LOAD) into VLER if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) && N->getOperand(0).hasOneUse() && Subtarget.hasVectorEnhancements2()) { ShuffleVectorSDNode *SVN = cast(N); ArrayRef ShuffleMask = SVN->getMask(); if (isVectorElementSwap(ShuffleMask, N->getValueType(0))) { SDValue Load = N->getOperand(0); LoadSDNode *LD = cast(Load); // Create the element-swapping load. SDValue Ops[] = { LD->getChain(), // Chain LD->getBasePtr() // Ptr }; SDValue ESLoad = DAG.getMemIntrinsicNode(SystemZISD::VLER, SDLoc(N), DAG.getVTList(LD->getValueType(0), MVT::Other), Ops, LD->getMemoryVT(), LD->getMemOperand()); // First, combine the VECTOR_SHUFFLE away. This makes the value produced // by the load dead. DCI.CombineTo(N, ESLoad); // Next, combine the load away, we give it a bogus result value but a real // chain result. The result value is dead because the shuffle is dead. DCI.CombineTo(Load.getNode(), ESLoad, ESLoad.getValue(1)); // Return N so it doesn't get rechecked! return SDValue(N, 0); } } return SDValue(); } SDValue SystemZTargetLowering::combineEXTRACT_VECTOR_ELT( SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; if (!Subtarget.hasVector()) return SDValue(); // Look through bitcasts that retain the number of vector elements. SDValue Op = N->getOperand(0); if (Op.getOpcode() == ISD::BITCAST && Op.getValueType().isVector() && Op.getOperand(0).getValueType().isVector() && Op.getValueType().getVectorNumElements() == Op.getOperand(0).getValueType().getVectorNumElements()) Op = Op.getOperand(0); // Pull BSWAP out of a vector extraction. 
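  //
  // i.e. (illustrative): (i32 extract_vector_elt (v4i32 bswap V), 2) is
  // rewritten as (i32 bswap (extract_vector_elt V, 2)), so only the extracted
  // lane is byte-swapped rather than the whole vector.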
if (Op.getOpcode() == ISD::BSWAP && Op.hasOneUse()) { EVT VecVT = Op.getValueType(); EVT EltVT = VecVT.getVectorElementType(); Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), EltVT, Op.getOperand(0), N->getOperand(1)); DCI.AddToWorklist(Op.getNode()); Op = DAG.getNode(ISD::BSWAP, SDLoc(N), EltVT, Op); if (EltVT != N->getValueType(0)) { DCI.AddToWorklist(Op.getNode()); Op = DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), Op); } return Op; } // Try to simplify a vector extraction. if (auto *IndexN = dyn_cast(N->getOperand(1))) { SDValue Op0 = N->getOperand(0); EVT VecVT = Op0.getValueType(); return combineExtract(SDLoc(N), N->getValueType(0), VecVT, Op0, IndexN->getZExtValue(), DCI, false); } return SDValue(); } SDValue SystemZTargetLowering::combineJOIN_DWORDS( SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; // (join_dwords X, X) == (replicate X) if (N->getOperand(0) == N->getOperand(1)) return DAG.getNode(SystemZISD::REPLICATE, SDLoc(N), N->getValueType(0), N->getOperand(0)); return SDValue(); } static SDValue MergeInputChains(SDNode *N1, SDNode *N2) { SDValue Chain1 = N1->getOperand(0); SDValue Chain2 = N2->getOperand(0); // Trivial case: both nodes take the same chain. if (Chain1 == Chain2) return Chain1; // FIXME - we could handle more complex cases via TokenFactor, // assuming we can verify that this would not create a cycle. return SDValue(); } SDValue SystemZTargetLowering::combineFP_ROUND( SDNode *N, DAGCombinerInfo &DCI) const { if (!Subtarget.hasVector()) return SDValue(); // (fpround (extract_vector_elt X 0)) // (fpround (extract_vector_elt X 1)) -> // (extract_vector_elt (VROUND X) 0) // (extract_vector_elt (VROUND X) 2) // // This is a special case since the target doesn't really support v2f32s. unsigned OpNo = N->isStrictFPOpcode() ? 
1 : 0; SelectionDAG &DAG = DCI.DAG; SDValue Op0 = N->getOperand(OpNo); if (N->getValueType(0) == MVT::f32 && Op0.hasOneUse() && Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT && Op0.getOperand(0).getValueType() == MVT::v2f64 && Op0.getOperand(1).getOpcode() == ISD::Constant && Op0.getConstantOperandVal(1) == 0) { SDValue Vec = Op0.getOperand(0); for (auto *U : Vec->uses()) { if (U != Op0.getNode() && U->hasOneUse() && U->getOpcode() == ISD::EXTRACT_VECTOR_ELT && U->getOperand(0) == Vec && U->getOperand(1).getOpcode() == ISD::Constant && U->getConstantOperandVal(1) == 1) { SDValue OtherRound = SDValue(*U->use_begin(), 0); if (OtherRound.getOpcode() == N->getOpcode() && OtherRound.getOperand(OpNo) == SDValue(U, 0) && OtherRound.getValueType() == MVT::f32) { SDValue VRound, Chain; if (N->isStrictFPOpcode()) { Chain = MergeInputChains(N, OtherRound.getNode()); if (!Chain) continue; VRound = DAG.getNode(SystemZISD::STRICT_VROUND, SDLoc(N), {MVT::v4f32, MVT::Other}, {Chain, Vec}); Chain = VRound.getValue(1); } else VRound = DAG.getNode(SystemZISD::VROUND, SDLoc(N), MVT::v4f32, Vec); DCI.AddToWorklist(VRound.getNode()); SDValue Extract1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f32, VRound, DAG.getConstant(2, SDLoc(U), MVT::i32)); DCI.AddToWorklist(Extract1.getNode()); DAG.ReplaceAllUsesOfValueWith(OtherRound, Extract1); if (Chain) DAG.ReplaceAllUsesOfValueWith(OtherRound.getValue(1), Chain); SDValue Extract0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f32, VRound, DAG.getConstant(0, SDLoc(Op0), MVT::i32)); if (Chain) return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op0), N->getVTList(), Extract0, Chain); return Extract0; } } } } return SDValue(); } SDValue SystemZTargetLowering::combineFP_EXTEND( SDNode *N, DAGCombinerInfo &DCI) const { if (!Subtarget.hasVector()) return SDValue(); // (fpextend (extract_vector_elt X 0)) // (fpextend (extract_vector_elt X 2)) -> // (extract_vector_elt (VEXTEND X) 0) // (extract_vector_elt (VEXTEND X) 1) // // This is a special case since the target doesn't really support v2f32s. unsigned OpNo = N->isStrictFPOpcode() ? 
1 : 0; SelectionDAG &DAG = DCI.DAG; SDValue Op0 = N->getOperand(OpNo); if (N->getValueType(0) == MVT::f64 && Op0.hasOneUse() && Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT && Op0.getOperand(0).getValueType() == MVT::v4f32 && Op0.getOperand(1).getOpcode() == ISD::Constant && Op0.getConstantOperandVal(1) == 0) { SDValue Vec = Op0.getOperand(0); for (auto *U : Vec->uses()) { if (U != Op0.getNode() && U->hasOneUse() && U->getOpcode() == ISD::EXTRACT_VECTOR_ELT && U->getOperand(0) == Vec && U->getOperand(1).getOpcode() == ISD::Constant && U->getConstantOperandVal(1) == 2) { SDValue OtherExtend = SDValue(*U->use_begin(), 0); if (OtherExtend.getOpcode() == N->getOpcode() && OtherExtend.getOperand(OpNo) == SDValue(U, 0) && OtherExtend.getValueType() == MVT::f64) { SDValue VExtend, Chain; if (N->isStrictFPOpcode()) { Chain = MergeInputChains(N, OtherExtend.getNode()); if (!Chain) continue; VExtend = DAG.getNode(SystemZISD::STRICT_VEXTEND, SDLoc(N), {MVT::v2f64, MVT::Other}, {Chain, Vec}); Chain = VExtend.getValue(1); } else VExtend = DAG.getNode(SystemZISD::VEXTEND, SDLoc(N), MVT::v2f64, Vec); DCI.AddToWorklist(VExtend.getNode()); SDValue Extract1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f64, VExtend, DAG.getConstant(1, SDLoc(U), MVT::i32)); DCI.AddToWorklist(Extract1.getNode()); DAG.ReplaceAllUsesOfValueWith(OtherExtend, Extract1); if (Chain) DAG.ReplaceAllUsesOfValueWith(OtherExtend.getValue(1), Chain); SDValue Extract0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f64, VExtend, DAG.getConstant(0, SDLoc(Op0), MVT::i32)); if (Chain) return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op0), N->getVTList(), Extract0, Chain); return Extract0; } } } } return SDValue(); } SDValue SystemZTargetLowering::combineINT_TO_FP( SDNode *N, DAGCombinerInfo &DCI) const { if (DCI.Level != BeforeLegalizeTypes) return SDValue(); SelectionDAG &DAG = DCI.DAG; LLVMContext &Ctx = *DAG.getContext(); unsigned Opcode = N->getOpcode(); EVT OutVT = N->getValueType(0); Type *OutLLVMTy = OutVT.getTypeForEVT(Ctx); SDValue Op = N->getOperand(0); unsigned OutScalarBits = OutLLVMTy->getScalarSizeInBits(); unsigned InScalarBits = Op->getValueType(0).getScalarSizeInBits(); // Insert an extension before type-legalization to avoid scalarization, e.g.: // v2f64 = uint_to_fp v2i16 // => // v2f64 = uint_to_fp (v2i64 zero_extend v2i16) if (OutLLVMTy->isVectorTy() && OutScalarBits > InScalarBits && OutScalarBits <= 64) { unsigned NumElts = cast(OutLLVMTy)->getNumElements(); EVT ExtVT = EVT::getVectorVT( Ctx, EVT::getIntegerVT(Ctx, OutLLVMTy->getScalarSizeInBits()), NumElts); unsigned ExtOpcode = (Opcode == ISD::UINT_TO_FP ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND); SDValue ExtOp = DAG.getNode(ExtOpcode, SDLoc(N), ExtVT, Op); return DAG.getNode(Opcode, SDLoc(N), OutVT, ExtOp); } return SDValue(); } SDValue SystemZTargetLowering::combineBSWAP( SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; // Combine BSWAP (LOAD) into LRVH/LRV/LRVG/VLBR if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) && N->getOperand(0).hasOneUse() && canLoadStoreByteSwapped(N->getValueType(0))) { SDValue Load = N->getOperand(0); LoadSDNode *LD = cast(Load); // Create the byte-swapping load. SDValue Ops[] = { LD->getChain(), // Chain LD->getBasePtr() // Ptr }; EVT LoadVT = N->getValueType(0); if (LoadVT == MVT::i16) LoadVT = MVT::i32; SDValue BSLoad = DAG.getMemIntrinsicNode(SystemZISD::LRV, SDLoc(N), DAG.getVTList(LoadVT, MVT::Other), Ops, LD->getMemoryVT(), LD->getMemOperand()); // If this is an i16 load, insert the truncate. 
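    //
    // For example (sketch): (i16 bswap (i16 load %p)) becomes a byte-reversed
    // load performed at i32 width (an LRVH-style load), truncated back to i16
    // below, so no separate in-register swap is needed.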
SDValue ResVal = BSLoad; if (N->getValueType(0) == MVT::i16) ResVal = DAG.getNode(ISD::TRUNCATE, SDLoc(N), MVT::i16, BSLoad); // First, combine the bswap away. This makes the value produced by the // load dead. DCI.CombineTo(N, ResVal); // Next, combine the load away, we give it a bogus result value but a real // chain result. The result value is dead because the bswap is dead. DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1)); // Return N so it doesn't get rechecked! return SDValue(N, 0); } // Look through bitcasts that retain the number of vector elements. SDValue Op = N->getOperand(0); if (Op.getOpcode() == ISD::BITCAST && Op.getValueType().isVector() && Op.getOperand(0).getValueType().isVector() && Op.getValueType().getVectorNumElements() == Op.getOperand(0).getValueType().getVectorNumElements()) Op = Op.getOperand(0); // Push BSWAP into a vector insertion if at least one side then simplifies. if (Op.getOpcode() == ISD::INSERT_VECTOR_ELT && Op.hasOneUse()) { SDValue Vec = Op.getOperand(0); SDValue Elt = Op.getOperand(1); SDValue Idx = Op.getOperand(2); if (DAG.isConstantIntBuildVectorOrConstantInt(Vec) || Vec.getOpcode() == ISD::BSWAP || Vec.isUndef() || DAG.isConstantIntBuildVectorOrConstantInt(Elt) || Elt.getOpcode() == ISD::BSWAP || Elt.isUndef() || (canLoadStoreByteSwapped(N->getValueType(0)) && ISD::isNON_EXTLoad(Elt.getNode()) && Elt.hasOneUse())) { EVT VecVT = N->getValueType(0); EVT EltVT = N->getValueType(0).getVectorElementType(); if (VecVT != Vec.getValueType()) { Vec = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Vec); DCI.AddToWorklist(Vec.getNode()); } if (EltVT != Elt.getValueType()) { Elt = DAG.getNode(ISD::BITCAST, SDLoc(N), EltVT, Elt); DCI.AddToWorklist(Elt.getNode()); } Vec = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Vec); DCI.AddToWorklist(Vec.getNode()); Elt = DAG.getNode(ISD::BSWAP, SDLoc(N), EltVT, Elt); DCI.AddToWorklist(Elt.getNode()); return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), VecVT, Vec, Elt, Idx); } } // Push BSWAP into a vector shuffle if at least one side then simplifies. ShuffleVectorSDNode *SV = dyn_cast(Op); if (SV && Op.hasOneUse()) { SDValue Op0 = Op.getOperand(0); SDValue Op1 = Op.getOperand(1); if (DAG.isConstantIntBuildVectorOrConstantInt(Op0) || Op0.getOpcode() == ISD::BSWAP || Op0.isUndef() || DAG.isConstantIntBuildVectorOrConstantInt(Op1) || Op1.getOpcode() == ISD::BSWAP || Op1.isUndef()) { EVT VecVT = N->getValueType(0); if (VecVT != Op0.getValueType()) { Op0 = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Op0); DCI.AddToWorklist(Op0.getNode()); } if (VecVT != Op1.getValueType()) { Op1 = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Op1); DCI.AddToWorklist(Op1.getNode()); } Op0 = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Op0); DCI.AddToWorklist(Op0.getNode()); Op1 = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Op1); DCI.AddToWorklist(Op1.getNode()); return DAG.getVectorShuffle(VecVT, SDLoc(N), Op0, Op1, SV->getMask()); } } return SDValue(); } static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask) { // We have a SELECT_CCMASK or BR_CCMASK comparing the condition code // set by the CCReg instruction using the CCValid / CCMask masks, // If the CCReg instruction is itself a ICMP testing the condition // code set by some other instruction, see whether we can directly // use that condition code. // Verify that we have an ICMP against some constant. 
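  //
  // As an illustration (not verbatim from the source): for
  //   %sel = select_ccmask 1, 0, valid, mask, %cc
  //   %cmp = icmp %sel, 0
  //   br_ccmask ... ne ... %cmp
  // the branch can test %cc with the original (possibly inverted) mask
  // directly, making the intermediate SELECT_CCMASK/ICMP pair dead.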
  if (CCValid != SystemZ::CCMASK_ICMP)
    return false;
  auto *ICmp = CCReg.getNode();
  if (ICmp->getOpcode() != SystemZISD::ICMP)
    return false;
  auto *CompareLHS = ICmp->getOperand(0).getNode();
  auto *CompareRHS = dyn_cast<ConstantSDNode>(ICmp->getOperand(1));
  if (!CompareRHS)
    return false;

  // Optimize the case where CompareLHS is a SELECT_CCMASK.
  if (CompareLHS->getOpcode() == SystemZISD::SELECT_CCMASK) {
    // Verify that we have an appropriate mask for an EQ or NE comparison.
    bool Invert = false;
    if (CCMask == SystemZ::CCMASK_CMP_NE)
      Invert = !Invert;
    else if (CCMask != SystemZ::CCMASK_CMP_EQ)
      return false;

    // Verify that the ICMP compares against one of the select values.
    auto *TrueVal = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(0));
    if (!TrueVal)
      return false;
    auto *FalseVal = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(1));
    if (!FalseVal)
      return false;
    if (CompareRHS->getZExtValue() == FalseVal->getZExtValue())
      Invert = !Invert;
    else if (CompareRHS->getZExtValue() != TrueVal->getZExtValue())
      return false;

    // Compute the effective CC mask for the new branch or select.
    auto *NewCCValid = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(2));
    auto *NewCCMask = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(3));
    if (!NewCCValid || !NewCCMask)
      return false;
    CCValid = NewCCValid->getZExtValue();
    CCMask = NewCCMask->getZExtValue();
    if (Invert)
      CCMask ^= CCValid;

    // Return the updated CCReg link.
    CCReg = CompareLHS->getOperand(4);
    return true;
  }

  // Optimize the case where CompareLHS is (SRA (SHL (IPM))).
  if (CompareLHS->getOpcode() == ISD::SRA) {
    auto *SRACount = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(1));
    if (!SRACount || SRACount->getZExtValue() != 30)
      return false;
    auto *SHL = CompareLHS->getOperand(0).getNode();
    if (SHL->getOpcode() != ISD::SHL)
      return false;
    auto *SHLCount = dyn_cast<ConstantSDNode>(SHL->getOperand(1));
    if (!SHLCount || SHLCount->getZExtValue() != 30 - SystemZ::IPM_CC)
      return false;
    auto *IPM = SHL->getOperand(0).getNode();
    if (IPM->getOpcode() != SystemZISD::IPM)
      return false;

    // Avoid introducing CC spills (because SRA would clobber CC).
    if (!CompareLHS->hasOneUse())
      return false;
    // Verify that the ICMP compares against zero.
    if (CompareRHS->getZExtValue() != 0)
      return false;

    // Compute the effective CC mask for the new branch or select.
    CCMask = SystemZ::reverseCCMask(CCMask);

    // Return the updated CCReg link.
    CCReg = IPM->getOperand(0);
    return true;
  }
  return false;
}

SDValue SystemZTargetLowering::combineBR_CCMASK(
    SDNode *N, DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;

  // Combine BR_CCMASK (ICMP (SELECT_CCMASK)) into a single BR_CCMASK.
  auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(1));
  auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(2));
  if (!CCValid || !CCMask)
    return SDValue();

  int CCValidVal = CCValid->getZExtValue();
  int CCMaskVal = CCMask->getZExtValue();
  SDValue Chain = N->getOperand(0);
  SDValue CCReg = N->getOperand(4);

  if (combineCCMask(CCReg, CCValidVal, CCMaskVal))
    return DAG.getNode(SystemZISD::BR_CCMASK, SDLoc(N), N->getValueType(0),
                       Chain,
                       DAG.getTargetConstant(CCValidVal, SDLoc(N), MVT::i32),
                       DAG.getTargetConstant(CCMaskVal, SDLoc(N), MVT::i32),
                       N->getOperand(3), CCReg);
  return SDValue();
}

SDValue SystemZTargetLowering::combineSELECT_CCMASK(
    SDNode *N, DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;

  // Combine SELECT_CCMASK (ICMP (SELECT_CCMASK)) into a single SELECT_CCMASK.
auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(2)); auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(3)); if (!CCValid || !CCMask) return SDValue(); int CCValidVal = CCValid->getZExtValue(); int CCMaskVal = CCMask->getZExtValue(); SDValue CCReg = N->getOperand(4); if (combineCCMask(CCReg, CCValidVal, CCMaskVal)) return DAG.getNode(SystemZISD::SELECT_CCMASK, SDLoc(N), N->getValueType(0), N->getOperand(0), N->getOperand(1), DAG.getTargetConstant(CCValidVal, SDLoc(N), MVT::i32), DAG.getTargetConstant(CCMaskVal, SDLoc(N), MVT::i32), CCReg); return SDValue(); } SDValue SystemZTargetLowering::combineGET_CCMASK( SDNode *N, DAGCombinerInfo &DCI) const { // Optimize away GET_CCMASK (SELECT_CCMASK) if the CC masks are compatible. auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(1)); auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(2)); if (!CCValid || !CCMask) return SDValue(); int CCValidVal = CCValid->getZExtValue(); int CCMaskVal = CCMask->getZExtValue(); SDValue Select = N->getOperand(0); if (Select->getOpcode() == ISD::TRUNCATE) Select = Select->getOperand(0); if (Select->getOpcode() != SystemZISD::SELECT_CCMASK) return SDValue(); auto *SelectCCValid = dyn_cast<ConstantSDNode>(Select->getOperand(2)); auto *SelectCCMask = dyn_cast<ConstantSDNode>(Select->getOperand(3)); if (!SelectCCValid || !SelectCCMask) return SDValue(); int SelectCCValidVal = SelectCCValid->getZExtValue(); int SelectCCMaskVal = SelectCCMask->getZExtValue(); auto *TrueVal = dyn_cast<ConstantSDNode>(Select->getOperand(0)); auto *FalseVal = dyn_cast<ConstantSDNode>(Select->getOperand(1)); if (!TrueVal || !FalseVal) return SDValue(); if (TrueVal->getZExtValue() == 1 && FalseVal->getZExtValue() == 0) ; else if (TrueVal->getZExtValue() == 0 && FalseVal->getZExtValue() == 1) SelectCCMaskVal ^= SelectCCValidVal; else return SDValue(); if (SelectCCValidVal & ~CCValidVal) return SDValue(); if (SelectCCMaskVal != (CCMaskVal & SelectCCValidVal)) return SDValue(); return Select->getOperand(4); } SDValue SystemZTargetLowering::combineIntDIVREM( SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; EVT VT = N->getValueType(0); // In the case where the divisor is a vector of constants, a cheaper // sequence of instructions can replace the divide. BuildSDIV is called to // do this during DAG combining, but it only succeeds when it can build a // multiplication node. The only option for SystemZ is ISD::SMUL_LOHI, and // since it is not Legal but Custom it can only happen before // legalization. Therefore we must scalarize this early before Combine // 1. For widened vectors, this is already the result of type legalization. if (DCI.Level == BeforeLegalizeTypes && VT.isVector() && isTypeLegal(VT) && DAG.isConstantIntBuildVectorOrConstantInt(N->getOperand(1))) return DAG.UnrollVectorOp(N); return SDValue(); } SDValue SystemZTargetLowering::combineINTRINSIC( SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; unsigned Id = N->getConstantOperandVal(1); switch (Id) { // VECTOR LOAD (RIGHTMOST) WITH LENGTH with a length operand of 15 // or larger is simply a vector load. case Intrinsic::s390_vll: case Intrinsic::s390_vlrl: if (auto *C = dyn_cast<ConstantSDNode>(N->getOperand(2))) if (C->getZExtValue() >= 15) return DAG.getLoad(N->getValueType(0), SDLoc(N), N->getOperand(0), N->getOperand(3), MachinePointerInfo()); break; // Likewise for VECTOR STORE (RIGHTMOST) WITH LENGTH.
case Intrinsic::s390_vstl: case Intrinsic::s390_vstrl: if (auto *C = dyn_cast<ConstantSDNode>(N->getOperand(3))) if (C->getZExtValue() >= 15) return DAG.getStore(N->getOperand(0), SDLoc(N), N->getOperand(2), N->getOperand(4), MachinePointerInfo()); break; } return SDValue(); } SDValue SystemZTargetLowering::unwrapAddress(SDValue N) const { if (N->getOpcode() == SystemZISD::PCREL_WRAPPER) return N->getOperand(0); return N; } SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { switch(N->getOpcode()) { default: break; case ISD::ZERO_EXTEND: return combineZERO_EXTEND(N, DCI); case ISD::SIGN_EXTEND: return combineSIGN_EXTEND(N, DCI); case ISD::SIGN_EXTEND_INREG: return combineSIGN_EXTEND_INREG(N, DCI); case SystemZISD::MERGE_HIGH: case SystemZISD::MERGE_LOW: return combineMERGE(N, DCI); case ISD::LOAD: return combineLOAD(N, DCI); case ISD::STORE: return combineSTORE(N, DCI); case ISD::VECTOR_SHUFFLE: return combineVECTOR_SHUFFLE(N, DCI); case ISD::EXTRACT_VECTOR_ELT: return combineEXTRACT_VECTOR_ELT(N, DCI); case SystemZISD::JOIN_DWORDS: return combineJOIN_DWORDS(N, DCI); case ISD::STRICT_FP_ROUND: case ISD::FP_ROUND: return combineFP_ROUND(N, DCI); case ISD::STRICT_FP_EXTEND: case ISD::FP_EXTEND: return combineFP_EXTEND(N, DCI); case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: return combineINT_TO_FP(N, DCI); case ISD::BSWAP: return combineBSWAP(N, DCI); case SystemZISD::BR_CCMASK: return combineBR_CCMASK(N, DCI); case SystemZISD::SELECT_CCMASK: return combineSELECT_CCMASK(N, DCI); case SystemZISD::GET_CCMASK: return combineGET_CCMASK(N, DCI); case ISD::SDIV: case ISD::UDIV: case ISD::SREM: case ISD::UREM: return combineIntDIVREM(N, DCI); case ISD::INTRINSIC_W_CHAIN: case ISD::INTRINSIC_VOID: return combineINTRINSIC(N, DCI); } return SDValue(); } // Return the demanded elements for the OpNo source operand of Op. DemandedElts // are for Op. static APInt getDemandedSrcElements(SDValue Op, const APInt &DemandedElts, unsigned OpNo) { EVT VT = Op.getValueType(); unsigned NumElts = (VT.isVector() ? VT.getVectorNumElements() : 1); APInt SrcDemE; unsigned Opcode = Op.getOpcode(); if (Opcode == ISD::INTRINSIC_WO_CHAIN) { unsigned Id = Op.getConstantOperandVal(0); switch (Id) { case Intrinsic::s390_vpksh: // PACKS case Intrinsic::s390_vpksf: case Intrinsic::s390_vpksg: case Intrinsic::s390_vpkshs: // PACKS_CC case Intrinsic::s390_vpksfs: case Intrinsic::s390_vpksgs: case Intrinsic::s390_vpklsh: // PACKLS case Intrinsic::s390_vpklsf: case Intrinsic::s390_vpklsg: case Intrinsic::s390_vpklshs: // PACKLS_CC case Intrinsic::s390_vpklsfs: case Intrinsic::s390_vpklsgs: // VECTOR PACK truncates the elements of two source vectors into one. SrcDemE = DemandedElts; if (OpNo == 2) SrcDemE.lshrInPlace(NumElts / 2); SrcDemE = SrcDemE.trunc(NumElts / 2); break; // VECTOR UNPACK extends half the elements of the source vector.
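// Illustrative mapping: for a v8i16 result unpacked from a v16i8 source, demanded result element i corresponds to source element i for the HIGH forms and to source element i + 8 for the LOW forms, which is what the insertBits calls below encode.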
case Intrinsic::s390_vuphb: // VECTOR UNPACK HIGH case Intrinsic::s390_vuphh: case Intrinsic::s390_vuphf: case Intrinsic::s390_vuplhb: // VECTOR UNPACK LOGICAL HIGH case Intrinsic::s390_vuplhh: case Intrinsic::s390_vuplhf: SrcDemE = APInt(NumElts * 2, 0); SrcDemE.insertBits(DemandedElts, 0); break; case Intrinsic::s390_vuplb: // VECTOR UNPACK LOW case Intrinsic::s390_vuplhw: case Intrinsic::s390_vuplf: case Intrinsic::s390_vupllb: // VECTOR UNPACK LOGICAL LOW case Intrinsic::s390_vupllh: case Intrinsic::s390_vupllf: SrcDemE = APInt(NumElts * 2, 0); SrcDemE.insertBits(DemandedElts, NumElts); break; case Intrinsic::s390_vpdi: { // VECTOR PERMUTE DWORD IMMEDIATE selects one element from each source. SrcDemE = APInt(NumElts, 0); if (!DemandedElts[OpNo - 1]) break; unsigned Mask = Op.getConstantOperandVal(3); unsigned MaskBit = ((OpNo - 1) ? 1 : 4); // Demand input element 0 or 1, given by the mask bit value. SrcDemE.setBit((Mask & MaskBit)? 1 : 0); break; } case Intrinsic::s390_vsldb: { // VECTOR SHIFT LEFT DOUBLE BY BYTE assert(VT == MVT::v16i8 && "Unexpected type."); unsigned FirstIdx = Op.getConstantOperandVal(3); assert (FirstIdx > 0 && FirstIdx < 16 && "Unused operand."); unsigned NumSrc0Els = 16 - FirstIdx; SrcDemE = APInt(NumElts, 0); if (OpNo == 1) { APInt DemEls = DemandedElts.trunc(NumSrc0Els); SrcDemE.insertBits(DemEls, FirstIdx); } else { APInt DemEls = DemandedElts.lshr(NumSrc0Els); SrcDemE.insertBits(DemEls, 0); } break; } case Intrinsic::s390_vperm: SrcDemE = APInt(NumElts, -1); break; default: llvm_unreachable("Unhandled intrinsic."); break; } } else { switch (Opcode) { case SystemZISD::JOIN_DWORDS: // Scalar operand. SrcDemE = APInt(1, 1); break; case SystemZISD::SELECT_CCMASK: SrcDemE = DemandedElts; break; default: llvm_unreachable("Unhandled opcode."); break; } } return SrcDemE; } static void computeKnownBitsBinOp(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth, unsigned OpNo) { APInt Src0DemE = getDemandedSrcElements(Op, DemandedElts, OpNo); APInt Src1DemE = getDemandedSrcElements(Op, DemandedElts, OpNo + 1); KnownBits LHSKnown = DAG.computeKnownBits(Op.getOperand(OpNo), Src0DemE, Depth + 1); KnownBits RHSKnown = DAG.computeKnownBits(Op.getOperand(OpNo + 1), Src1DemE, Depth + 1); Known = LHSKnown.intersectWith(RHSKnown); } void SystemZTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const { Known.resetAll(); // Intrinsic CC result is returned in the two low bits. 
unsigned tmp0, tmp1; // not used if (Op.getResNo() == 1 && isIntrinsicWithCC(Op, tmp0, tmp1)) { Known.Zero.setBitsFrom(2); return; } EVT VT = Op.getValueType(); if (Op.getResNo() != 0 || VT == MVT::Untyped) return; assert (Known.getBitWidth() == VT.getScalarSizeInBits() && "KnownBits does not match VT in bitwidth"); assert ((!VT.isVector() || (DemandedElts.getBitWidth() == VT.getVectorNumElements())) && "DemandedElts does not match VT number of elements"); unsigned BitWidth = Known.getBitWidth(); unsigned Opcode = Op.getOpcode(); if (Opcode == ISD::INTRINSIC_WO_CHAIN) { bool IsLogical = false; unsigned Id = Op.getConstantOperandVal(0); switch (Id) { case Intrinsic::s390_vpksh: // PACKS case Intrinsic::s390_vpksf: case Intrinsic::s390_vpksg: case Intrinsic::s390_vpkshs: // PACKS_CC case Intrinsic::s390_vpksfs: case Intrinsic::s390_vpksgs: case Intrinsic::s390_vpklsh: // PACKLS case Intrinsic::s390_vpklsf: case Intrinsic::s390_vpklsg: case Intrinsic::s390_vpklshs: // PACKLS_CC case Intrinsic::s390_vpklsfs: case Intrinsic::s390_vpklsgs: case Intrinsic::s390_vpdi: case Intrinsic::s390_vsldb: case Intrinsic::s390_vperm: computeKnownBitsBinOp(Op, Known, DemandedElts, DAG, Depth, 1); break; case Intrinsic::s390_vuplhb: // VECTOR UNPACK LOGICAL HIGH case Intrinsic::s390_vuplhh: case Intrinsic::s390_vuplhf: case Intrinsic::s390_vupllb: // VECTOR UNPACK LOGICAL LOW case Intrinsic::s390_vupllh: case Intrinsic::s390_vupllf: IsLogical = true; [[fallthrough]]; case Intrinsic::s390_vuphb: // VECTOR UNPACK HIGH case Intrinsic::s390_vuphh: case Intrinsic::s390_vuphf: case Intrinsic::s390_vuplb: // VECTOR UNPACK LOW case Intrinsic::s390_vuplhw: case Intrinsic::s390_vuplf: { SDValue SrcOp = Op.getOperand(1); APInt SrcDemE = getDemandedSrcElements(Op, DemandedElts, 0); Known = DAG.computeKnownBits(SrcOp, SrcDemE, Depth + 1); if (IsLogical) { Known = Known.zext(BitWidth); } else Known = Known.sext(BitWidth); break; } default: break; } } else { switch (Opcode) { case SystemZISD::JOIN_DWORDS: case SystemZISD::SELECT_CCMASK: computeKnownBitsBinOp(Op, Known, DemandedElts, DAG, Depth, 0); break; case SystemZISD::REPLICATE: { SDValue SrcOp = Op.getOperand(0); Known = DAG.computeKnownBits(SrcOp, Depth + 1); if (Known.getBitWidth() < BitWidth && isa<ConstantSDNode>(SrcOp)) Known = Known.sext(BitWidth); // VREPI sign extends the immediate. break; } default: break; } } // Known has the width of the source operand(s). Adjust if needed to match // the passed bitwidth. if (Known.getBitWidth() != BitWidth) Known = Known.anyextOrTrunc(BitWidth); } static unsigned computeNumSignBitsBinOp(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth, unsigned OpNo) { APInt Src0DemE = getDemandedSrcElements(Op, DemandedElts, OpNo); unsigned LHS = DAG.ComputeNumSignBits(Op.getOperand(OpNo), Src0DemE, Depth + 1); if (LHS == 1) return 1; // Early out. APInt Src1DemE = getDemandedSrcElements(Op, DemandedElts, OpNo + 1); unsigned RHS = DAG.ComputeNumSignBits(Op.getOperand(OpNo + 1), Src1DemE, Depth + 1); if (RHS == 1) return 1; // Early out.
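// For the PACK intrinsics the source elements are wider than the result elements, so the truncation discards SrcBitWidth - VTBits high bits and the known sign-bit count below has to shrink by the same amount.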
unsigned Common = std::min(LHS, RHS); unsigned SrcBitWidth = Op.getOperand(OpNo).getScalarValueSizeInBits(); EVT VT = Op.getValueType(); unsigned VTBits = VT.getScalarSizeInBits(); if (SrcBitWidth > VTBits) { // PACK unsigned SrcExtraBits = SrcBitWidth - VTBits; if (Common > SrcExtraBits) return (Common - SrcExtraBits); return 1; } assert (SrcBitWidth == VTBits && "Expected operands of same bitwidth."); return Common; } unsigned SystemZTargetLowering::ComputeNumSignBitsForTargetNode( SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const { if (Op.getResNo() != 0) return 1; unsigned Opcode = Op.getOpcode(); if (Opcode == ISD::INTRINSIC_WO_CHAIN) { unsigned Id = Op.getConstantOperandVal(0); switch (Id) { case Intrinsic::s390_vpksh: // PACKS case Intrinsic::s390_vpksf: case Intrinsic::s390_vpksg: case Intrinsic::s390_vpkshs: // PACKS_CC case Intrinsic::s390_vpksfs: case Intrinsic::s390_vpksgs: case Intrinsic::s390_vpklsh: // PACKLS case Intrinsic::s390_vpklsf: case Intrinsic::s390_vpklsg: case Intrinsic::s390_vpklshs: // PACKLS_CC case Intrinsic::s390_vpklsfs: case Intrinsic::s390_vpklsgs: case Intrinsic::s390_vpdi: case Intrinsic::s390_vsldb: case Intrinsic::s390_vperm: return computeNumSignBitsBinOp(Op, DemandedElts, DAG, Depth, 1); case Intrinsic::s390_vuphb: // VECTOR UNPACK HIGH case Intrinsic::s390_vuphh: case Intrinsic::s390_vuphf: case Intrinsic::s390_vuplb: // VECTOR UNPACK LOW case Intrinsic::s390_vuplhw: case Intrinsic::s390_vuplf: { SDValue PackedOp = Op.getOperand(1); APInt SrcDemE = getDemandedSrcElements(Op, DemandedElts, 1); unsigned Tmp = DAG.ComputeNumSignBits(PackedOp, SrcDemE, Depth + 1); EVT VT = Op.getValueType(); unsigned VTBits = VT.getScalarSizeInBits(); Tmp += VTBits - PackedOp.getScalarValueSizeInBits(); return Tmp; } default: break; } } else { switch (Opcode) { case SystemZISD::SELECT_CCMASK: return computeNumSignBitsBinOp(Op, DemandedElts, DAG, Depth, 0); default: break; } } return 1; } bool SystemZTargetLowering:: isGuaranteedNotToBeUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, unsigned Depth) const { switch (Op->getOpcode()) { case SystemZISD::PCREL_WRAPPER: case SystemZISD::PCREL_OFFSET: return true; } return false; } unsigned SystemZTargetLowering::getStackProbeSize(const MachineFunction &MF) const { const TargetFrameLowering *TFI = Subtarget.getFrameLowering(); unsigned StackAlign = TFI->getStackAlignment(); assert(StackAlign >=1 && isPowerOf2_32(StackAlign) && "Unexpected stack alignment"); // The default stack probe size is 4096 if the function has no // stack-probe-size attribute. unsigned StackProbeSize = MF.getFunction().getFnAttributeAsParsedInteger("stack-probe-size", 4096); // Round down to the stack alignment. StackProbeSize &= ~(StackAlign - 1); return StackProbeSize ? StackProbeSize : StackAlign; } //===----------------------------------------------------------------------===// // Custom insertion //===----------------------------------------------------------------------===// // Force base value Base into a register before MI. Return the register. static Register forceReg(MachineInstr &MI, MachineOperand &Base, const SystemZInstrInfo *TII) { MachineBasicBlock *MBB = MI.getParent(); MachineFunction &MF = *MBB->getParent(); MachineRegisterInfo &MRI = MF.getRegInfo(); if (Base.isReg()) { // Copy Base into a new virtual register to help register coalescing in // cases with multiple uses. 
Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass); BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::COPY), Reg) .add(Base); return Reg; } Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass); BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::LA), Reg) .add(Base) .addImm(0) .addReg(0); return Reg; } // The CC operand of MI might be missing a kill marker because there // were multiple uses of CC, and ISel didn't know which to mark. // Figure out whether MI should have had a kill marker. static bool checkCCKill(MachineInstr &MI, MachineBasicBlock *MBB) { // Scan forward through BB for a use/def of CC. MachineBasicBlock::iterator miI(std::next(MachineBasicBlock::iterator(MI))); for (MachineBasicBlock::iterator miE = MBB->end(); miI != miE; ++miI) { const MachineInstr& mi = *miI; if (mi.readsRegister(SystemZ::CC, /*TRI=*/nullptr)) return false; if (mi.definesRegister(SystemZ::CC, /*TRI=*/nullptr)) break; // Should have kill-flag - update below. } // If we hit the end of the block, check whether CC is live into a // successor. if (miI == MBB->end()) { for (const MachineBasicBlock *Succ : MBB->successors()) if (Succ->isLiveIn(SystemZ::CC)) return false; } return true; } // Return true if it is OK for this Select pseudo-opcode to be cascaded // together with other Select pseudo-opcodes into a single basic-block with // a conditional jump around it. static bool isSelectPseudo(MachineInstr &MI) { switch (MI.getOpcode()) { case SystemZ::Select32: case SystemZ::Select64: case SystemZ::Select128: case SystemZ::SelectF32: case SystemZ::SelectF64: case SystemZ::SelectF128: case SystemZ::SelectVR32: case SystemZ::SelectVR64: case SystemZ::SelectVR128: return true; default: return false; } } // Helper function, which inserts PHI functions into SinkMBB: // %Result(i) = phi [ %FalseValue(i), FalseMBB ], [ %TrueValue(i), TrueMBB ], // where %FalseValue(i) and %TrueValue(i) are taken from Selects. static void createPHIsForSelects(SmallVector<MachineInstr*, 8> &Selects, MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB, MachineBasicBlock *SinkMBB) { MachineFunction *MF = TrueMBB->getParent(); const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); MachineInstr *FirstMI = Selects.front(); unsigned CCValid = FirstMI->getOperand(3).getImm(); unsigned CCMask = FirstMI->getOperand(4).getImm(); MachineBasicBlock::iterator SinkInsertionPoint = SinkMBB->begin(); // As we are creating the PHIs, we have to be careful if there is more than // one. Later Selects may reference the results of earlier Selects, but later // PHIs have to reference the individual true/false inputs from earlier PHIs. // That also means that PHI construction must work forward from earlier to // later, and that the code must maintain a mapping from each earlier PHI's // destination register to the registers that went into the PHI. DenseMap<unsigned, std::pair<unsigned, unsigned>> RegRewriteTable; for (auto *MI : Selects) { Register DestReg = MI->getOperand(0).getReg(); Register TrueReg = MI->getOperand(1).getReg(); Register FalseReg = MI->getOperand(2).getReg(); // If this Select we are generating is the opposite condition from // the jump we generated, then we have to swap the operands for the // PHI that is going to be generated.
if (MI->getOperand(4).getImm() == (CCValid ^ CCMask)) std::swap(TrueReg, FalseReg); if (RegRewriteTable.contains(TrueReg)) TrueReg = RegRewriteTable[TrueReg].first; if (RegRewriteTable.contains(FalseReg)) FalseReg = RegRewriteTable[FalseReg].second; DebugLoc DL = MI->getDebugLoc(); BuildMI(*SinkMBB, SinkInsertionPoint, DL, TII->get(SystemZ::PHI), DestReg) .addReg(TrueReg).addMBB(TrueMBB) .addReg(FalseReg).addMBB(FalseMBB); // Add this PHI to the rewrite table. RegRewriteTable[DestReg] = std::make_pair(TrueReg, FalseReg); } MF->getProperties().reset(MachineFunctionProperties::Property::NoPHIs); } MachineBasicBlock * SystemZTargetLowering::emitAdjCallStack(MachineInstr &MI, MachineBasicBlock *BB) const { MachineFunction &MF = *BB->getParent(); MachineFrameInfo &MFI = MF.getFrameInfo(); auto *TFL = Subtarget.getFrameLowering(); assert(TFL->hasReservedCallFrame(MF) && "ADJSTACKDOWN and ADJSTACKUP should be no-ops"); (void)TFL; // Get the MaxCallFrameSize value and erase MI since it serves no further // purpose as the call frame is statically reserved in the prolog. Set // AdjustsStack as MI is *not* mapped as a frame instruction. uint32_t NumBytes = MI.getOperand(0).getImm(); if (NumBytes > MFI.getMaxCallFrameSize()) MFI.setMaxCallFrameSize(NumBytes); MFI.setAdjustsStack(true); MI.eraseFromParent(); return BB; } // Implement EmitInstrWithCustomInserter for pseudo Select* instruction MI. MachineBasicBlock * SystemZTargetLowering::emitSelect(MachineInstr &MI, MachineBasicBlock *MBB) const { assert(isSelectPseudo(MI) && "Bad call to emitSelect()"); const SystemZInstrInfo *TII = Subtarget.getInstrInfo(); unsigned CCValid = MI.getOperand(3).getImm(); unsigned CCMask = MI.getOperand(4).getImm(); // If we have a sequence of Select* pseudo instructions using the // same condition code value, we want to expand all of them into // a single pair of basic blocks using the same condition. SmallVector<MachineInstr*, 8> Selects; SmallVector<MachineInstr*, 8> DbgValues; Selects.push_back(&MI); unsigned Count = 0; for (MachineInstr &NextMI : llvm::make_range( std::next(MachineBasicBlock::iterator(MI)), MBB->end())) { if (isSelectPseudo(NextMI)) { assert(NextMI.getOperand(3).getImm() == CCValid && "Bad CCValid operands since CC was not redefined."); if (NextMI.getOperand(4).getImm() == CCMask || NextMI.getOperand(4).getImm() == (CCValid ^ CCMask)) { Selects.push_back(&NextMI); continue; } break; } if (NextMI.definesRegister(SystemZ::CC, /*TRI=*/nullptr) || NextMI.usesCustomInsertionHook()) break; bool User = false; for (auto *SelMI : Selects) if (NextMI.readsVirtualRegister(SelMI->getOperand(0).getReg())) { User = true; break; } if (NextMI.isDebugInstr()) { if (User) { assert(NextMI.isDebugValue() && "Unhandled debug opcode."); DbgValues.push_back(&NextMI); } } else if (User || ++Count > 20) break; } MachineInstr *LastMI = Selects.back(); bool CCKilled = (LastMI->killsRegister(SystemZ::CC, /*TRI=*/nullptr) || checkCCKill(*LastMI, MBB)); MachineBasicBlock *StartMBB = MBB; MachineBasicBlock *JoinMBB = SystemZ::splitBlockAfter(LastMI, MBB); MachineBasicBlock *FalseMBB = SystemZ::emitBlockAfter(StartMBB); // Unless CC was killed in the last Select instruction, mark it as // live-in to both FalseMBB and JoinMBB.
if (!CCKilled) { FalseMBB->addLiveIn(SystemZ::CC); JoinMBB->addLiveIn(SystemZ::CC); } // StartMBB: // BRC CCMask, JoinMBB // # fallthrough to FalseMBB MBB = StartMBB; BuildMI(MBB, MI.getDebugLoc(), TII->get(SystemZ::BRC)) .addImm(CCValid).addImm(CCMask).addMBB(JoinMBB); MBB->addSuccessor(JoinMBB); MBB->addSuccessor(FalseMBB); // FalseMBB: // # fallthrough to JoinMBB MBB = FalseMBB; MBB->addSuccessor(JoinMBB); // JoinMBB: // %Result = phi [ %FalseReg, FalseMBB ], [ %TrueReg, StartMBB ] // ... MBB = JoinMBB; createPHIsForSelects(Selects, StartMBB, FalseMBB, MBB); for (auto *SelMI : Selects) SelMI->eraseFromParent(); MachineBasicBlock::iterator InsertPos = MBB->getFirstNonPHI(); for (auto *DbgMI : DbgValues) MBB->splice(InsertPos, StartMBB, DbgMI); return JoinMBB; } // Implement EmitInstrWithCustomInserter for pseudo CondStore* instruction MI. // StoreOpcode is the store to use and Invert says whether the store should // happen when the condition is false rather than true. If a STORE ON // CONDITION is available, STOCOpcode is its opcode, otherwise it is 0. MachineBasicBlock *SystemZTargetLowering::emitCondStore(MachineInstr &MI, MachineBasicBlock *MBB, unsigned StoreOpcode, unsigned STOCOpcode, bool Invert) const { const SystemZInstrInfo *TII = Subtarget.getInstrInfo(); Register SrcReg = MI.getOperand(0).getReg(); MachineOperand Base = MI.getOperand(1); int64_t Disp = MI.getOperand(2).getImm(); Register IndexReg = MI.getOperand(3).getReg(); unsigned CCValid = MI.getOperand(4).getImm(); unsigned CCMask = MI.getOperand(5).getImm(); DebugLoc DL = MI.getDebugLoc(); StoreOpcode = TII->getOpcodeForOffset(StoreOpcode, Disp); // ISel pattern matching also adds a load memory operand of the same // address, so take special care to find the storing memory operand. MachineMemOperand *MMO = nullptr; for (auto *I : MI.memoperands()) if (I->isStore()) { MMO = I; break; } // Use STOCOpcode if possible. We could use different store patterns in // order to avoid matching the index register, but the performance trade-offs // might be more complicated in that case. if (STOCOpcode && !IndexReg && Subtarget.hasLoadStoreOnCond()) { if (Invert) CCMask ^= CCValid; BuildMI(*MBB, MI, DL, TII->get(STOCOpcode)) .addReg(SrcReg) .add(Base) .addImm(Disp) .addImm(CCValid) .addImm(CCMask) .addMemOperand(MMO); MI.eraseFromParent(); return MBB; } // Get the condition needed to branch around the store. if (!Invert) CCMask ^= CCValid; MachineBasicBlock *StartMBB = MBB; MachineBasicBlock *JoinMBB = SystemZ::splitBlockBefore(MI, MBB); MachineBasicBlock *FalseMBB = SystemZ::emitBlockAfter(StartMBB); // Unless CC was killed in the CondStore instruction, mark it as // live-in to both FalseMBB and JoinMBB. if (!MI.killsRegister(SystemZ::CC, /*TRI=*/nullptr) && !checkCCKill(MI, JoinMBB)) { FalseMBB->addLiveIn(SystemZ::CC); JoinMBB->addLiveIn(SystemZ::CC); } // StartMBB: // BRC CCMask, JoinMBB // # fallthrough to FalseMBB MBB = StartMBB; BuildMI(MBB, DL, TII->get(SystemZ::BRC)) .addImm(CCValid).addImm(CCMask).addMBB(JoinMBB); MBB->addSuccessor(JoinMBB); MBB->addSuccessor(FalseMBB); // FalseMBB: // store %SrcReg, %Disp(%Index,%Base) // # fallthrough to JoinMBB MBB = FalseMBB; BuildMI(MBB, DL, TII->get(StoreOpcode)) .addReg(SrcReg) .add(Base) .addImm(Disp) .addReg(IndexReg) .addMemOperand(MMO); MBB->addSuccessor(JoinMBB); MI.eraseFromParent(); return JoinMBB; } // Implement EmitInstrWithCustomInserter for pseudo [SU]Cmp128Hi instruction MI. 
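// The expansion compares the high 64-bit halves first and only falls through to compare the low halves when the high halves are equal, so JoinMBB observes whichever CC-setting compare executed last.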
MachineBasicBlock * SystemZTargetLowering::emitICmp128Hi(MachineInstr &MI, MachineBasicBlock *MBB, bool Unsigned) const { MachineFunction &MF = *MBB->getParent(); const SystemZInstrInfo *TII = Subtarget.getInstrInfo(); MachineRegisterInfo &MRI = MF.getRegInfo(); // Synthetic instruction to compare 128-bit values. // Sets CC 1 if Op0 > Op1, sets a different CC otherwise. Register Op0 = MI.getOperand(0).getReg(); Register Op1 = MI.getOperand(1).getReg(); MachineBasicBlock *StartMBB = MBB; MachineBasicBlock *JoinMBB = SystemZ::splitBlockAfter(MI, MBB); MachineBasicBlock *HiEqMBB = SystemZ::emitBlockAfter(StartMBB); // StartMBB: // // Use VECTOR ELEMENT COMPARE [LOGICAL] to compare the high parts. // Swap the inputs to get: // CC 1 if high(Op0) > high(Op1) // CC 2 if high(Op0) < high(Op1) // CC 0 if high(Op0) == high(Op1) // // If CC != 0, we're done, so jump over the next instruction. // // VEC[L]G Op1, Op0 // JNE JoinMBB // # fallthrough to HiEqMBB MBB = StartMBB; int HiOpcode = Unsigned? SystemZ::VECLG : SystemZ::VECG; BuildMI(MBB, MI.getDebugLoc(), TII->get(HiOpcode)) .addReg(Op1).addReg(Op0); BuildMI(MBB, MI.getDebugLoc(), TII->get(SystemZ::BRC)) .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE).addMBB(JoinMBB); MBB->addSuccessor(JoinMBB); MBB->addSuccessor(HiEqMBB); // HiEqMBB: // // Otherwise, use VECTOR COMPARE HIGH LOGICAL. // Since we already know the high parts are equal, the CC // result will only depend on the low parts: // CC 1 if low(Op0) > low(Op1) // CC 3 if low(Op0) <= low(Op1) // // VCHLGS Tmp, Op0, Op1 // # fallthrough to JoinMBB MBB = HiEqMBB; Register Temp = MRI.createVirtualRegister(&SystemZ::VR128BitRegClass); BuildMI(MBB, MI.getDebugLoc(), TII->get(SystemZ::VCHLGS), Temp) .addReg(Op0).addReg(Op1); MBB->addSuccessor(JoinMBB); // Mark CC as live-in to JoinMBB. JoinMBB->addLiveIn(SystemZ::CC); MI.eraseFromParent(); return JoinMBB; } // Implement EmitInstrWithCustomInserter for subword pseudo ATOMIC_LOADW_* or // ATOMIC_SWAPW instruction MI. BinOpcode is the instruction that performs // the binary operation elided by "*", or 0 for ATOMIC_SWAPW. Invert says // whether the field should be inverted after performing BinOpcode (e.g. for // NAND). MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadBinary( MachineInstr &MI, MachineBasicBlock *MBB, unsigned BinOpcode, bool Invert) const { MachineFunction &MF = *MBB->getParent(); const SystemZInstrInfo *TII = Subtarget.getInstrInfo(); MachineRegisterInfo &MRI = MF.getRegInfo(); // Extract the operands. Base can be a register or a frame index. // Src2 can be a register or immediate. Register Dest = MI.getOperand(0).getReg(); MachineOperand Base = earlyUseOperand(MI.getOperand(1)); int64_t Disp = MI.getOperand(2).getImm(); MachineOperand Src2 = earlyUseOperand(MI.getOperand(3)); Register BitShift = MI.getOperand(4).getReg(); Register NegBitShift = MI.getOperand(5).getReg(); unsigned BitSize = MI.getOperand(6).getImm(); DebugLoc DL = MI.getDebugLoc(); // Get the right opcodes for the displacement. unsigned LOpcode = TII->getOpcodeForOffset(SystemZ::L, Disp); unsigned CSOpcode = TII->getOpcodeForOffset(SystemZ::CS, Disp); assert(LOpcode && CSOpcode && "Displacement out of range"); // Create virtual registers for temporary results.
Register OrigVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass); Register OldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass); Register NewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass); Register RotatedOldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass); Register RotatedNewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass); // Insert a basic block for the main loop. MachineBasicBlock *StartMBB = MBB; MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB); MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB); // StartMBB: // ... // %OrigVal = L Disp(%Base) // # fall through to LoopMBB MBB = StartMBB; BuildMI(MBB, DL, TII->get(LOpcode), OrigVal).add(Base).addImm(Disp).addReg(0); MBB->addSuccessor(LoopMBB); // LoopMBB: // %OldVal = phi [ %OrigVal, StartMBB ], [ %Dest, LoopMBB ] // %RotatedOldVal = RLL %OldVal, 0(%BitShift) // %RotatedNewVal = OP %RotatedOldVal, %Src2 // %NewVal = RLL %RotatedNewVal, 0(%NegBitShift) // %Dest = CS %OldVal, %NewVal, Disp(%Base) // JNE LoopMBB // # fall through to DoneMBB MBB = LoopMBB; BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal) .addReg(OrigVal).addMBB(StartMBB) .addReg(Dest).addMBB(LoopMBB); BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal) .addReg(OldVal).addReg(BitShift).addImm(0); if (Invert) { // Perform the operation normally and then invert every bit of the field. Register Tmp = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass); BuildMI(MBB, DL, TII->get(BinOpcode), Tmp).addReg(RotatedOldVal).add(Src2); // XILF with the upper BitSize bits set. BuildMI(MBB, DL, TII->get(SystemZ::XILF), RotatedNewVal) .addReg(Tmp).addImm(-1U << (32 - BitSize)); } else if (BinOpcode) // A simple binary operation. BuildMI(MBB, DL, TII->get(BinOpcode), RotatedNewVal) .addReg(RotatedOldVal) .add(Src2); else // Use RISBG to rotate Src2 into position and use it to replace the // field in RotatedOldVal. BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RotatedNewVal) .addReg(RotatedOldVal).addReg(Src2.getReg()) .addImm(32).addImm(31 + BitSize).addImm(32 - BitSize); BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal) .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0); BuildMI(MBB, DL, TII->get(CSOpcode), Dest) .addReg(OldVal) .addReg(NewVal) .add(Base) .addImm(Disp); BuildMI(MBB, DL, TII->get(SystemZ::BRC)) .addImm(SystemZ::CCMASK_CS).addImm(SystemZ::CCMASK_CS_NE).addMBB(LoopMBB); MBB->addSuccessor(LoopMBB); MBB->addSuccessor(DoneMBB); MI.eraseFromParent(); return DoneMBB; } // Implement EmitInstrWithCustomInserter for subword pseudo // ATOMIC_LOADW_{,U}{MIN,MAX} instruction MI. CompareOpcode is the // instruction that should be used to compare the current field with the // minimum or maximum value. KeepOldMask is the BRC condition-code mask // for when the current field should be kept. MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadMinMax( MachineInstr &MI, MachineBasicBlock *MBB, unsigned CompareOpcode, unsigned KeepOldMask) const { MachineFunction &MF = *MBB->getParent(); const SystemZInstrInfo *TII = Subtarget.getInstrInfo(); MachineRegisterInfo &MRI = MF.getRegInfo(); // Extract the operands. Base can be a register or a frame index.
Register Dest = MI.getOperand(0).getReg(); MachineOperand Base = earlyUseOperand(MI.getOperand(1)); int64_t Disp = MI.getOperand(2).getImm(); Register Src2 = MI.getOperand(3).getReg(); Register BitShift = MI.getOperand(4).getReg(); Register NegBitShift = MI.getOperand(5).getReg(); unsigned BitSize = MI.getOperand(6).getImm(); DebugLoc DL = MI.getDebugLoc(); // Get the right opcodes for the displacement. unsigned LOpcode = TII->getOpcodeForOffset(SystemZ::L, Disp); unsigned CSOpcode = TII->getOpcodeForOffset(SystemZ::CS, Disp); assert(LOpcode && CSOpcode && "Displacement out of range"); // Create virtual registers for temporary results. Register OrigVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass); Register OldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass); Register NewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass); Register RotatedOldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass); Register RotatedAltVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass); Register RotatedNewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass); // Insert 3 basic blocks for the loop. MachineBasicBlock *StartMBB = MBB; MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB); MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB); MachineBasicBlock *UseAltMBB = SystemZ::emitBlockAfter(LoopMBB); MachineBasicBlock *UpdateMBB = SystemZ::emitBlockAfter(UseAltMBB); // StartMBB: // ... // %OrigVal = L Disp(%Base) // # fall through to LoopMBB MBB = StartMBB; BuildMI(MBB, DL, TII->get(LOpcode), OrigVal).add(Base).addImm(Disp).addReg(0); MBB->addSuccessor(LoopMBB); // LoopMBB: // %OldVal = phi [ %OrigVal, StartMBB ], [ %Dest, UpdateMBB ] // %RotatedOldVal = RLL %OldVal, 0(%BitShift) // CompareOpcode %RotatedOldVal, %Src2 // BRC KeepOldMask, UpdateMBB MBB = LoopMBB; BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal) .addReg(OrigVal).addMBB(StartMBB) .addReg(Dest).addMBB(UpdateMBB); BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal) .addReg(OldVal).addReg(BitShift).addImm(0); BuildMI(MBB, DL, TII->get(CompareOpcode)) .addReg(RotatedOldVal).addReg(Src2); BuildMI(MBB, DL, TII->get(SystemZ::BRC)) .addImm(SystemZ::CCMASK_ICMP).addImm(KeepOldMask).addMBB(UpdateMBB); MBB->addSuccessor(UpdateMBB); MBB->addSuccessor(UseAltMBB); // UseAltMBB: // %RotatedAltVal = RISBG %RotatedOldVal, %Src2, 32, 31 + BitSize, 0 // # fall through to UpdateMBB MBB = UseAltMBB; BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RotatedAltVal) .addReg(RotatedOldVal).addReg(Src2) .addImm(32).addImm(31 + BitSize).addImm(0); MBB->addSuccessor(UpdateMBB); // UpdateMBB: // %RotatedNewVal = PHI [ %RotatedOldVal, LoopMBB ], // [ %RotatedAltVal, UseAltMBB ] // %NewVal = RLL %RotatedNewVal, 0(%NegBitShift) // %Dest = CS %OldVal, %NewVal, Disp(%Base) // JNE LoopMBB // # fall through to DoneMBB MBB = UpdateMBB; BuildMI(MBB, DL, TII->get(SystemZ::PHI), RotatedNewVal) .addReg(RotatedOldVal).addMBB(LoopMBB) .addReg(RotatedAltVal).addMBB(UseAltMBB); BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal) .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0); BuildMI(MBB, DL, TII->get(CSOpcode), Dest) .addReg(OldVal) .addReg(NewVal) .add(Base) .addImm(Disp); BuildMI(MBB, DL, TII->get(SystemZ::BRC)) .addImm(SystemZ::CCMASK_CS).addImm(SystemZ::CCMASK_CS_NE).addMBB(LoopMBB); MBB->addSuccessor(LoopMBB); MBB->addSuccessor(DoneMBB); MI.eraseFromParent(); return DoneMBB; } // Implement EmitInstrWithCustomInserter for subword pseudo ATOMIC_CMP_SWAPW // instruction MI. 
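// The loaded word is rotated so that the addressed byte or halfword sits in the low BitSize bits, compared against CmpVal, merged with the swap value via RISBG, rotated back, and retried with CS until the store succeeds or the comparison fails.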
MachineBasicBlock * SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr &MI, MachineBasicBlock *MBB) const { MachineFunction &MF = *MBB->getParent(); const SystemZInstrInfo *TII = Subtarget.getInstrInfo(); MachineRegisterInfo &MRI = MF.getRegInfo(); // Extract the operands. Base can be a register or a frame index. Register Dest = MI.getOperand(0).getReg(); MachineOperand Base = earlyUseOperand(MI.getOperand(1)); int64_t Disp = MI.getOperand(2).getImm(); Register CmpVal = MI.getOperand(3).getReg(); Register OrigSwapVal = MI.getOperand(4).getReg(); Register BitShift = MI.getOperand(5).getReg(); Register NegBitShift = MI.getOperand(6).getReg(); int64_t BitSize = MI.getOperand(7).getImm(); DebugLoc DL = MI.getDebugLoc(); const TargetRegisterClass *RC = &SystemZ::GR32BitRegClass; // Get the right opcodes for the displacement and zero-extension. unsigned LOpcode = TII->getOpcodeForOffset(SystemZ::L, Disp); unsigned CSOpcode = TII->getOpcodeForOffset(SystemZ::CS, Disp); unsigned ZExtOpcode = BitSize == 8 ? SystemZ::LLCR : SystemZ::LLHR; assert(LOpcode && CSOpcode && "Displacement out of range"); // Create virtual registers for temporary results. Register OrigOldVal = MRI.createVirtualRegister(RC); Register OldVal = MRI.createVirtualRegister(RC); Register SwapVal = MRI.createVirtualRegister(RC); Register StoreVal = MRI.createVirtualRegister(RC); Register OldValRot = MRI.createVirtualRegister(RC); Register RetryOldVal = MRI.createVirtualRegister(RC); Register RetrySwapVal = MRI.createVirtualRegister(RC); // Insert 2 basic blocks for the loop. MachineBasicBlock *StartMBB = MBB; MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB); MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB); MachineBasicBlock *SetMBB = SystemZ::emitBlockAfter(LoopMBB); // StartMBB: // ... // %OrigOldVal = L Disp(%Base) // # fall through to LoopMBB MBB = StartMBB; BuildMI(MBB, DL, TII->get(LOpcode), OrigOldVal) .add(Base) .addImm(Disp) .addReg(0); MBB->addSuccessor(LoopMBB); // LoopMBB: // %OldVal = phi [ %OrigOldVal, EntryBB ], [ %RetryOldVal, SetMBB ] // %SwapVal = phi [ %OrigSwapVal, EntryBB ], [ %RetrySwapVal, SetMBB ] // %OldValRot = RLL %OldVal, BitSize(%BitShift) // ^^ The low BitSize bits contain the field // of interest. // %RetrySwapVal = RISBG32 %SwapVal, %OldValRot, 32, 63-BitSize, 0 // ^^ Replace the upper 32-BitSize bits of the // swap value with those that we loaded and rotated. // %Dest = LL[CH] %OldValRot // CR %Dest, %CmpVal // JNE DoneMBB // # Fall through to SetMBB MBB = LoopMBB; BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal) .addReg(OrigOldVal).addMBB(StartMBB) .addReg(RetryOldVal).addMBB(SetMBB); BuildMI(MBB, DL, TII->get(SystemZ::PHI), SwapVal) .addReg(OrigSwapVal).addMBB(StartMBB) .addReg(RetrySwapVal).addMBB(SetMBB); BuildMI(MBB, DL, TII->get(SystemZ::RLL), OldValRot) .addReg(OldVal).addReg(BitShift).addImm(BitSize); BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RetrySwapVal) .addReg(SwapVal).addReg(OldValRot).addImm(32).addImm(63 - BitSize).addImm(0); BuildMI(MBB, DL, TII->get(ZExtOpcode), Dest) .addReg(OldValRot); BuildMI(MBB, DL, TII->get(SystemZ::CR)) .addReg(Dest).addReg(CmpVal); BuildMI(MBB, DL, TII->get(SystemZ::BRC)) .addImm(SystemZ::CCMASK_ICMP) .addImm(SystemZ::CCMASK_CMP_NE).addMBB(DoneMBB); MBB->addSuccessor(DoneMBB); MBB->addSuccessor(SetMBB); // SetMBB: // %StoreVal = RLL %RetrySwapVal, -BitSize(%NegBitShift) // ^^ Rotate the new field to its proper position. 
// %RetryOldVal = CS %OldVal, %StoreVal, Disp(%Base) // JNE LoopMBB // # fall through to ExitMBB MBB = SetMBB; BuildMI(MBB, DL, TII->get(SystemZ::RLL), StoreVal) .addReg(RetrySwapVal).addReg(NegBitShift).addImm(-BitSize); BuildMI(MBB, DL, TII->get(CSOpcode), RetryOldVal) .addReg(OldVal) .addReg(StoreVal) .add(Base) .addImm(Disp); BuildMI(MBB, DL, TII->get(SystemZ::BRC)) .addImm(SystemZ::CCMASK_CS).addImm(SystemZ::CCMASK_CS_NE).addMBB(LoopMBB); MBB->addSuccessor(LoopMBB); MBB->addSuccessor(DoneMBB); // If the CC def wasn't dead in the ATOMIC_CMP_SWAPW, mark CC as live-in // to the block after the loop. At this point, CC may have been defined // either by the CR in LoopMBB or by the CS in SetMBB. if (!MI.registerDefIsDead(SystemZ::CC, /*TRI=*/nullptr)) DoneMBB->addLiveIn(SystemZ::CC); MI.eraseFromParent(); return DoneMBB; } // Emit a move from two GR64s to a GR128. MachineBasicBlock * SystemZTargetLowering::emitPair128(MachineInstr &MI, MachineBasicBlock *MBB) const { const SystemZInstrInfo *TII = Subtarget.getInstrInfo(); const DebugLoc &DL = MI.getDebugLoc(); Register Dest = MI.getOperand(0).getReg(); BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::REG_SEQUENCE), Dest) .add(MI.getOperand(1)) .addImm(SystemZ::subreg_h64) .add(MI.getOperand(2)) .addImm(SystemZ::subreg_l64); MI.eraseFromParent(); return MBB; } // Emit an extension from a GR64 to a GR128. ClearEven is true // if the high register of the GR128 value must be cleared or false if // it's "don't care". MachineBasicBlock *SystemZTargetLowering::emitExt128(MachineInstr &MI, MachineBasicBlock *MBB, bool ClearEven) const { MachineFunction &MF = *MBB->getParent(); const SystemZInstrInfo *TII = Subtarget.getInstrInfo(); MachineRegisterInfo &MRI = MF.getRegInfo(); DebugLoc DL = MI.getDebugLoc(); Register Dest = MI.getOperand(0).getReg(); Register Src = MI.getOperand(1).getReg(); Register In128 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass); BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::IMPLICIT_DEF), In128); if (ClearEven) { Register NewIn128 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass); Register Zero64 = MRI.createVirtualRegister(&SystemZ::GR64BitRegClass); BuildMI(*MBB, MI, DL, TII->get(SystemZ::LLILL), Zero64) .addImm(0); BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), NewIn128) .addReg(In128).addReg(Zero64).addImm(SystemZ::subreg_h64); In128 = NewIn128; } BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), Dest) .addReg(In128).addReg(Src).addImm(SystemZ::subreg_l64); MI.eraseFromParent(); return MBB; } MachineBasicBlock * SystemZTargetLowering::emitMemMemWrapper(MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode, bool IsMemset) const { MachineFunction &MF = *MBB->getParent(); const SystemZInstrInfo *TII = Subtarget.getInstrInfo(); MachineRegisterInfo &MRI = MF.getRegInfo(); DebugLoc DL = MI.getDebugLoc(); MachineOperand DestBase = earlyUseOperand(MI.getOperand(0)); uint64_t DestDisp = MI.getOperand(1).getImm(); MachineOperand SrcBase = MachineOperand::CreateReg(0U, false); uint64_t SrcDisp; // Fold the displacement Disp if it is out of range. 
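// MVC, CLC and the other SS-format memory ops used here only encode a 12-bit unsigned displacement, so larger offsets are first folded into a new base register with an LA/LAY-style address computation (see the lambda below).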
auto foldDisplIfNeeded = [&](MachineOperand &Base, uint64_t &Disp) -> void { if (!isUInt<12>(Disp)) { Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass); unsigned Opcode = TII->getOpcodeForOffset(SystemZ::LA, Disp); BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(Opcode), Reg) .add(Base).addImm(Disp).addReg(0); Base = MachineOperand::CreateReg(Reg, false); Disp = 0; } }; if (!IsMemset) { SrcBase = earlyUseOperand(MI.getOperand(2)); SrcDisp = MI.getOperand(3).getImm(); } else { SrcBase = DestBase; SrcDisp = DestDisp++; foldDisplIfNeeded(DestBase, DestDisp); } MachineOperand &LengthMO = MI.getOperand(IsMemset ? 2 : 4); bool IsImmForm = LengthMO.isImm(); bool IsRegForm = !IsImmForm; // Build and insert one Opcode of Length, with special treatment for memset. auto insertMemMemOp = [&](MachineBasicBlock *InsMBB, MachineBasicBlock::iterator InsPos, MachineOperand DBase, uint64_t DDisp, MachineOperand SBase, uint64_t SDisp, unsigned Length) -> void { assert(Length > 0 && Length <= 256 && "Building memory op with bad length."); if (IsMemset) { MachineOperand ByteMO = earlyUseOperand(MI.getOperand(3)); if (ByteMO.isImm()) BuildMI(*InsMBB, InsPos, DL, TII->get(SystemZ::MVI)) .add(SBase).addImm(SDisp).add(ByteMO); else BuildMI(*InsMBB, InsPos, DL, TII->get(SystemZ::STC)) .add(ByteMO).add(SBase).addImm(SDisp).addReg(0); if (--Length == 0) return; } BuildMI(*MBB, InsPos, DL, TII->get(Opcode)) .add(DBase).addImm(DDisp).addImm(Length) .add(SBase).addImm(SDisp) .setMemRefs(MI.memoperands()); }; bool NeedsLoop = false; uint64_t ImmLength = 0; Register LenAdjReg = SystemZ::NoRegister; if (IsImmForm) { ImmLength = LengthMO.getImm(); ImmLength += IsMemset ? 2 : 1; // Add back the subtracted adjustment. if (ImmLength == 0) { MI.eraseFromParent(); return MBB; } if (Opcode == SystemZ::CLC) { if (ImmLength > 3 * 256) // A two-CLC sequence is a clear win over a loop, not least because // it needs only one branch. A three-CLC sequence needs the same // number of branches as a loop (i.e. 2), but is shorter. That // brings us to lengths greater than 768 bytes. It seems relatively // likely that a difference will be found within the first 768 bytes, // so we just optimize for the smallest number of branch // instructions, in order to avoid polluting the prediction buffer // too much. NeedsLoop = true; } else if (ImmLength > 6 * 256) // The heuristic we use is to prefer loops for anything that would // require 7 or more MVCs. With these kinds of sizes there isn't much // to choose between straight-line code and looping code, since the // time will be dominated by the MVCs themselves. NeedsLoop = true; } else { NeedsLoop = true; LenAdjReg = LengthMO.getReg(); } // When generating more than one CLC, all but the last will need to // branch to the end when a difference is found. MachineBasicBlock *EndMBB = (Opcode == SystemZ::CLC && (ImmLength > 256 || NeedsLoop) ? 
SystemZ::splitBlockAfter(MI, MBB) : nullptr); if (NeedsLoop) { Register StartCountReg = MRI.createVirtualRegister(&SystemZ::GR64BitRegClass); if (IsImmForm) { TII->loadImmediate(*MBB, MI, StartCountReg, ImmLength / 256); ImmLength &= 255; } else { BuildMI(*MBB, MI, DL, TII->get(SystemZ::SRLG), StartCountReg) .addReg(LenAdjReg) .addReg(0) .addImm(8); } bool HaveSingleBase = DestBase.isIdenticalTo(SrcBase); auto loadZeroAddress = [&]() -> MachineOperand { Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass); BuildMI(*MBB, MI, DL, TII->get(SystemZ::LGHI), Reg).addImm(0); return MachineOperand::CreateReg(Reg, false); }; if (DestBase.isReg() && DestBase.getReg() == SystemZ::NoRegister) DestBase = loadZeroAddress(); if (SrcBase.isReg() && SrcBase.getReg() == SystemZ::NoRegister) SrcBase = HaveSingleBase ? DestBase : loadZeroAddress(); MachineBasicBlock *StartMBB = nullptr; MachineBasicBlock *LoopMBB = nullptr; MachineBasicBlock *NextMBB = nullptr; MachineBasicBlock *DoneMBB = nullptr; MachineBasicBlock *AllDoneMBB = nullptr; Register StartSrcReg = forceReg(MI, SrcBase, TII); Register StartDestReg = (HaveSingleBase ? StartSrcReg : forceReg(MI, DestBase, TII)); const TargetRegisterClass *RC = &SystemZ::ADDR64BitRegClass; Register ThisSrcReg = MRI.createVirtualRegister(RC); Register ThisDestReg = (HaveSingleBase ? ThisSrcReg : MRI.createVirtualRegister(RC)); Register NextSrcReg = MRI.createVirtualRegister(RC); Register NextDestReg = (HaveSingleBase ? NextSrcReg : MRI.createVirtualRegister(RC)); RC = &SystemZ::GR64BitRegClass; Register ThisCountReg = MRI.createVirtualRegister(RC); Register NextCountReg = MRI.createVirtualRegister(RC); if (IsRegForm) { AllDoneMBB = SystemZ::splitBlockBefore(MI, MBB); StartMBB = SystemZ::emitBlockAfter(MBB); LoopMBB = SystemZ::emitBlockAfter(StartMBB); NextMBB = (EndMBB ? SystemZ::emitBlockAfter(LoopMBB) : LoopMBB); DoneMBB = SystemZ::emitBlockAfter(NextMBB); // MBB: // # Jump to AllDoneMBB if LenAdjReg means 0, or fall thru to StartMBB. BuildMI(MBB, DL, TII->get(SystemZ::CGHI)) .addReg(LenAdjReg).addImm(IsMemset ? -2 : -1); BuildMI(MBB, DL, TII->get(SystemZ::BRC)) .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_EQ) .addMBB(AllDoneMBB); MBB->addSuccessor(AllDoneMBB); if (!IsMemset) MBB->addSuccessor(StartMBB); else { // MemsetOneCheckMBB: // # Jump to MemsetOneMBB for a memset of length 1, or // # fall thru to StartMBB. MachineBasicBlock *MemsetOneCheckMBB = SystemZ::emitBlockAfter(MBB); MachineBasicBlock *MemsetOneMBB = SystemZ::emitBlockAfter(&*MF.rbegin()); MBB->addSuccessor(MemsetOneCheckMBB); MBB = MemsetOneCheckMBB; BuildMI(MBB, DL, TII->get(SystemZ::CGHI)) .addReg(LenAdjReg).addImm(-1); BuildMI(MBB, DL, TII->get(SystemZ::BRC)) .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_EQ) .addMBB(MemsetOneMBB); MBB->addSuccessor(MemsetOneMBB, {10, 100}); MBB->addSuccessor(StartMBB, {90, 100}); // MemsetOneMBB: // # Jump back to AllDoneMBB after a single MVI or STC. MBB = MemsetOneMBB; insertMemMemOp(MBB, MBB->end(), MachineOperand::CreateReg(StartDestReg, false), DestDisp, MachineOperand::CreateReg(StartSrcReg, false), SrcDisp, 1); BuildMI(MBB, DL, TII->get(SystemZ::J)).addMBB(AllDoneMBB); MBB->addSuccessor(AllDoneMBB); } // StartMBB: // # Jump to DoneMBB if %StartCountReg is zero, or fall through to LoopMBB. 
MBB = StartMBB; BuildMI(MBB, DL, TII->get(SystemZ::CGHI)) .addReg(StartCountReg).addImm(0); BuildMI(MBB, DL, TII->get(SystemZ::BRC)) .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_EQ) .addMBB(DoneMBB); MBB->addSuccessor(DoneMBB); MBB->addSuccessor(LoopMBB); } else { StartMBB = MBB; DoneMBB = SystemZ::splitBlockBefore(MI, MBB); LoopMBB = SystemZ::emitBlockAfter(StartMBB); NextMBB = (EndMBB ? SystemZ::emitBlockAfter(LoopMBB) : LoopMBB); // StartMBB: // # fall through to LoopMBB MBB->addSuccessor(LoopMBB); DestBase = MachineOperand::CreateReg(NextDestReg, false); SrcBase = MachineOperand::CreateReg(NextSrcReg, false); if (EndMBB && !ImmLength) // If the loop handled the whole CLC range, DoneMBB will be empty with // CC live-through into EndMBB, so add it as live-in. DoneMBB->addLiveIn(SystemZ::CC); } // LoopMBB: // %ThisDestReg = phi [ %StartDestReg, StartMBB ], // [ %NextDestReg, NextMBB ] // %ThisSrcReg = phi [ %StartSrcReg, StartMBB ], // [ %NextSrcReg, NextMBB ] // %ThisCountReg = phi [ %StartCountReg, StartMBB ], // [ %NextCountReg, NextMBB ] // ( PFD 2, 768+DestDisp(%ThisDestReg) ) // Opcode DestDisp(256,%ThisDestReg), SrcDisp(%ThisSrcReg) // ( JLH EndMBB ) // // The prefetch is used only for MVC. The JLH is used only for CLC. MBB = LoopMBB; BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisDestReg) .addReg(StartDestReg).addMBB(StartMBB) .addReg(NextDestReg).addMBB(NextMBB); if (!HaveSingleBase) BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisSrcReg) .addReg(StartSrcReg).addMBB(StartMBB) .addReg(NextSrcReg).addMBB(NextMBB); BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisCountReg) .addReg(StartCountReg).addMBB(StartMBB) .addReg(NextCountReg).addMBB(NextMBB); if (Opcode == SystemZ::MVC) BuildMI(MBB, DL, TII->get(SystemZ::PFD)) .addImm(SystemZ::PFD_WRITE) .addReg(ThisDestReg).addImm(DestDisp - IsMemset + 768).addReg(0); insertMemMemOp(MBB, MBB->end(), MachineOperand::CreateReg(ThisDestReg, false), DestDisp, MachineOperand::CreateReg(ThisSrcReg, false), SrcDisp, 256); if (EndMBB) { BuildMI(MBB, DL, TII->get(SystemZ::BRC)) .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE) .addMBB(EndMBB); MBB->addSuccessor(EndMBB); MBB->addSuccessor(NextMBB); } // NextMBB: // %NextDestReg = LA 256(%ThisDestReg) // %NextSrcReg = LA 256(%ThisSrcReg) // %NextCountReg = AGHI %ThisCountReg, -1 // CGHI %NextCountReg, 0 // JLH LoopMBB // # fall through to DoneMBB // // The AGHI, CGHI and JLH should be converted to BRCTG by later passes. MBB = NextMBB; BuildMI(MBB, DL, TII->get(SystemZ::LA), NextDestReg) .addReg(ThisDestReg).addImm(256).addReg(0); if (!HaveSingleBase) BuildMI(MBB, DL, TII->get(SystemZ::LA), NextSrcReg) .addReg(ThisSrcReg).addImm(256).addReg(0); BuildMI(MBB, DL, TII->get(SystemZ::AGHI), NextCountReg) .addReg(ThisCountReg).addImm(-1); BuildMI(MBB, DL, TII->get(SystemZ::CGHI)) .addReg(NextCountReg).addImm(0); BuildMI(MBB, DL, TII->get(SystemZ::BRC)) .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE) .addMBB(LoopMBB); MBB->addSuccessor(LoopMBB); MBB->addSuccessor(DoneMBB); MBB = DoneMBB; if (IsRegForm) { // DoneMBB: // # Make PHIs for RemDestReg/RemSrcReg as the loop may or may not run. // # Use EXecute Relative Long for the remainder of the bytes. The target // instruction of the EXRL will have a length field of 1 since 0 is an // illegal value. The number of bytes processed becomes (%LenAdjReg & // 0xff) + 1. // # Fall through to AllDoneMBB. Register RemSrcReg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass); Register RemDestReg = HaveSingleBase ? 
RemSrcReg : MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass); BuildMI(MBB, DL, TII->get(SystemZ::PHI), RemDestReg) .addReg(StartDestReg).addMBB(StartMBB) .addReg(NextDestReg).addMBB(NextMBB); if (!HaveSingleBase) BuildMI(MBB, DL, TII->get(SystemZ::PHI), RemSrcReg) .addReg(StartSrcReg).addMBB(StartMBB) .addReg(NextSrcReg).addMBB(NextMBB); if (IsMemset) insertMemMemOp(MBB, MBB->end(), MachineOperand::CreateReg(RemDestReg, false), DestDisp, MachineOperand::CreateReg(RemSrcReg, false), SrcDisp, 1); MachineInstrBuilder EXRL_MIB = BuildMI(MBB, DL, TII->get(SystemZ::EXRL_Pseudo)) .addImm(Opcode) .addReg(LenAdjReg) .addReg(RemDestReg).addImm(DestDisp) .addReg(RemSrcReg).addImm(SrcDisp); MBB->addSuccessor(AllDoneMBB); MBB = AllDoneMBB; if (Opcode != SystemZ::MVC) { EXRL_MIB.addReg(SystemZ::CC, RegState::ImplicitDefine); if (EndMBB) MBB->addLiveIn(SystemZ::CC); } } MF.getProperties().reset(MachineFunctionProperties::Property::NoPHIs); } // Handle any remaining bytes with straight-line code. while (ImmLength > 0) { uint64_t ThisLength = std::min(ImmLength, uint64_t(256)); // The previous iteration might have created out-of-range displacements. // Apply them using LA/LAY if so. foldDisplIfNeeded(DestBase, DestDisp); foldDisplIfNeeded(SrcBase, SrcDisp); insertMemMemOp(MBB, MI, DestBase, DestDisp, SrcBase, SrcDisp, ThisLength); DestDisp += ThisLength; SrcDisp += ThisLength; ImmLength -= ThisLength; // If there's another CLC to go, branch to the end if a difference // was found. if (EndMBB && ImmLength > 0) { MachineBasicBlock *NextMBB = SystemZ::splitBlockBefore(MI, MBB); BuildMI(MBB, DL, TII->get(SystemZ::BRC)) .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE) .addMBB(EndMBB); MBB->addSuccessor(EndMBB); MBB->addSuccessor(NextMBB); MBB = NextMBB; } } if (EndMBB) { MBB->addSuccessor(EndMBB); MBB = EndMBB; MBB->addLiveIn(SystemZ::CC); } MI.eraseFromParent(); return MBB; } // Decompose string pseudo-instruction MI into a loop that continually performs // Opcode until CC != 3. MachineBasicBlock *SystemZTargetLowering::emitStringWrapper( MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const { MachineFunction &MF = *MBB->getParent(); const SystemZInstrInfo *TII = Subtarget.getInstrInfo(); MachineRegisterInfo &MRI = MF.getRegInfo(); DebugLoc DL = MI.getDebugLoc(); uint64_t End1Reg = MI.getOperand(0).getReg(); uint64_t Start1Reg = MI.getOperand(1).getReg(); uint64_t Start2Reg = MI.getOperand(2).getReg(); uint64_t CharReg = MI.getOperand(3).getReg(); const TargetRegisterClass *RC = &SystemZ::GR64BitRegClass; uint64_t This1Reg = MRI.createVirtualRegister(RC); uint64_t This2Reg = MRI.createVirtualRegister(RC); uint64_t End2Reg = MRI.createVirtualRegister(RC); MachineBasicBlock *StartMBB = MBB; MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB); MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB); // StartMBB: // # fall through to LoopMBB MBB->addSuccessor(LoopMBB); // LoopMBB: // %This1Reg = phi [ %Start1Reg, StartMBB ], [ %End1Reg, LoopMBB ] // %This2Reg = phi [ %Start2Reg, StartMBB ], [ %End2Reg, LoopMBB ] // R0L = %CharReg // %End1Reg, %End2Reg = CLST %This1Reg, %This2Reg -- uses R0L // JO LoopMBB // # fall through to DoneMBB // // The load of R0L can be hoisted by post-RA LICM. 
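// CC 3 from these string instructions means they stopped after processing a CPU-determined number of bytes and must be resumed, which is why the loop branches back to itself while CC == 3.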
MBB = LoopMBB; BuildMI(MBB, DL, TII->get(SystemZ::PHI), This1Reg) .addReg(Start1Reg).addMBB(StartMBB) .addReg(End1Reg).addMBB(LoopMBB); BuildMI(MBB, DL, TII->get(SystemZ::PHI), This2Reg) .addReg(Start2Reg).addMBB(StartMBB) .addReg(End2Reg).addMBB(LoopMBB); BuildMI(MBB, DL, TII->get(TargetOpcode::COPY), SystemZ::R0L).addReg(CharReg); BuildMI(MBB, DL, TII->get(Opcode)) .addReg(End1Reg, RegState::Define).addReg(End2Reg, RegState::Define) .addReg(This1Reg).addReg(This2Reg); BuildMI(MBB, DL, TII->get(SystemZ::BRC)) .addImm(SystemZ::CCMASK_ANY).addImm(SystemZ::CCMASK_3).addMBB(LoopMBB); MBB->addSuccessor(LoopMBB); MBB->addSuccessor(DoneMBB); DoneMBB->addLiveIn(SystemZ::CC); MI.eraseFromParent(); return DoneMBB; } // Update TBEGIN instruction with final opcode and register clobbers. MachineBasicBlock *SystemZTargetLowering::emitTransactionBegin( MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode, bool NoFloat) const { MachineFunction &MF = *MBB->getParent(); const TargetFrameLowering *TFI = Subtarget.getFrameLowering(); const SystemZInstrInfo *TII = Subtarget.getInstrInfo(); // Update opcode. MI.setDesc(TII->get(Opcode)); // We cannot handle a TBEGIN that clobbers the stack or frame pointer. // Make sure to add the corresponding GRSM bits if they are missing. uint64_t Control = MI.getOperand(2).getImm(); static const unsigned GPRControlBit[16] = { 0x8000, 0x8000, 0x4000, 0x4000, 0x2000, 0x2000, 0x1000, 0x1000, 0x0800, 0x0800, 0x0400, 0x0400, 0x0200, 0x0200, 0x0100, 0x0100 }; Control |= GPRControlBit[15]; if (TFI->hasFP(MF)) Control |= GPRControlBit[11]; MI.getOperand(2).setImm(Control); // Add GPR clobbers. for (int I = 0; I < 16; I++) { if ((Control & GPRControlBit[I]) == 0) { unsigned Reg = SystemZMC::GR64Regs[I]; MI.addOperand(MachineOperand::CreateReg(Reg, true, true)); } } // Add FPR/VR clobbers. if (!NoFloat && (Control & 4) != 0) { if (Subtarget.hasVector()) { for (unsigned Reg : SystemZMC::VR128Regs) { MI.addOperand(MachineOperand::CreateReg(Reg, true, true)); } } else { for (unsigned Reg : SystemZMC::FP64Regs) { MI.addOperand(MachineOperand::CreateReg(Reg, true, true)); } } } return MBB; } MachineBasicBlock *SystemZTargetLowering::emitLoadAndTestCmp0( MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const { MachineFunction &MF = *MBB->getParent(); MachineRegisterInfo *MRI = &MF.getRegInfo(); const SystemZInstrInfo *TII = Subtarget.getInstrInfo(); DebugLoc DL = MI.getDebugLoc(); Register SrcReg = MI.getOperand(0).getReg(); // Create new virtual register of the same class as source. const TargetRegisterClass *RC = MRI->getRegClass(SrcReg); Register DstReg = MRI->createVirtualRegister(RC); // Replace pseudo with a normal load-and-test that models the def as // well. 
BuildMI(*MBB, MI, DL, TII->get(Opcode), DstReg) .addReg(SrcReg) .setMIFlags(MI.getFlags()); MI.eraseFromParent(); return MBB; } MachineBasicBlock *SystemZTargetLowering::emitProbedAlloca( MachineInstr &MI, MachineBasicBlock *MBB) const { MachineFunction &MF = *MBB->getParent(); MachineRegisterInfo *MRI = &MF.getRegInfo(); const SystemZInstrInfo *TII = Subtarget.getInstrInfo(); DebugLoc DL = MI.getDebugLoc(); const unsigned ProbeSize = getStackProbeSize(MF); Register DstReg = MI.getOperand(0).getReg(); Register SizeReg = MI.getOperand(2).getReg(); MachineBasicBlock *StartMBB = MBB; MachineBasicBlock *DoneMBB = SystemZ::splitBlockAfter(MI, MBB); MachineBasicBlock *LoopTestMBB = SystemZ::emitBlockAfter(StartMBB); MachineBasicBlock *LoopBodyMBB = SystemZ::emitBlockAfter(LoopTestMBB); MachineBasicBlock *TailTestMBB = SystemZ::emitBlockAfter(LoopBodyMBB); MachineBasicBlock *TailMBB = SystemZ::emitBlockAfter(TailTestMBB); MachineMemOperand *VolLdMMO = MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad, 8, Align(1)); Register PHIReg = MRI->createVirtualRegister(&SystemZ::ADDR64BitRegClass); Register IncReg = MRI->createVirtualRegister(&SystemZ::ADDR64BitRegClass); // LoopTestMBB // BRC TailTestMBB // # fallthrough to LoopBodyMBB StartMBB->addSuccessor(LoopTestMBB); MBB = LoopTestMBB; BuildMI(MBB, DL, TII->get(SystemZ::PHI), PHIReg) .addReg(SizeReg) .addMBB(StartMBB) .addReg(IncReg) .addMBB(LoopBodyMBB); BuildMI(MBB, DL, TII->get(SystemZ::CLGFI)) .addReg(PHIReg) .addImm(ProbeSize); BuildMI(MBB, DL, TII->get(SystemZ::BRC)) .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_LT) .addMBB(TailTestMBB); MBB->addSuccessor(LoopBodyMBB); MBB->addSuccessor(TailTestMBB); // LoopBodyMBB: Allocate and probe by means of a volatile compare. 
// J LoopTestMBB MBB = LoopBodyMBB; BuildMI(MBB, DL, TII->get(SystemZ::SLGFI), IncReg) .addReg(PHIReg) .addImm(ProbeSize); BuildMI(MBB, DL, TII->get(SystemZ::SLGFI), SystemZ::R15D) .addReg(SystemZ::R15D) .addImm(ProbeSize); BuildMI(MBB, DL, TII->get(SystemZ::CG)).addReg(SystemZ::R15D) .addReg(SystemZ::R15D).addImm(ProbeSize - 8).addReg(0) .setMemRefs(VolLdMMO); BuildMI(MBB, DL, TII->get(SystemZ::J)).addMBB(LoopTestMBB); MBB->addSuccessor(LoopTestMBB); // TailTestMBB // BRC DoneMBB // # fallthrough to TailMBB MBB = TailTestMBB; BuildMI(MBB, DL, TII->get(SystemZ::CGHI)) .addReg(PHIReg) .addImm(0); BuildMI(MBB, DL, TII->get(SystemZ::BRC)) .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_EQ) .addMBB(DoneMBB); MBB->addSuccessor(TailMBB); MBB->addSuccessor(DoneMBB); // TailMBB // # fallthrough to DoneMBB MBB = TailMBB; BuildMI(MBB, DL, TII->get(SystemZ::SLGR), SystemZ::R15D) .addReg(SystemZ::R15D) .addReg(PHIReg); BuildMI(MBB, DL, TII->get(SystemZ::CG)).addReg(SystemZ::R15D) .addReg(SystemZ::R15D).addImm(-8).addReg(PHIReg) .setMemRefs(VolLdMMO); MBB->addSuccessor(DoneMBB); // DoneMBB MBB = DoneMBB; BuildMI(*MBB, MBB->begin(), DL, TII->get(TargetOpcode::COPY), DstReg) .addReg(SystemZ::R15D); MI.eraseFromParent(); return DoneMBB; } SDValue SystemZTargetLowering:: getBackchainAddress(SDValue SP, SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); auto *TFL = Subtarget.getFrameLowering(); SDLoc DL(SP); return DAG.getNode(ISD::ADD, DL, MVT::i64, SP, DAG.getIntPtrConstant(TFL->getBackchainOffset(MF), DL)); } MachineBasicBlock *SystemZTargetLowering::EmitInstrWithCustomInserter( MachineInstr &MI, MachineBasicBlock *MBB) const { switch (MI.getOpcode()) { case SystemZ::ADJCALLSTACKDOWN: case SystemZ::ADJCALLSTACKUP: return emitAdjCallStack(MI, MBB); case SystemZ::Select32: case SystemZ::Select64: case SystemZ::Select128: case SystemZ::SelectF32: case SystemZ::SelectF64: case SystemZ::SelectF128: case SystemZ::SelectVR32: case SystemZ::SelectVR64: case SystemZ::SelectVR128: return emitSelect(MI, MBB); case SystemZ::CondStore8Mux: return emitCondStore(MI, MBB, SystemZ::STCMux, 0, false); case SystemZ::CondStore8MuxInv: return emitCondStore(MI, MBB, SystemZ::STCMux, 0, true); case SystemZ::CondStore16Mux: return emitCondStore(MI, MBB, SystemZ::STHMux, 0, false); case SystemZ::CondStore16MuxInv: return emitCondStore(MI, MBB, SystemZ::STHMux, 0, true); case SystemZ::CondStore32Mux: return emitCondStore(MI, MBB, SystemZ::STMux, SystemZ::STOCMux, false); case SystemZ::CondStore32MuxInv: return emitCondStore(MI, MBB, SystemZ::STMux, SystemZ::STOCMux, true); case SystemZ::CondStore8: return emitCondStore(MI, MBB, SystemZ::STC, 0, false); case SystemZ::CondStore8Inv: return emitCondStore(MI, MBB, SystemZ::STC, 0, true); case SystemZ::CondStore16: return emitCondStore(MI, MBB, SystemZ::STH, 0, false); case SystemZ::CondStore16Inv: return emitCondStore(MI, MBB, SystemZ::STH, 0, true); case SystemZ::CondStore32: return emitCondStore(MI, MBB, SystemZ::ST, SystemZ::STOC, false); case SystemZ::CondStore32Inv: return emitCondStore(MI, MBB, SystemZ::ST, SystemZ::STOC, true); case SystemZ::CondStore64: return emitCondStore(MI, MBB, SystemZ::STG, SystemZ::STOCG, false); case SystemZ::CondStore64Inv: return emitCondStore(MI, MBB, SystemZ::STG, SystemZ::STOCG, true); case SystemZ::CondStoreF32: return emitCondStore(MI, MBB, SystemZ::STE, 0, false); case SystemZ::CondStoreF32Inv: return emitCondStore(MI, MBB, SystemZ::STE, 0, true); case SystemZ::CondStoreF64: return emitCondStore(MI, MBB, 
SystemZ::STD, 0, false); case SystemZ::CondStoreF64Inv: return emitCondStore(MI, MBB, SystemZ::STD, 0, true); case SystemZ::SCmp128Hi: return emitICmp128Hi(MI, MBB, false); case SystemZ::UCmp128Hi: return emitICmp128Hi(MI, MBB, true); case SystemZ::PAIR128: return emitPair128(MI, MBB); case SystemZ::AEXT128: return emitExt128(MI, MBB, false); case SystemZ::ZEXT128: return emitExt128(MI, MBB, true); case SystemZ::ATOMIC_SWAPW: return emitAtomicLoadBinary(MI, MBB, 0); case SystemZ::ATOMIC_LOADW_AR: return emitAtomicLoadBinary(MI, MBB, SystemZ::AR); case SystemZ::ATOMIC_LOADW_AFI: return emitAtomicLoadBinary(MI, MBB, SystemZ::AFI); case SystemZ::ATOMIC_LOADW_SR: return emitAtomicLoadBinary(MI, MBB, SystemZ::SR); case SystemZ::ATOMIC_LOADW_NR: return emitAtomicLoadBinary(MI, MBB, SystemZ::NR); case SystemZ::ATOMIC_LOADW_NILH: return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH); case SystemZ::ATOMIC_LOADW_OR: return emitAtomicLoadBinary(MI, MBB, SystemZ::OR); case SystemZ::ATOMIC_LOADW_OILH: return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH); case SystemZ::ATOMIC_LOADW_XR: return emitAtomicLoadBinary(MI, MBB, SystemZ::XR); case SystemZ::ATOMIC_LOADW_XILF: return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF); case SystemZ::ATOMIC_LOADW_NRi: return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, true); case SystemZ::ATOMIC_LOADW_NILHi: return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, true); case SystemZ::ATOMIC_LOADW_MIN: return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR, SystemZ::CCMASK_CMP_LE); case SystemZ::ATOMIC_LOADW_MAX: return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR, SystemZ::CCMASK_CMP_GE); case SystemZ::ATOMIC_LOADW_UMIN: return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR, SystemZ::CCMASK_CMP_LE); case SystemZ::ATOMIC_LOADW_UMAX: return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR, SystemZ::CCMASK_CMP_GE); case SystemZ::ATOMIC_CMP_SWAPW: return emitAtomicCmpSwapW(MI, MBB); case SystemZ::MVCImm: case SystemZ::MVCReg: return emitMemMemWrapper(MI, MBB, SystemZ::MVC); case SystemZ::NCImm: return emitMemMemWrapper(MI, MBB, SystemZ::NC); case SystemZ::OCImm: return emitMemMemWrapper(MI, MBB, SystemZ::OC); case SystemZ::XCImm: case SystemZ::XCReg: return emitMemMemWrapper(MI, MBB, SystemZ::XC); case SystemZ::CLCImm: case SystemZ::CLCReg: return emitMemMemWrapper(MI, MBB, SystemZ::CLC); case SystemZ::MemsetImmImm: case SystemZ::MemsetImmReg: case SystemZ::MemsetRegImm: case SystemZ::MemsetRegReg: return emitMemMemWrapper(MI, MBB, SystemZ::MVC, true/*IsMemset*/); case SystemZ::CLSTLoop: return emitStringWrapper(MI, MBB, SystemZ::CLST); case SystemZ::MVSTLoop: return emitStringWrapper(MI, MBB, SystemZ::MVST); case SystemZ::SRSTLoop: return emitStringWrapper(MI, MBB, SystemZ::SRST); case SystemZ::TBEGIN: return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, false); case SystemZ::TBEGIN_nofloat: return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, true); case SystemZ::TBEGINC: return emitTransactionBegin(MI, MBB, SystemZ::TBEGINC, true); case SystemZ::LTEBRCompare_Pseudo: return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTEBR); case SystemZ::LTDBRCompare_Pseudo: return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTDBR); case SystemZ::LTXBRCompare_Pseudo: return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTXBR); case SystemZ::PROBED_ALLOCA: return emitProbedAlloca(MI, MBB); case TargetOpcode::STACKMAP: case TargetOpcode::PATCHPOINT: return emitPatchPoint(MI, MBB); default: llvm_unreachable("Unexpected instr type to insert"); } } // This is only used by the isel schedulers, and is needed only to prevent // compiler 
from crashing when list-ilp is used. const TargetRegisterClass * SystemZTargetLowering::getRepRegClassFor(MVT VT) const { if (VT == MVT::Untyped) return &SystemZ::ADDR128BitRegClass; return TargetLowering::getRepRegClassFor(VT); } SDValue SystemZTargetLowering::lowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); /* The rounding method is in FPC Byte 3 bits 6-7, and has the following settings: 00 Round to nearest 01 Round to 0 10 Round to +inf 11 Round to -inf FLT_ROUNDS, on the other hand, expects the following: -1 Undefined 0 Round to 0 1 Round to nearest 2 Round to +inf 3 Round to -inf */ // Save FPC to register. SDValue Chain = Op.getOperand(0); SDValue EFPC( DAG.getMachineNode(SystemZ::EFPC, dl, {MVT::i32, MVT::Other}, Chain), 0); Chain = EFPC.getValue(1); // Transform as necessary SDValue CWD1 = DAG.getNode(ISD::AND, dl, MVT::i32, EFPC, DAG.getConstant(3, dl, MVT::i32)); // RetVal = (CWD1 ^ (CWD1 >> 1)) ^ 1 SDValue CWD2 = DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, DAG.getNode(ISD::SRL, dl, MVT::i32, CWD1, DAG.getConstant(1, dl, MVT::i32))); SDValue RetVal = DAG.getNode(ISD::XOR, dl, MVT::i32, CWD2, DAG.getConstant(1, dl, MVT::i32)); RetVal = DAG.getZExtOrTrunc(RetVal, dl, Op.getValueType()); return DAG.getMergeValues({RetVal, Chain}, dl); } SDValue SystemZTargetLowering::lowerVECREDUCE_ADD(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); Op = Op.getOperand(0); EVT OpVT = Op.getValueType(); assert(OpVT.isVector() && "Operand type for VECREDUCE_ADD is not a vector."); SDLoc DL(Op); // load a 0 vector for the third operand of VSUM. SDValue Zero = DAG.getSplatBuildVector(OpVT, DL, DAG.getConstant(0, DL, VT)); // execute VSUM. switch (OpVT.getScalarSizeInBits()) { case 8: case 16: Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::v4i32, Op, Zero); [[fallthrough]]; case 32: case 64: Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::i128, Op, DAG.getBitcast(Op.getValueType(), Zero)); break; case 128: break; // VSUM over v1i128 should not happen and would be a noop default: llvm_unreachable("Unexpected scalar size."); } // Cast to original vector type, retrieve last element. return DAG.getNode( ISD::EXTRACT_VECTOR_ELT, DL, VT, DAG.getBitcast(OpVT, Op), DAG.getConstant(OpVT.getVectorNumElements() - 1, DL, MVT::i32)); } diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp index 70b91c266c49..cd0aea313da0 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp @@ -1,1776 +1,1739 @@ //===-- WebAssemblyCFGStackify.cpp - CFG Stackification -------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// /// /// \file /// This file implements a CFG stacking pass. /// /// This pass inserts BLOCK, LOOP, and TRY markers to mark the start of scopes, /// since scope boundaries serve as the labels for WebAssembly's control /// transfers. /// /// This is sufficient to convert arbitrary CFGs into a form that works on /// WebAssembly, provided that all loops are single-entry. /// /// In case we use exceptions, this pass also fixes mismatches in unwind /// destinations created during transforming CFG into wasm structured format. 
/// //===----------------------------------------------------------------------===// #include "Utils/WebAssemblyTypeUtilities.h" #include "WebAssembly.h" #include "WebAssemblyExceptionInfo.h" #include "WebAssemblyMachineFunctionInfo.h" #include "WebAssemblySortRegion.h" #include "WebAssemblySubtarget.h" #include "WebAssemblyUtilities.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/WasmEHFuncInfo.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/Target/TargetMachine.h" using namespace llvm; using WebAssembly::SortRegionInfo; #define DEBUG_TYPE "wasm-cfg-stackify" STATISTIC(NumCallUnwindMismatches, "Number of call unwind mismatches found"); STATISTIC(NumCatchUnwindMismatches, "Number of catch unwind mismatches found"); namespace { class WebAssemblyCFGStackify final : public MachineFunctionPass { StringRef getPassName() const override { return "WebAssembly CFG Stackify"; } void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); AU.addRequired(); AU.addRequired(); MachineFunctionPass::getAnalysisUsage(AU); } bool runOnMachineFunction(MachineFunction &MF) override; // For each block whose label represents the end of a scope, record the block // which holds the beginning of the scope. This will allow us to quickly skip // over scoped regions when walking blocks. SmallVector ScopeTops; void updateScopeTops(MachineBasicBlock *Begin, MachineBasicBlock *End) { int EndNo = End->getNumber(); if (!ScopeTops[EndNo] || ScopeTops[EndNo]->getNumber() > Begin->getNumber()) ScopeTops[EndNo] = Begin; } // Placing markers. void placeMarkers(MachineFunction &MF); void placeBlockMarker(MachineBasicBlock &MBB); void placeLoopMarker(MachineBasicBlock &MBB); void placeTryMarker(MachineBasicBlock &MBB); // Exception handling related functions bool fixCallUnwindMismatches(MachineFunction &MF); bool fixCatchUnwindMismatches(MachineFunction &MF); void addTryDelegate(MachineInstr *RangeBegin, MachineInstr *RangeEnd, MachineBasicBlock *DelegateDest); void recalculateScopeTops(MachineFunction &MF); void removeUnnecessaryInstrs(MachineFunction &MF); // Wrap-up using EndMarkerInfo = std::pair; unsigned getBranchDepth(const SmallVectorImpl &Stack, const MachineBasicBlock *MBB); unsigned getDelegateDepth(const SmallVectorImpl &Stack, const MachineBasicBlock *MBB); - unsigned - getRethrowDepth(const SmallVectorImpl &Stack, - const SmallVectorImpl &EHPadStack); + unsigned getRethrowDepth(const SmallVectorImpl &Stack, + const MachineBasicBlock *EHPadToRethrow); void rewriteDepthImmediates(MachineFunction &MF); void fixEndsAtEndOfFunction(MachineFunction &MF); void cleanupFunctionData(MachineFunction &MF); // For each BLOCK|LOOP|TRY, the corresponding END_(BLOCK|LOOP|TRY) or DELEGATE // (in case of TRY). DenseMap BeginToEnd; // For each END_(BLOCK|LOOP|TRY) or DELEGATE, the corresponding // BLOCK|LOOP|TRY. DenseMap EndToBegin; // map DenseMap TryToEHPad; // map DenseMap EHPadToTry; // We need an appendix block to place 'end_loop' or 'end_try' marker when the // loop / exception bottom block is the last block in a function MachineBasicBlock *AppendixBB = nullptr; MachineBasicBlock *getAppendixBlock(MachineFunction &MF) { if (!AppendixBB) { AppendixBB = MF.CreateMachineBasicBlock(); // Give it a fake predecessor so that AsmPrinter prints its label. 
AppendixBB->addSuccessor(AppendixBB); MF.push_back(AppendixBB); } return AppendixBB; } // Before running rewriteDepthImmediates function, 'delegate' has a BB as its // destination operand. getFakeCallerBlock() returns a fake BB that will be // used for the operand when 'delegate' needs to rethrow to the caller. This // will be rewritten as an immediate value that is the number of block depths // + 1 in rewriteDepthImmediates, and this fake BB will be removed at the end // of the pass. MachineBasicBlock *FakeCallerBB = nullptr; MachineBasicBlock *getFakeCallerBlock(MachineFunction &MF) { if (!FakeCallerBB) FakeCallerBB = MF.CreateMachineBasicBlock(); return FakeCallerBB; } // Helper functions to register / unregister scope information created by // marker instructions. void registerScope(MachineInstr *Begin, MachineInstr *End); void registerTryScope(MachineInstr *Begin, MachineInstr *End, MachineBasicBlock *EHPad); void unregisterScope(MachineInstr *Begin); public: static char ID; // Pass identification, replacement for typeid WebAssemblyCFGStackify() : MachineFunctionPass(ID) {} ~WebAssemblyCFGStackify() override { releaseMemory(); } void releaseMemory() override; }; } // end anonymous namespace char WebAssemblyCFGStackify::ID = 0; INITIALIZE_PASS(WebAssemblyCFGStackify, DEBUG_TYPE, "Insert BLOCK/LOOP/TRY markers for WebAssembly scopes", false, false) FunctionPass *llvm::createWebAssemblyCFGStackify() { return new WebAssemblyCFGStackify(); } /// Test whether Pred has any terminators explicitly branching to MBB, as /// opposed to falling through. Note that it's possible (eg. in unoptimized /// code) for a branch instruction to both branch to a block and fallthrough /// to it, so we check the actual branch operands to see if there are any /// explicit mentions. static bool explicitlyBranchesTo(MachineBasicBlock *Pred, MachineBasicBlock *MBB) { for (MachineInstr &MI : Pred->terminators()) for (MachineOperand &MO : MI.explicit_operands()) if (MO.isMBB() && MO.getMBB() == MBB) return true; return false; } // Returns an iterator to the earliest position possible within the MBB, // satisfying the restrictions given by BeforeSet and AfterSet. BeforeSet // contains instructions that should go before the marker, and AfterSet contains // ones that should go after the marker. In this function, AfterSet is only // used for validation checking. template static MachineBasicBlock::iterator getEarliestInsertPos(MachineBasicBlock *MBB, const Container &BeforeSet, const Container &AfterSet) { auto InsertPos = MBB->end(); while (InsertPos != MBB->begin()) { if (BeforeSet.count(&*std::prev(InsertPos))) { #ifndef NDEBUG // Validation check for (auto Pos = InsertPos, E = MBB->begin(); Pos != E; --Pos) assert(!AfterSet.count(&*std::prev(Pos))); #endif break; } --InsertPos; } return InsertPos; } // Returns an iterator to the latest position possible within the MBB, // satisfying the restrictions given by BeforeSet and AfterSet. BeforeSet // contains instructions that should go before the marker, and AfterSet contains // ones that should go after the marker. In this function, BeforeSet is only // used for validation checking. 
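// For example, for a block containing [I0, I1, I2] with AfterSet = {I1}, the
// returned iterator points at I1, so a marker built at that position lands
// between I0 and I1, the latest point that still precedes everything in
// AfterSet. A minimal usage sketch (hypothetical set contents, mirroring how
// placeBlockMarker below uses it):
//
//   SmallPtrSet<const MachineInstr *, 4> Before, After;
//   After.insert(&*MBB->getFirstTerminator()); // assume MBB has a terminator
//   auto Pos = getLatestInsertPos(MBB, Before, After);
//   BuildMI(*MBB, Pos, DL, TII.get(WebAssembly::BLOCK))
//       .addImm(int64_t(WebAssembly::BlockType::Void));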
template static MachineBasicBlock::iterator getLatestInsertPos(MachineBasicBlock *MBB, const Container &BeforeSet, const Container &AfterSet) { auto InsertPos = MBB->begin(); while (InsertPos != MBB->end()) { if (AfterSet.count(&*InsertPos)) { #ifndef NDEBUG // Validation check for (auto Pos = InsertPos, E = MBB->end(); Pos != E; ++Pos) assert(!BeforeSet.count(&*Pos)); #endif break; } ++InsertPos; } return InsertPos; } void WebAssemblyCFGStackify::registerScope(MachineInstr *Begin, MachineInstr *End) { BeginToEnd[Begin] = End; EndToBegin[End] = Begin; } // When 'End' is not an 'end_try' but 'delegate, EHPad is nullptr. void WebAssemblyCFGStackify::registerTryScope(MachineInstr *Begin, MachineInstr *End, MachineBasicBlock *EHPad) { registerScope(Begin, End); TryToEHPad[Begin] = EHPad; EHPadToTry[EHPad] = Begin; } void WebAssemblyCFGStackify::unregisterScope(MachineInstr *Begin) { assert(BeginToEnd.count(Begin)); MachineInstr *End = BeginToEnd[Begin]; assert(EndToBegin.count(End)); BeginToEnd.erase(Begin); EndToBegin.erase(End); MachineBasicBlock *EHPad = TryToEHPad.lookup(Begin); if (EHPad) { assert(EHPadToTry.count(EHPad)); TryToEHPad.erase(Begin); EHPadToTry.erase(EHPad); } } /// Insert a BLOCK marker for branches to MBB (if needed). // TODO Consider a more generalized way of handling block (and also loop and // try) signatures when we implement the multi-value proposal later. void WebAssemblyCFGStackify::placeBlockMarker(MachineBasicBlock &MBB) { assert(!MBB.isEHPad()); MachineFunction &MF = *MBB.getParent(); auto &MDT = getAnalysis().getDomTree(); const auto &TII = *MF.getSubtarget().getInstrInfo(); const auto &MFI = *MF.getInfo(); // First compute the nearest common dominator of all forward non-fallthrough // predecessors so that we minimize the time that the BLOCK is on the stack, // which reduces overall stack height. MachineBasicBlock *Header = nullptr; bool IsBranchedTo = false; int MBBNumber = MBB.getNumber(); for (MachineBasicBlock *Pred : MBB.predecessors()) { if (Pred->getNumber() < MBBNumber) { Header = Header ? MDT.findNearestCommonDominator(Header, Pred) : Pred; if (explicitlyBranchesTo(Pred, &MBB)) IsBranchedTo = true; } } if (!Header) return; if (!IsBranchedTo) return; assert(&MBB != &MF.front() && "Header blocks shouldn't have predecessors"); MachineBasicBlock *LayoutPred = MBB.getPrevNode(); // If the nearest common dominator is inside a more deeply nested context, // walk out to the nearest scope which isn't more deeply nested. for (MachineFunction::iterator I(LayoutPred), E(Header); I != E; --I) { if (MachineBasicBlock *ScopeTop = ScopeTops[I->getNumber()]) { if (ScopeTop->getNumber() > Header->getNumber()) { // Skip over an intervening scope. I = std::next(ScopeTop->getIterator()); } else { // We found a scope level at an appropriate depth. Header = ScopeTop; break; } } } // Decide where in Header to put the BLOCK. // Instructions that should go before the BLOCK. SmallPtrSet BeforeSet; // Instructions that should go after the BLOCK. SmallPtrSet AfterSet; for (const auto &MI : *Header) { // If there is a previously placed LOOP marker and the bottom block of the // loop is above MBB, it should be after the BLOCK, because the loop is // nested in this BLOCK. Otherwise it should be before the BLOCK. 
if (MI.getOpcode() == WebAssembly::LOOP) { auto *LoopBottom = BeginToEnd[&MI]->getParent()->getPrevNode(); if (MBB.getNumber() > LoopBottom->getNumber()) AfterSet.insert(&MI); #ifndef NDEBUG else BeforeSet.insert(&MI); #endif } // If there is a previously placed BLOCK/TRY marker and its corresponding // END marker is before the current BLOCK's END marker, that should be // placed after this BLOCK. Otherwise it should be placed before this BLOCK // marker. if (MI.getOpcode() == WebAssembly::BLOCK || MI.getOpcode() == WebAssembly::TRY) { if (BeginToEnd[&MI]->getParent()->getNumber() <= MBB.getNumber()) AfterSet.insert(&MI); #ifndef NDEBUG else BeforeSet.insert(&MI); #endif } #ifndef NDEBUG // All END_(BLOCK|LOOP|TRY) markers should be before the BLOCK. if (MI.getOpcode() == WebAssembly::END_BLOCK || MI.getOpcode() == WebAssembly::END_LOOP || MI.getOpcode() == WebAssembly::END_TRY) BeforeSet.insert(&MI); #endif // Terminators should go after the BLOCK. if (MI.isTerminator()) AfterSet.insert(&MI); } // Local expression tree should go after the BLOCK. for (auto I = Header->getFirstTerminator(), E = Header->begin(); I != E; --I) { if (std::prev(I)->isDebugInstr() || std::prev(I)->isPosition()) continue; if (WebAssembly::isChild(*std::prev(I), MFI)) AfterSet.insert(&*std::prev(I)); else break; } // Add the BLOCK. WebAssembly::BlockType ReturnType = WebAssembly::BlockType::Void; auto InsertPos = getLatestInsertPos(Header, BeforeSet, AfterSet); MachineInstr *Begin = BuildMI(*Header, InsertPos, Header->findDebugLoc(InsertPos), TII.get(WebAssembly::BLOCK)) .addImm(int64_t(ReturnType)); // Decide where in Header to put the END_BLOCK. BeforeSet.clear(); AfterSet.clear(); for (auto &MI : MBB) { #ifndef NDEBUG // END_BLOCK should precede existing LOOP and TRY markers. if (MI.getOpcode() == WebAssembly::LOOP || MI.getOpcode() == WebAssembly::TRY) AfterSet.insert(&MI); #endif // If there is a previously placed END_LOOP marker and the header of the // loop is above this block's header, the END_LOOP should be placed after // the BLOCK, because the loop contains this block. Otherwise the END_LOOP // should be placed before the BLOCK. The same for END_TRY. if (MI.getOpcode() == WebAssembly::END_LOOP || MI.getOpcode() == WebAssembly::END_TRY) { if (EndToBegin[&MI]->getParent()->getNumber() >= Header->getNumber()) BeforeSet.insert(&MI); #ifndef NDEBUG else AfterSet.insert(&MI); #endif } } // Mark the end of the block. InsertPos = getEarliestInsertPos(&MBB, BeforeSet, AfterSet); MachineInstr *End = BuildMI(MBB, InsertPos, MBB.findPrevDebugLoc(InsertPos), TII.get(WebAssembly::END_BLOCK)); registerScope(Begin, End); // Track the farthest-spanning scope that ends at this point. updateScopeTops(Header, &MBB); } /// Insert a LOOP marker for a loop starting at MBB (if it's a loop header). void WebAssemblyCFGStackify::placeLoopMarker(MachineBasicBlock &MBB) { MachineFunction &MF = *MBB.getParent(); const auto &MLI = getAnalysis().getLI(); const auto &WEI = getAnalysis(); SortRegionInfo SRI(MLI, WEI); const auto &TII = *MF.getSubtarget().getInstrInfo(); MachineLoop *Loop = MLI.getLoopFor(&MBB); if (!Loop || Loop->getHeader() != &MBB) return; // The operand of a LOOP is the first block after the loop. If the loop is the // bottom of the function, insert a dummy block at the end. 
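  // For example, for a loop laid out as bb1..bb3, the LOOP marker is placed in
  // bb1 and its matching END_LOOP in bb4, the first block after the loop (an
  // appendix block is created for this if the loop ends the function).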
MachineBasicBlock *Bottom = SRI.getBottom(Loop); auto Iter = std::next(Bottom->getIterator()); if (Iter == MF.end()) { getAppendixBlock(MF); Iter = std::next(Bottom->getIterator()); } MachineBasicBlock *AfterLoop = &*Iter; // Decide where in Header to put the LOOP. SmallPtrSet BeforeSet; SmallPtrSet AfterSet; for (const auto &MI : MBB) { // LOOP marker should be after any existing loop that ends here. Otherwise // we assume the instruction belongs to the loop. if (MI.getOpcode() == WebAssembly::END_LOOP) BeforeSet.insert(&MI); #ifndef NDEBUG else AfterSet.insert(&MI); #endif } // Mark the beginning of the loop. auto InsertPos = getEarliestInsertPos(&MBB, BeforeSet, AfterSet); MachineInstr *Begin = BuildMI(MBB, InsertPos, MBB.findDebugLoc(InsertPos), TII.get(WebAssembly::LOOP)) .addImm(int64_t(WebAssembly::BlockType::Void)); // Decide where in Header to put the END_LOOP. BeforeSet.clear(); AfterSet.clear(); #ifndef NDEBUG for (const auto &MI : MBB) // Existing END_LOOP markers belong to parent loops of this loop if (MI.getOpcode() == WebAssembly::END_LOOP) AfterSet.insert(&MI); #endif // Mark the end of the loop (using arbitrary debug location that branched to // the loop end as its location). InsertPos = getEarliestInsertPos(AfterLoop, BeforeSet, AfterSet); DebugLoc EndDL = AfterLoop->pred_empty() ? DebugLoc() : (*AfterLoop->pred_rbegin())->findBranchDebugLoc(); MachineInstr *End = BuildMI(*AfterLoop, InsertPos, EndDL, TII.get(WebAssembly::END_LOOP)); registerScope(Begin, End); assert((!ScopeTops[AfterLoop->getNumber()] || ScopeTops[AfterLoop->getNumber()]->getNumber() < MBB.getNumber()) && "With block sorting the outermost loop for a block should be first."); updateScopeTops(&MBB, AfterLoop); } void WebAssemblyCFGStackify::placeTryMarker(MachineBasicBlock &MBB) { assert(MBB.isEHPad()); MachineFunction &MF = *MBB.getParent(); auto &MDT = getAnalysis().getDomTree(); const auto &TII = *MF.getSubtarget().getInstrInfo(); const auto &MLI = getAnalysis().getLI(); const auto &WEI = getAnalysis(); SortRegionInfo SRI(MLI, WEI); const auto &MFI = *MF.getInfo(); // Compute the nearest common dominator of all unwind predecessors MachineBasicBlock *Header = nullptr; int MBBNumber = MBB.getNumber(); for (auto *Pred : MBB.predecessors()) { if (Pred->getNumber() < MBBNumber) { Header = Header ? MDT.findNearestCommonDominator(Header, Pred) : Pred; assert(!explicitlyBranchesTo(Pred, &MBB) && "Explicit branch to an EH pad!"); } } if (!Header) return; // If this try is at the bottom of the function, insert a dummy block at the // end. WebAssemblyException *WE = WEI.getExceptionFor(&MBB); assert(WE); MachineBasicBlock *Bottom = SRI.getBottom(WE); auto Iter = std::next(Bottom->getIterator()); if (Iter == MF.end()) { getAppendixBlock(MF); Iter = std::next(Bottom->getIterator()); } MachineBasicBlock *Cont = &*Iter; assert(Cont != &MF.front()); MachineBasicBlock *LayoutPred = Cont->getPrevNode(); // If the nearest common dominator is inside a more deeply nested context, // walk out to the nearest scope which isn't more deeply nested. for (MachineFunction::iterator I(LayoutPred), E(Header); I != E; --I) { if (MachineBasicBlock *ScopeTop = ScopeTops[I->getNumber()]) { if (ScopeTop->getNumber() > Header->getNumber()) { // Skip over an intervening scope. I = std::next(ScopeTop->getIterator()); } else { // We found a scope level at an appropriate depth. Header = ScopeTop; break; } } } // Decide where in Header to put the TRY. // Instructions that should go before the TRY. 
SmallPtrSet BeforeSet; // Instructions that should go after the TRY. SmallPtrSet AfterSet; for (const auto &MI : *Header) { // If there is a previously placed LOOP marker and the bottom block of the // loop is above MBB, it should be after the TRY, because the loop is nested // in this TRY. Otherwise it should be before the TRY. if (MI.getOpcode() == WebAssembly::LOOP) { auto *LoopBottom = BeginToEnd[&MI]->getParent()->getPrevNode(); if (MBB.getNumber() > LoopBottom->getNumber()) AfterSet.insert(&MI); #ifndef NDEBUG else BeforeSet.insert(&MI); #endif } // All previously inserted BLOCK/TRY markers should be after the TRY because // they are all nested trys. if (MI.getOpcode() == WebAssembly::BLOCK || MI.getOpcode() == WebAssembly::TRY) AfterSet.insert(&MI); #ifndef NDEBUG // All END_(BLOCK/LOOP/TRY) markers should be before the TRY. if (MI.getOpcode() == WebAssembly::END_BLOCK || MI.getOpcode() == WebAssembly::END_LOOP || MI.getOpcode() == WebAssembly::END_TRY) BeforeSet.insert(&MI); #endif // Terminators should go after the TRY. if (MI.isTerminator()) AfterSet.insert(&MI); } // If Header unwinds to MBB (= Header contains 'invoke'), the try block should // contain the call within it. So the call should go after the TRY. The // exception is when the header's terminator is a rethrow instruction, in // which case that instruction, not a call instruction before it, is gonna // throw. MachineInstr *ThrowingCall = nullptr; if (MBB.isPredecessor(Header)) { auto TermPos = Header->getFirstTerminator(); if (TermPos == Header->end() || TermPos->getOpcode() != WebAssembly::RETHROW) { for (auto &MI : reverse(*Header)) { if (MI.isCall()) { AfterSet.insert(&MI); ThrowingCall = &MI; // Possibly throwing calls are usually wrapped by EH_LABEL // instructions. We don't want to split them and the call. if (MI.getIterator() != Header->begin() && std::prev(MI.getIterator())->isEHLabel()) { AfterSet.insert(&*std::prev(MI.getIterator())); ThrowingCall = &*std::prev(MI.getIterator()); } break; } } } } // Local expression tree should go after the TRY. // For BLOCK placement, we start the search from the previous instruction of a // BB's terminator, but in TRY's case, we should start from the previous // instruction of a call that can throw, or a EH_LABEL that precedes the call, // because the return values of the call's previous instructions can be // stackified and consumed by the throwing call. auto SearchStartPt = ThrowingCall ? MachineBasicBlock::iterator(ThrowingCall) : Header->getFirstTerminator(); for (auto I = SearchStartPt, E = Header->begin(); I != E; --I) { if (std::prev(I)->isDebugInstr() || std::prev(I)->isPosition()) continue; if (WebAssembly::isChild(*std::prev(I), MFI)) AfterSet.insert(&*std::prev(I)); else break; } // Add the TRY. auto InsertPos = getLatestInsertPos(Header, BeforeSet, AfterSet); MachineInstr *Begin = BuildMI(*Header, InsertPos, Header->findDebugLoc(InsertPos), TII.get(WebAssembly::TRY)) .addImm(int64_t(WebAssembly::BlockType::Void)); // Decide where in Header to put the END_TRY. BeforeSet.clear(); AfterSet.clear(); for (const auto &MI : *Cont) { #ifndef NDEBUG // END_TRY should precede existing LOOP and BLOCK markers. if (MI.getOpcode() == WebAssembly::LOOP || MI.getOpcode() == WebAssembly::BLOCK) AfterSet.insert(&MI); // All END_TRY markers placed earlier belong to exceptions that contains // this one. 
    if (MI.getOpcode() == WebAssembly::END_TRY)
      AfterSet.insert(&MI);
#endif

    // If there is a previously placed END_LOOP marker and its header is after
    // where TRY marker is, this loop is contained within the 'catch' part, so
    // the END_TRY marker should go after that. Otherwise, the whole try-catch
    // is contained within this loop, so the END_TRY should go before that.
    if (MI.getOpcode() == WebAssembly::END_LOOP) {
      // For a LOOP to be after TRY, LOOP's BB should be after TRY's BB; if they
      // are in the same BB, LOOP is always before TRY.
      if (EndToBegin[&MI]->getParent()->getNumber() > Header->getNumber())
        BeforeSet.insert(&MI);
#ifndef NDEBUG
      else
        AfterSet.insert(&MI);
#endif
    }

    // It is not possible for an END_BLOCK to be already in this block.
  }

  // Mark the end of the TRY.
  InsertPos = getEarliestInsertPos(Cont, BeforeSet, AfterSet);
  MachineInstr *End = BuildMI(*Cont, InsertPos, Bottom->findBranchDebugLoc(),
                              TII.get(WebAssembly::END_TRY));
  registerTryScope(Begin, End, &MBB);

  // Track the farthest-spanning scope that ends at this point. We create two
  // mappings: (BB with 'end_try' -> BB with 'try') and (BB with 'catch' -> BB
  // with 'try'). We need to create 'catch' -> 'try' mapping here too because
  // markers should not span across 'catch'. For example, this should not
  // happen:
  //
  // try
  //   block     --|  (X)
  // catch         |
  //   end_block --|
  // end_try
  for (auto *End : {&MBB, Cont})
    updateScopeTops(Header, End);
}

void WebAssemblyCFGStackify::removeUnnecessaryInstrs(MachineFunction &MF) {
  const auto &TII = *MF.getSubtarget().getInstrInfo();

  // When there is an unconditional branch right before a catch instruction and
  // it branches to the end of end_try marker, we don't need the branch, because
  // if there is no exception, the control flow transfers to that point anyway.
  // bb0:
  //   try
  //     ...
  //     br bb2      <- Not necessary
  // bb1 (ehpad):
  //   catch
  //     ...
  // bb2:            <- Continuation BB
  //   end
  //
  // A more involved case: When the BB where 'end' is located is another EH
  // pad, the Cont (= continuation) BB is that EH pad's 'end' BB. For example,
  // bb0:
  //   try
  //     try
  //       ...
  //       br bb3    <- Not necessary
  // bb1 (ehpad):
  //     catch
  // bb2 (ehpad):
  //     end
  //   catch
  //     ...
  // bb3:            <- Continuation BB
  //   end
  //
  // When the EH pad at hand is bb1, its matching end_try is in bb2. But it is
  // another EH pad, so bb0's continuation BB becomes bb3. So 'br bb3' in the
  // code can be deleted. This is why we run 'while' until 'Cont' is not an EH
  // pad.
  for (auto &MBB : MF) {
    if (!MBB.isEHPad())
      continue;

    MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
    SmallVector Cond;
    MachineBasicBlock *EHPadLayoutPred = MBB.getPrevNode();

    MachineBasicBlock *Cont = &MBB;
    while (Cont->isEHPad()) {
      MachineInstr *Try = EHPadToTry[Cont];
      MachineInstr *EndTry = BeginToEnd[Try];
      // We started from an EH pad, so the end marker cannot be a delegate
      assert(EndTry->getOpcode() != WebAssembly::DELEGATE);
      Cont = EndTry->getParent();
    }

    bool Analyzable = !TII.analyzeBranch(*EHPadLayoutPred, TBB, FBB, Cond);
    // This condition means either
    // 1. This BB ends with a single unconditional branch whose destination is
    //    Cont.
    // 2. This BB ends with a conditional branch followed by an unconditional
    //    branch, and the unconditional branch's destination is Cont.
    // In both cases, we want to remove the last (= unconditional) branch.
if (Analyzable && ((Cond.empty() && TBB && TBB == Cont) || (!Cond.empty() && FBB && FBB == Cont))) { bool ErasedUncondBr = false; (void)ErasedUncondBr; for (auto I = EHPadLayoutPred->end(), E = EHPadLayoutPred->begin(); I != E; --I) { auto PrevI = std::prev(I); if (PrevI->isTerminator()) { assert(PrevI->getOpcode() == WebAssembly::BR); PrevI->eraseFromParent(); ErasedUncondBr = true; break; } } assert(ErasedUncondBr && "Unconditional branch not erased!"); } } // When there are block / end_block markers that overlap with try / end_try // markers, and the block and try markers' return types are the same, the // block /end_block markers are not necessary, because try / end_try markers // also can serve as boundaries for branches. // block <- Not necessary // try // ... // catch // ... // end // end <- Not necessary SmallVector ToDelete; for (auto &MBB : MF) { for (auto &MI : MBB) { if (MI.getOpcode() != WebAssembly::TRY) continue; MachineInstr *Try = &MI, *EndTry = BeginToEnd[Try]; if (EndTry->getOpcode() == WebAssembly::DELEGATE) continue; MachineBasicBlock *TryBB = Try->getParent(); MachineBasicBlock *Cont = EndTry->getParent(); int64_t RetType = Try->getOperand(0).getImm(); for (auto B = Try->getIterator(), E = std::next(EndTry->getIterator()); B != TryBB->begin() && E != Cont->end() && std::prev(B)->getOpcode() == WebAssembly::BLOCK && E->getOpcode() == WebAssembly::END_BLOCK && std::prev(B)->getOperand(0).getImm() == RetType; --B, ++E) { ToDelete.push_back(&*std::prev(B)); ToDelete.push_back(&*E); } } } for (auto *MI : ToDelete) { if (MI->getOpcode() == WebAssembly::BLOCK) unregisterScope(MI); MI->eraseFromParent(); } } // When MBB is split into MBB and Split, we should unstackify defs in MBB that // have their uses in Split. static void unstackifyVRegsUsedInSplitBB(MachineBasicBlock &MBB, MachineBasicBlock &Split) { MachineFunction &MF = *MBB.getParent(); const auto &TII = *MF.getSubtarget().getInstrInfo(); auto &MFI = *MF.getInfo(); auto &MRI = MF.getRegInfo(); for (auto &MI : Split) { for (auto &MO : MI.explicit_uses()) { if (!MO.isReg() || MO.getReg().isPhysical()) continue; if (MachineInstr *Def = MRI.getUniqueVRegDef(MO.getReg())) if (Def->getParent() == &MBB) MFI.unstackifyVReg(MO.getReg()); } } // In RegStackify, when a register definition is used multiple times, // Reg = INST ... // INST ..., Reg, ... // INST ..., Reg, ... // INST ..., Reg, ... // // we introduce a TEE, which has the following form: // DefReg = INST ... // TeeReg, Reg = TEE_... DefReg // INST ..., TeeReg, ... // INST ..., Reg, ... // INST ..., Reg, ... // with DefReg and TeeReg stackified but Reg not stackified. // // But the invariant that TeeReg should be stackified can be violated while we // unstackify registers in the split BB above. In this case, we convert TEEs // into two COPYs. This COPY will be eventually eliminated in ExplicitLocals. // DefReg = INST ... // TeeReg = COPY DefReg // Reg = COPY DefReg // INST ..., TeeReg, ... // INST ..., Reg, ... // INST ..., Reg, ... 
for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) { if (!WebAssembly::isTee(MI.getOpcode())) continue; Register TeeReg = MI.getOperand(0).getReg(); Register Reg = MI.getOperand(1).getReg(); Register DefReg = MI.getOperand(2).getReg(); if (!MFI.isVRegStackified(TeeReg)) { // Now we are not using TEE anymore, so unstackify DefReg too MFI.unstackifyVReg(DefReg); unsigned CopyOpc = WebAssembly::getCopyOpcodeForRegClass(MRI.getRegClass(DefReg)); BuildMI(MBB, &MI, MI.getDebugLoc(), TII.get(CopyOpc), TeeReg) .addReg(DefReg); BuildMI(MBB, &MI, MI.getDebugLoc(), TII.get(CopyOpc), Reg).addReg(DefReg); MI.eraseFromParent(); } } } // Wrap the given range of instruction with try-delegate. RangeBegin and // RangeEnd are inclusive. void WebAssemblyCFGStackify::addTryDelegate(MachineInstr *RangeBegin, MachineInstr *RangeEnd, MachineBasicBlock *DelegateDest) { auto *BeginBB = RangeBegin->getParent(); auto *EndBB = RangeEnd->getParent(); MachineFunction &MF = *BeginBB->getParent(); const auto &MFI = *MF.getInfo(); const auto &TII = *MF.getSubtarget().getInstrInfo(); // Local expression tree before the first call of this range should go // after the nested TRY. SmallPtrSet AfterSet; AfterSet.insert(RangeBegin); for (auto I = MachineBasicBlock::iterator(RangeBegin), E = BeginBB->begin(); I != E; --I) { if (std::prev(I)->isDebugInstr() || std::prev(I)->isPosition()) continue; if (WebAssembly::isChild(*std::prev(I), MFI)) AfterSet.insert(&*std::prev(I)); else break; } // Create the nested try instruction. auto TryPos = getLatestInsertPos( BeginBB, SmallPtrSet(), AfterSet); MachineInstr *Try = BuildMI(*BeginBB, TryPos, RangeBegin->getDebugLoc(), TII.get(WebAssembly::TRY)) .addImm(int64_t(WebAssembly::BlockType::Void)); // Create a BB to insert the 'delegate' instruction. MachineBasicBlock *DelegateBB = MF.CreateMachineBasicBlock(); // If the destination of 'delegate' is not the caller, adds the destination to // the BB's successors. if (DelegateDest != FakeCallerBB) DelegateBB->addSuccessor(DelegateDest); auto SplitPos = std::next(RangeEnd->getIterator()); if (SplitPos == EndBB->end()) { // If the range's end instruction is at the end of the BB, insert the new // delegate BB after the current BB. MF.insert(std::next(EndBB->getIterator()), DelegateBB); EndBB->addSuccessor(DelegateBB); } else { // When the split pos is in the middle of a BB, we split the BB into two and // put the 'delegate' BB in between. We normally create a split BB and make // it a successor of the original BB (PostSplit == true), but in case the BB // is an EH pad and the split pos is before 'catch', we should preserve the // BB's property, including that it is an EH pad, in the later part of the // BB, where 'catch' is. In this case we set PostSplit to false. bool PostSplit = true; if (EndBB->isEHPad()) { for (auto I = MachineBasicBlock::iterator(SplitPos), E = EndBB->end(); I != E; ++I) { if (WebAssembly::isCatch(I->getOpcode())) { PostSplit = false; break; } } } MachineBasicBlock *PreBB = nullptr, *PostBB = nullptr; if (PostSplit) { // If the range's end instruction is in the middle of the BB, we split the // BB into two and insert the delegate BB in between. 
// - Before: // bb: // range_end // other_insts // // - After: // pre_bb: (previous 'bb') // range_end // delegate_bb: (new) // delegate // post_bb: (new) // other_insts PreBB = EndBB; PostBB = MF.CreateMachineBasicBlock(); MF.insert(std::next(PreBB->getIterator()), PostBB); MF.insert(std::next(PreBB->getIterator()), DelegateBB); PostBB->splice(PostBB->end(), PreBB, SplitPos, PreBB->end()); PostBB->transferSuccessors(PreBB); } else { // - Before: // ehpad: // range_end // catch // ... // // - After: // pre_bb: (new) // range_end // delegate_bb: (new) // delegate // post_bb: (previous 'ehpad') // catch // ... assert(EndBB->isEHPad()); PreBB = MF.CreateMachineBasicBlock(); PostBB = EndBB; MF.insert(PostBB->getIterator(), PreBB); MF.insert(PostBB->getIterator(), DelegateBB); PreBB->splice(PreBB->end(), PostBB, PostBB->begin(), SplitPos); // We don't need to transfer predecessors of the EH pad to 'PreBB', // because an EH pad's predecessors are all through unwind edges and they // should still unwind to the EH pad, not PreBB. } unstackifyVRegsUsedInSplitBB(*PreBB, *PostBB); PreBB->addSuccessor(DelegateBB); PreBB->addSuccessor(PostBB); } // Add 'delegate' instruction in the delegate BB created above. MachineInstr *Delegate = BuildMI(DelegateBB, RangeEnd->getDebugLoc(), TII.get(WebAssembly::DELEGATE)) .addMBB(DelegateDest); registerTryScope(Try, Delegate, nullptr); } bool WebAssemblyCFGStackify::fixCallUnwindMismatches(MachineFunction &MF) { // Linearizing the control flow by placing TRY / END_TRY markers can create // mismatches in unwind destinations for throwing instructions, such as calls. // // We use the 'delegate' instruction to fix the unwind mismatches. 'delegate' // instruction delegates an exception to an outer 'catch'. It can target not // only 'catch' but all block-like structures including another 'delegate', // but with slightly different semantics than branches. When it targets a // 'catch', it will delegate the exception to that catch. It is being // discussed how to define the semantics when 'delegate''s target is a non-try // block: it will either be a validation failure or it will target the next // outer try-catch. But anyway our LLVM backend currently does not generate // such code. The example below illustrates where the 'delegate' instruction // in the middle will delegate the exception to, depending on the value of N. // try // try // block // try // try // call @foo // delegate N ;; Where will this delegate to? // catch ;; N == 0 // end // end ;; N == 1 (invalid; will not be generated) // delegate ;; N == 2 // catch ;; N == 3 // end // ;; N == 4 (to caller) // 1. When an instruction may throw, but the EH pad it will unwind to can be // different from the original CFG. // // Example: we have the following CFG: // bb0: // call @foo ; if it throws, unwind to bb2 // bb1: // call @bar ; if it throws, unwind to bb3 // bb2 (ehpad): // catch // ... // bb3 (ehpad) // catch // ... // // And the CFG is sorted in this order. Then after placing TRY markers, it // will look like: (BB markers are omitted) // try // try // call @foo // call @bar ;; if it throws, unwind to bb3 // catch ;; ehpad (bb2) // ... // end_try // catch ;; ehpad (bb3) // ... // end_try // // Now if bar() throws, it is going to end up ip in bb2, not bb3, where it // is supposed to end up. We solve this problem by wrapping the mismatching // call with an inner try-delegate that rethrows the exception to the right // 'catch'. 
  //
  // try
  //   try
  //     call @foo
  //     try                 ;; (new)
  //       call @bar
  //     delegate 1 (bb3)    ;; (new)
  //   catch                 ;; ehpad (bb2)
  //     ...
  //   end_try
  // catch                   ;; ehpad (bb3)
  //   ...
  // end_try
  //
  // ---
  // 2. The same as 1, but in this case an instruction unwinds to a caller
  //    function and not another EH pad.
  //
  // Example: we have the following CFG:
  // bb0:
  //   call @foo       ; if it throws, unwind to bb2
  // bb1:
  //   call @bar       ; if it throws, unwind to caller
  // bb2 (ehpad):
  //   catch
  //   ...
  //
  // And the CFG is sorted in this order. Then after placing TRY markers, it
  // will look like:
  // try
  //   call @foo
  //   call @bar             ;; if it throws, unwind to caller
  // catch                   ;; ehpad (bb2)
  //   ...
  // end_try
  //
  // Now if bar() throws, it is going to end up in bb2, when it is supposed to
  // throw up to the caller. We solve this problem in the same way, but in this
  // case 'delegate's immediate argument is the number of block depths + 1,
  // which means it rethrows to the caller.
  // try
  //   call @foo
  //   try                   ;; (new)
  //     call @bar
  //   delegate 1 (caller)   ;; (new)
  // catch                   ;; ehpad (bb2)
  //   ...
  // end_try
  //
  // Before rewriteDepthImmediates, delegate's argument is a BB. In case of the
  // caller, it will take a fake BB generated by getFakeCallerBlock(), which
  // will be converted to a correct immediate argument later.
  //
  // In case there are multiple calls in a BB that may throw to the caller, they
  // can be wrapped together in one nested try-delegate scope. (In 1, this
  // couldn't happen, because may-throwing instruction there had an unwind
  // destination, i.e., it was an invoke before, and there could be only one
  // invoke within a BB.)

  SmallVector EHPadStack;
  // Range of instructions to be wrapped in a new nested try/catch. A range
  // exists in a single BB and does not span multiple BBs.
  using TryRange = std::pair;
  // In original CFG,
  DenseMap> UnwindDestToTryRanges;

  // Gather possibly throwing calls (i.e., previously invokes) whose current
  // unwind destination is not the same as the original CFG. (Case 1)
  for (auto &MBB : reverse(MF)) {
    bool SeenThrowableInstInBB = false;
    for (auto &MI : reverse(MBB)) {
      if (MI.getOpcode() == WebAssembly::TRY)
        EHPadStack.pop_back();
      else if (WebAssembly::isCatch(MI.getOpcode()))
        EHPadStack.push_back(MI.getParent());

      // In this loop we only gather calls that have an EH pad to unwind. So
      // there will be at most 1 such call (= invoke) in a BB, so after we've
      // seen one, we can skip the rest of BB. Also if MBB has no EH pad
      // successor or MI does not throw, this is not an invoke.
      if (SeenThrowableInstInBB || !MBB.hasEHPadSuccessor() ||
          !WebAssembly::mayThrow(MI))
        continue;
      SeenThrowableInstInBB = true;

      // If the EH pad on the stack top is where this instruction should unwind
      // next, we're good.
      MachineBasicBlock *UnwindDest = getFakeCallerBlock(MF);
      for (auto *Succ : MBB.successors()) {
        // Even though semantically a BB can have multiple successors in case an
        // exception is not caught by a catchpad, in our backend implementation
        // it is guaranteed that a BB can have at most one EH pad successor. For
        // details, refer to comments in findWasmUnwindDestinations function in
        // SelectionDAGBuilder.cpp.
if (Succ->isEHPad()) { UnwindDest = Succ; break; } } if (EHPadStack.back() == UnwindDest) continue; // Include EH_LABELs in the range before and afer the invoke MachineInstr *RangeBegin = &MI, *RangeEnd = &MI; if (RangeBegin->getIterator() != MBB.begin() && std::prev(RangeBegin->getIterator())->isEHLabel()) RangeBegin = &*std::prev(RangeBegin->getIterator()); if (std::next(RangeEnd->getIterator()) != MBB.end() && std::next(RangeEnd->getIterator())->isEHLabel()) RangeEnd = &*std::next(RangeEnd->getIterator()); // If not, record the range. UnwindDestToTryRanges[UnwindDest].push_back( TryRange(RangeBegin, RangeEnd)); LLVM_DEBUG(dbgs() << "- Call unwind mismatch: MBB = " << MBB.getName() << "\nCall = " << MI << "\nOriginal dest = " << UnwindDest->getName() << " Current dest = " << EHPadStack.back()->getName() << "\n\n"); } } assert(EHPadStack.empty()); // Gather possibly throwing calls that are supposed to unwind up to the caller // if they throw, but currently unwind to an incorrect destination. Unlike the // loop above, there can be multiple calls within a BB that unwind to the // caller, which we should group together in a range. (Case 2) MachineInstr *RangeBegin = nullptr, *RangeEnd = nullptr; // inclusive // Record the range. auto RecordCallerMismatchRange = [&](const MachineBasicBlock *CurrentDest) { UnwindDestToTryRanges[getFakeCallerBlock(MF)].push_back( TryRange(RangeBegin, RangeEnd)); LLVM_DEBUG(dbgs() << "- Call unwind mismatch: MBB = " << RangeBegin->getParent()->getName() << "\nRange begin = " << *RangeBegin << "Range end = " << *RangeEnd << "\nOriginal dest = caller Current dest = " << CurrentDest->getName() << "\n\n"); RangeBegin = RangeEnd = nullptr; // Reset range pointers }; for (auto &MBB : reverse(MF)) { bool SeenThrowableInstInBB = false; for (auto &MI : reverse(MBB)) { bool MayThrow = WebAssembly::mayThrow(MI); // If MBB has an EH pad successor and this is the last instruction that // may throw, this instruction unwinds to the EH pad and not to the // caller. if (MBB.hasEHPadSuccessor() && MayThrow && !SeenThrowableInstInBB) SeenThrowableInstInBB = true; // We wrap up the current range when we see a marker even if we haven't // finished a BB. else if (RangeEnd && WebAssembly::isMarker(MI.getOpcode())) RecordCallerMismatchRange(EHPadStack.back()); // If EHPadStack is empty, that means it correctly unwinds to the caller // if it throws, so we're good. If MI does not throw, we're good too. else if (EHPadStack.empty() || !MayThrow) { } // We found an instruction that unwinds to the caller but currently has an // incorrect unwind destination. Create a new range or increment the // currently existing range. else { if (!RangeEnd) RangeBegin = RangeEnd = &MI; else RangeBegin = &MI; } // Update EHPadStack. if (MI.getOpcode() == WebAssembly::TRY) EHPadStack.pop_back(); else if (WebAssembly::isCatch(MI.getOpcode())) EHPadStack.push_back(MI.getParent()); } if (RangeEnd) RecordCallerMismatchRange(EHPadStack.back()); } assert(EHPadStack.empty()); // We don't have any unwind destination mismatches to resolve. if (UnwindDestToTryRanges.empty()) return false; // Now we fix the mismatches by wrapping calls with inner try-delegates. 
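  // Conceptually, each recorded range becomes
  //   try
  //     <range begin> ... <range end>
  //   delegate <UnwindDest>
  // where the BB operand of 'delegate' is rewritten into a depth immediate
  // later in rewriteDepthImmediates.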
for (auto &P : UnwindDestToTryRanges) { NumCallUnwindMismatches += P.second.size(); MachineBasicBlock *UnwindDest = P.first; auto &TryRanges = P.second; for (auto Range : TryRanges) { MachineInstr *RangeBegin = nullptr, *RangeEnd = nullptr; std::tie(RangeBegin, RangeEnd) = Range; auto *MBB = RangeBegin->getParent(); // If this BB has an EH pad successor, i.e., ends with an 'invoke', now we // are going to wrap the invoke with try-delegate, making the 'delegate' // BB the new successor instead, so remove the EH pad succesor here. The // BB may not have an EH pad successor if calls in this BB throw to the // caller. MachineBasicBlock *EHPad = nullptr; for (auto *Succ : MBB->successors()) { if (Succ->isEHPad()) { EHPad = Succ; break; } } if (EHPad) MBB->removeSuccessor(EHPad); addTryDelegate(RangeBegin, RangeEnd, UnwindDest); } } return true; } bool WebAssemblyCFGStackify::fixCatchUnwindMismatches(MachineFunction &MF) { // There is another kind of unwind destination mismatches besides call unwind // mismatches, which we will call "catch unwind mismatches". See this example // after the marker placement: // try // try // call @foo // catch __cpp_exception ;; ehpad A (next unwind dest: caller) // ... // end_try // catch_all ;; ehpad B // ... // end_try // // 'call @foo's unwind destination is the ehpad A. But suppose 'call @foo' // throws a foreign exception that is not caught by ehpad A, and its next // destination should be the caller. But after control flow linearization, // another EH pad can be placed in between (e.g. ehpad B here), making the // next unwind destination incorrect. In this case, the foreign exception // will instead go to ehpad B and will be caught there instead. In this // example the correct next unwind destination is the caller, but it can be // another outer catch in other cases. // // There is no specific 'call' or 'throw' instruction to wrap with a // try-delegate, so we wrap the whole try-catch-end with a try-delegate and // make it rethrow to the right destination, as in the example below: // try // try ;; (new) // try // call @foo // catch __cpp_exception ;; ehpad A (next unwind dest: caller) // ... // end_try // delegate 1 (caller) ;; (new) // catch_all ;; ehpad B // ... // end_try const auto *EHInfo = MF.getWasmEHFuncInfo(); assert(EHInfo); SmallVector EHPadStack; // For EH pads that have catch unwind mismatches, a map of . DenseMap EHPadToUnwindDest; for (auto &MBB : reverse(MF)) { for (auto &MI : reverse(MBB)) { if (MI.getOpcode() == WebAssembly::TRY) EHPadStack.pop_back(); else if (MI.getOpcode() == WebAssembly::DELEGATE) EHPadStack.push_back(&MBB); else if (WebAssembly::isCatch(MI.getOpcode())) { auto *EHPad = &MBB; // catch_all always catches an exception, so we don't need to do // anything if (MI.getOpcode() == WebAssembly::CATCH_ALL) { } // This can happen when the unwind dest was removed during the // optimization, e.g. because it was unreachable. else if (EHPadStack.empty() && EHInfo->hasUnwindDest(EHPad)) { LLVM_DEBUG(dbgs() << "EHPad (" << EHPad->getName() << "'s unwind destination does not exist anymore" << "\n\n"); } // The EHPad's next unwind destination is the caller, but we incorrectly // unwind to another EH pad. 
else if (!EHPadStack.empty() && !EHInfo->hasUnwindDest(EHPad)) { EHPadToUnwindDest[EHPad] = getFakeCallerBlock(MF); LLVM_DEBUG(dbgs() << "- Catch unwind mismatch:\nEHPad = " << EHPad->getName() << " Original dest = caller Current dest = " << EHPadStack.back()->getName() << "\n\n"); } // The EHPad's next unwind destination is an EH pad, whereas we // incorrectly unwind to another EH pad. else if (!EHPadStack.empty() && EHInfo->hasUnwindDest(EHPad)) { auto *UnwindDest = EHInfo->getUnwindDest(EHPad); if (EHPadStack.back() != UnwindDest) { EHPadToUnwindDest[EHPad] = UnwindDest; LLVM_DEBUG(dbgs() << "- Catch unwind mismatch:\nEHPad = " << EHPad->getName() << " Original dest = " << UnwindDest->getName() << " Current dest = " << EHPadStack.back()->getName() << "\n\n"); } } EHPadStack.push_back(EHPad); } } } assert(EHPadStack.empty()); if (EHPadToUnwindDest.empty()) return false; NumCatchUnwindMismatches += EHPadToUnwindDest.size(); SmallPtrSet NewEndTryBBs; for (auto &P : EHPadToUnwindDest) { MachineBasicBlock *EHPad = P.first; MachineBasicBlock *UnwindDest = P.second; MachineInstr *Try = EHPadToTry[EHPad]; MachineInstr *EndTry = BeginToEnd[Try]; addTryDelegate(Try, EndTry, UnwindDest); NewEndTryBBs.insert(EndTry->getParent()); } // Adding a try-delegate wrapping an existing try-catch-end can make existing // branch destination BBs invalid. For example, // // - Before: // bb0: // block // br bb3 // bb1: // try // ... // bb2: (ehpad) // catch // bb3: // end_try // end_block ;; 'br bb3' targets here // // Suppose this try-catch-end has a catch unwind mismatch, so we need to wrap // this with a try-delegate. Then this becomes: // // - After: // bb0: // block // br bb3 ;; invalid destination! // bb1: // try ;; (new instruction) // try // ... // bb2: (ehpad) // catch // bb3: // end_try ;; 'br bb3' still incorrectly targets here! // delegate_bb: ;; (new BB) // delegate ;; (new instruction) // split_bb: ;; (new BB) // end_block // // Now 'br bb3' incorrectly branches to an inner scope. // // As we can see in this case, when branches target a BB that has both // 'end_try' and 'end_block' and the BB is split to insert a 'delegate', we // have to remap existing branch destinations so that they target not the // 'end_try' BB but the new 'end_block' BB. There can be multiple 'delegate's // in between, so we try to find the next BB with 'end_block' instruction. In // this example, the 'br bb3' instruction should be remapped to 'br split_bb'. for (auto &MBB : MF) { for (auto &MI : MBB) { if (MI.isTerminator()) { for (auto &MO : MI.operands()) { if (MO.isMBB() && NewEndTryBBs.count(MO.getMBB())) { auto *BrDest = MO.getMBB(); bool FoundEndBlock = false; for (; std::next(BrDest->getIterator()) != MF.end(); BrDest = BrDest->getNextNode()) { for (const auto &MI : *BrDest) { if (MI.getOpcode() == WebAssembly::END_BLOCK) { FoundEndBlock = true; break; } } if (FoundEndBlock) break; } assert(FoundEndBlock); MO.setMBB(BrDest); } } } } } return true; } void WebAssemblyCFGStackify::recalculateScopeTops(MachineFunction &MF) { // Renumber BBs and recalculate ScopeTop info because new BBs might have been // created and inserted during fixing unwind mismatches. 
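  // ScopeTops is indexed by BB number, so it has to be rebuilt from scratch
  // after renumbering rather than patched incrementally.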
MF.RenumberBlocks(); ScopeTops.clear(); ScopeTops.resize(MF.getNumBlockIDs()); for (auto &MBB : reverse(MF)) { for (auto &MI : reverse(MBB)) { if (ScopeTops[MBB.getNumber()]) break; switch (MI.getOpcode()) { case WebAssembly::END_BLOCK: case WebAssembly::END_LOOP: case WebAssembly::END_TRY: case WebAssembly::DELEGATE: updateScopeTops(EndToBegin[&MI]->getParent(), &MBB); break; case WebAssembly::CATCH: case WebAssembly::CATCH_ALL: updateScopeTops(EHPadToTry[&MBB]->getParent(), &MBB); break; } } } } /// In normal assembly languages, when the end of a function is unreachable, /// because the function ends in an infinite loop or a noreturn call or similar, /// it isn't necessary to worry about the function return type at the end of /// the function, because it's never reached. However, in WebAssembly, blocks /// that end at the function end need to have a return type signature that /// matches the function signature, even though it's unreachable. This function /// checks for such cases and fixes up the signatures. void WebAssemblyCFGStackify::fixEndsAtEndOfFunction(MachineFunction &MF) { const auto &MFI = *MF.getInfo(); if (MFI.getResults().empty()) return; // MCInstLower will add the proper types to multivalue signatures based on the // function return type WebAssembly::BlockType RetType = MFI.getResults().size() > 1 ? WebAssembly::BlockType::Multivalue : WebAssembly::BlockType( WebAssembly::toValType(MFI.getResults().front())); SmallVector Worklist; Worklist.push_back(MF.rbegin()->rbegin()); auto Process = [&](MachineBasicBlock::reverse_iterator It) { auto *MBB = It->getParent(); while (It != MBB->rend()) { MachineInstr &MI = *It++; if (MI.isPosition() || MI.isDebugInstr()) continue; switch (MI.getOpcode()) { case WebAssembly::END_TRY: { // If a 'try''s return type is fixed, both its try body and catch body // should satisfy the return type, so we need to search 'end' // instructions before its corresponding 'catch' too. auto *EHPad = TryToEHPad.lookup(EndToBegin[&MI]); assert(EHPad); auto NextIt = std::next(WebAssembly::findCatch(EHPad)->getReverseIterator()); if (NextIt != EHPad->rend()) Worklist.push_back(NextIt); [[fallthrough]]; } case WebAssembly::END_BLOCK: case WebAssembly::END_LOOP: case WebAssembly::DELEGATE: EndToBegin[&MI]->getOperand(0).setImm(int32_t(RetType)); continue; default: // Something other than an `end`. We're done for this BB. return; } } // We've reached the beginning of a BB. Continue the search in the previous // BB. Worklist.push_back(MBB->getPrevNode()->rbegin()); }; while (!Worklist.empty()) Process(Worklist.pop_back_val()); } // WebAssembly functions end with an end instruction, as if the function body // were a block. static void appendEndToFunction(MachineFunction &MF, const WebAssemblyInstrInfo &TII) { BuildMI(MF.back(), MF.back().end(), MF.back().findPrevDebugLoc(MF.back().end()), TII.get(WebAssembly::END_FUNCTION)); } /// Insert LOOP/TRY/BLOCK markers at appropriate places. void WebAssemblyCFGStackify::placeMarkers(MachineFunction &MF) { // We allocate one more than the number of blocks in the function to // accommodate for the possible fake block we may insert at the end. ScopeTops.resize(MF.getNumBlockIDs() + 1); // Place the LOOP for MBB if MBB is the header of a loop. for (auto &MBB : MF) placeLoopMarker(MBB); const MCAsmInfo *MCAI = MF.getTarget().getMCAsmInfo(); for (auto &MBB : MF) { if (MBB.isEHPad()) { // Place the TRY for MBB if MBB is the EH pad of an exception. 
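      // (TRY markers are only needed when Wasm EH is actually in use and the
      // function has a personality function, as checked below.)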
if (MCAI->getExceptionHandlingType() == ExceptionHandling::Wasm && MF.getFunction().hasPersonalityFn()) placeTryMarker(MBB); } else { // Place the BLOCK for MBB if MBB is branched to from above. placeBlockMarker(MBB); } } // Fix mismatches in unwind destinations induced by linearizing the code. if (MCAI->getExceptionHandlingType() == ExceptionHandling::Wasm && MF.getFunction().hasPersonalityFn()) { bool Changed = fixCallUnwindMismatches(MF); Changed |= fixCatchUnwindMismatches(MF); if (Changed) recalculateScopeTops(MF); } } unsigned WebAssemblyCFGStackify::getBranchDepth( const SmallVectorImpl &Stack, const MachineBasicBlock *MBB) { unsigned Depth = 0; for (auto X : reverse(Stack)) { if (X.first == MBB) break; ++Depth; } assert(Depth < Stack.size() && "Branch destination should be in scope"); return Depth; } unsigned WebAssemblyCFGStackify::getDelegateDepth( const SmallVectorImpl &Stack, const MachineBasicBlock *MBB) { if (MBB == FakeCallerBB) return Stack.size(); // Delegate's destination is either a catch or a another delegate BB. When the // destination is another delegate, we can compute the argument in the same // way as branches, because the target delegate BB only contains the single // delegate instruction. if (!MBB->isEHPad()) // Target is a delegate BB return getBranchDepth(Stack, MBB); // When the delegate's destination is a catch BB, we need to use its // corresponding try's end_try BB because Stack contains each marker's end BB. // Also we need to check if the end marker instruction matches, because a // single BB can contain multiple end markers, like this: // bb: // END_BLOCK // END_TRY // END_BLOCK // END_TRY // ... // // In case of branches getting the immediate that targets any of these is // fine, but delegate has to exactly target the correct try. unsigned Depth = 0; const MachineInstr *EndTry = BeginToEnd[EHPadToTry[MBB]]; for (auto X : reverse(Stack)) { if (X.first == EndTry->getParent() && X.second == EndTry) break; ++Depth; } assert(Depth < Stack.size() && "Delegate destination should be in scope"); return Depth; } unsigned WebAssemblyCFGStackify::getRethrowDepth( const SmallVectorImpl &Stack, - const SmallVectorImpl &EHPadStack) { + const MachineBasicBlock *EHPadToRethrow) { unsigned Depth = 0; - // In our current implementation, rethrows always rethrow the exception caught - // by the innermost enclosing catch. This means while traversing Stack in the - // reverse direction, when we encounter END_TRY, we should check if the - // END_TRY corresponds to the current innermost EH pad. For example: - // try - // ... - // catch ;; (a) - // try - // rethrow 1 ;; (b) - // catch ;; (c) - // rethrow 0 ;; (d) - // end ;; (e) - // end ;; (f) - // - // When we are at 'rethrow' (d), while reversely traversing Stack the first - // 'end' we encounter is the 'end' (e), which corresponds to the 'catch' (c). - // And 'rethrow' (d) rethrows the exception caught by 'catch' (c), so we stop - // there and the depth should be 0. But when we are at 'rethrow' (b), it - // rethrows the exception caught by 'catch' (a), so when traversing Stack - // reversely, we should skip the 'end' (e) and choose 'end' (f), which - // corresponds to 'catch' (a). 
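// RETHROW now carries the EH pad whose exception it rethrows as a BB operand
// (set in LateEHPrepare and rewritten in rewriteDepthImmediates below), so the
// depth is simply the distance in Stack to the END_TRY whose try corresponds
// to that EH pad.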
for (auto X : reverse(Stack)) { const MachineInstr *End = X.second; if (End->getOpcode() == WebAssembly::END_TRY) { auto *EHPad = TryToEHPad[EndToBegin[End]]; - if (EHPadStack.back() == EHPad) + if (EHPadToRethrow == EHPad) break; } ++Depth; } assert(Depth < Stack.size() && "Rethrow destination should be in scope"); return Depth; } void WebAssemblyCFGStackify::rewriteDepthImmediates(MachineFunction &MF) { // Now rewrite references to basic blocks to be depth immediates. SmallVector Stack; - SmallVector EHPadStack; for (auto &MBB : reverse(MF)) { for (MachineInstr &MI : llvm::reverse(MBB)) { switch (MI.getOpcode()) { case WebAssembly::BLOCK: case WebAssembly::TRY: assert(ScopeTops[Stack.back().first->getNumber()]->getNumber() <= MBB.getNumber() && "Block/try marker should be balanced"); Stack.pop_back(); break; case WebAssembly::LOOP: assert(Stack.back().first == &MBB && "Loop top should be balanced"); Stack.pop_back(); break; case WebAssembly::END_BLOCK: + case WebAssembly::END_TRY: Stack.push_back(std::make_pair(&MBB, &MI)); break; - case WebAssembly::END_TRY: { - // We handle DELEGATE in the default level, because DELEGATE has - // immediate operands to rewrite. - Stack.push_back(std::make_pair(&MBB, &MI)); - auto *EHPad = TryToEHPad[EndToBegin[&MI]]; - EHPadStack.push_back(EHPad); - break; - } - case WebAssembly::END_LOOP: Stack.push_back(std::make_pair(EndToBegin[&MI]->getParent(), &MI)); break; - case WebAssembly::CATCH: - case WebAssembly::CATCH_ALL: - EHPadStack.pop_back(); - break; - - case WebAssembly::RETHROW: - MI.getOperand(0).setImm(getRethrowDepth(Stack, EHPadStack)); - break; - default: if (MI.isTerminator()) { // Rewrite MBB operands to be depth immediates. SmallVector Ops(MI.operands()); while (MI.getNumOperands() > 0) MI.removeOperand(MI.getNumOperands() - 1); for (auto MO : Ops) { if (MO.isMBB()) { if (MI.getOpcode() == WebAssembly::DELEGATE) MO = MachineOperand::CreateImm( getDelegateDepth(Stack, MO.getMBB())); + else if (MI.getOpcode() == WebAssembly::RETHROW) + MO = MachineOperand::CreateImm( + getRethrowDepth(Stack, MO.getMBB())); else MO = MachineOperand::CreateImm( getBranchDepth(Stack, MO.getMBB())); } MI.addOperand(MF, MO); } } if (MI.getOpcode() == WebAssembly::DELEGATE) Stack.push_back(std::make_pair(&MBB, &MI)); break; } } } assert(Stack.empty() && "Control flow should be balanced"); } void WebAssemblyCFGStackify::cleanupFunctionData(MachineFunction &MF) { if (FakeCallerBB) MF.deleteMachineBasicBlock(FakeCallerBB); AppendixBB = FakeCallerBB = nullptr; } void WebAssemblyCFGStackify::releaseMemory() { ScopeTops.clear(); BeginToEnd.clear(); EndToBegin.clear(); TryToEHPad.clear(); EHPadToTry.clear(); } bool WebAssemblyCFGStackify::runOnMachineFunction(MachineFunction &MF) { LLVM_DEBUG(dbgs() << "********** CFG Stackifying **********\n" "********** Function: " << MF.getName() << '\n'); const MCAsmInfo *MCAI = MF.getTarget().getMCAsmInfo(); releaseMemory(); // Liveness is not tracked for VALUE_STACK physreg. MF.getRegInfo().invalidateLiveness(); // Place the BLOCK/LOOP/TRY markers to indicate the beginnings of scopes. placeMarkers(MF); // Remove unnecessary instructions possibly introduced by try/end_trys. if (MCAI->getExceptionHandlingType() == ExceptionHandling::Wasm && MF.getFunction().hasPersonalityFn()) removeUnnecessaryInstrs(MF); // Convert MBB operands in terminators to relative depth immediates. rewriteDepthImmediates(MF); // Fix up block/loop/try signatures at the end of the function to conform to // WebAssembly's rules. 
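  // e.g. if the function returns an i32 but its body ends in an infinite loop,
  // the enclosing block/loop/try markers still need an i32 result signature
  // even though that point is never reached.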
fixEndsAtEndOfFunction(MF); // Add an end instruction at the end of the function body. const auto &TII = *MF.getSubtarget().getInstrInfo(); if (!MF.getSubtarget() .getTargetTriple() .isOSBinFormatELF()) appendEndToFunction(MF, TII); cleanupFunctionData(MF); MF.getInfo()->setCFGStackified(); return true; } diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp index 0f06f54f219f..18545e92886a 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp @@ -1,421 +1,434 @@ //- WebAssemblyISelDAGToDAG.cpp - A dag to dag inst selector for WebAssembly -// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// /// /// \file /// This file defines an instruction selector for the WebAssembly target. /// //===----------------------------------------------------------------------===// #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" #include "WebAssembly.h" #include "WebAssemblyISelLowering.h" #include "WebAssemblyTargetMachine.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/CodeGen/WasmEHFuncInfo.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Function.h" // To access function attributes. #include "llvm/IR/IntrinsicsWebAssembly.h" #include "llvm/Support/Debug.h" #include "llvm/Support/KnownBits.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; #define DEBUG_TYPE "wasm-isel" #define PASS_NAME "WebAssembly Instruction Selection" //===--------------------------------------------------------------------===// /// WebAssembly-specific code to select WebAssembly machine instructions for /// SelectionDAG operations. /// namespace { class WebAssemblyDAGToDAGISel final : public SelectionDAGISel { /// Keep a pointer to the WebAssemblySubtarget around so that we can make the /// right decision when generating code for different targets. const WebAssemblySubtarget *Subtarget; public: WebAssemblyDAGToDAGISel() = delete; WebAssemblyDAGToDAGISel(WebAssemblyTargetMachine &TM, CodeGenOptLevel OptLevel) : SelectionDAGISel(TM, OptLevel), Subtarget(nullptr) {} bool runOnMachineFunction(MachineFunction &MF) override { LLVM_DEBUG(dbgs() << "********** ISelDAGToDAG **********\n" "********** Function: " << MF.getName() << '\n'); Subtarget = &MF.getSubtarget(); return SelectionDAGISel::runOnMachineFunction(MF); } void PreprocessISelDAG() override; void Select(SDNode *Node) override; bool SelectInlineAsmMemoryOperand(const SDValue &Op, InlineAsm::ConstraintCode ConstraintID, std::vector &OutOps) override; bool SelectAddrOperands32(SDValue Op, SDValue &Offset, SDValue &Addr); bool SelectAddrOperands64(SDValue Op, SDValue &Offset, SDValue &Addr); // Include the pieces autogenerated from the target description. #include "WebAssemblyGenDAGISel.inc" private: // add select functions here... 
bool SelectAddrOperands(MVT AddrType, unsigned ConstOpc, SDValue Op, SDValue &Offset, SDValue &Addr); bool SelectAddrAddOperands(MVT OffsetType, SDValue N, SDValue &Offset, SDValue &Addr); }; class WebAssemblyDAGToDAGISelLegacy : public SelectionDAGISelLegacy { public: static char ID; explicit WebAssemblyDAGToDAGISelLegacy(WebAssemblyTargetMachine &TM, CodeGenOptLevel OptLevel) : SelectionDAGISelLegacy( ID, std::make_unique(TM, OptLevel)) {} }; } // end anonymous namespace char WebAssemblyDAGToDAGISelLegacy::ID; INITIALIZE_PASS(WebAssemblyDAGToDAGISelLegacy, DEBUG_TYPE, PASS_NAME, false, false) void WebAssemblyDAGToDAGISel::PreprocessISelDAG() { // Stack objects that should be allocated to locals are hoisted to WebAssembly // locals when they are first used. However for those without uses, we hoist // them here. It would be nice if there were some hook to do this when they // are added to the MachineFrameInfo, but that's not the case right now. MachineFrameInfo &FrameInfo = MF->getFrameInfo(); for (int Idx = 0; Idx < FrameInfo.getObjectIndexEnd(); Idx++) WebAssemblyFrameLowering::getLocalForStackObject(*MF, Idx); SelectionDAGISel::PreprocessISelDAG(); } static SDValue getTagSymNode(int Tag, SelectionDAG *DAG) { assert(Tag == WebAssembly::CPP_EXCEPTION || WebAssembly::C_LONGJMP); auto &MF = DAG->getMachineFunction(); const auto &TLI = DAG->getTargetLoweringInfo(); MVT PtrVT = TLI.getPointerTy(DAG->getDataLayout()); const char *SymName = Tag == WebAssembly::CPP_EXCEPTION ? MF.createExternalSymbolName("__cpp_exception") : MF.createExternalSymbolName("__c_longjmp"); return DAG->getTargetExternalSymbol(SymName, PtrVT); } void WebAssemblyDAGToDAGISel::Select(SDNode *Node) { // If we have a custom node, we already have selected! if (Node->isMachineOpcode()) { LLVM_DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n"); Node->setNodeId(-1); return; } MVT PtrVT = TLI->getPointerTy(CurDAG->getDataLayout()); auto GlobalGetIns = PtrVT == MVT::i64 ? WebAssembly::GLOBAL_GET_I64 : WebAssembly::GLOBAL_GET_I32; // Few custom selection stuff. SDLoc DL(Node); MachineFunction &MF = CurDAG->getMachineFunction(); switch (Node->getOpcode()) { case ISD::ATOMIC_FENCE: { if (!MF.getSubtarget().hasAtomics()) break; uint64_t SyncScopeID = Node->getConstantOperandVal(2); MachineSDNode *Fence = nullptr; switch (SyncScopeID) { case SyncScope::SingleThread: // We lower a single-thread fence to a pseudo compiler barrier instruction // preventing instruction reordering. This will not be emitted in final // binary. Fence = CurDAG->getMachineNode(WebAssembly::COMPILER_FENCE, DL, // debug loc MVT::Other, // outchain type Node->getOperand(0) // inchain ); break; case SyncScope::System: // Currently wasm only supports sequentially consistent atomics, so we // always set the order to 0 (sequentially consistent). 
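      // (The target constant 0 passed below is that 'order' operand.)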
Fence = CurDAG->getMachineNode( WebAssembly::ATOMIC_FENCE, DL, // debug loc MVT::Other, // outchain type CurDAG->getTargetConstant(0, DL, MVT::i32), // order Node->getOperand(0) // inchain ); break; default: llvm_unreachable("Unknown scope!"); } ReplaceNode(Node, Fence); CurDAG->RemoveDeadNode(Node); return; } case ISD::INTRINSIC_WO_CHAIN: { unsigned IntNo = Node->getConstantOperandVal(0); switch (IntNo) { case Intrinsic::wasm_tls_size: { MachineSDNode *TLSSize = CurDAG->getMachineNode( GlobalGetIns, DL, PtrVT, CurDAG->getTargetExternalSymbol("__tls_size", PtrVT)); ReplaceNode(Node, TLSSize); return; } case Intrinsic::wasm_tls_align: { MachineSDNode *TLSAlign = CurDAG->getMachineNode( GlobalGetIns, DL, PtrVT, CurDAG->getTargetExternalSymbol("__tls_align", PtrVT)); ReplaceNode(Node, TLSAlign); return; } } break; } case ISD::INTRINSIC_W_CHAIN: { unsigned IntNo = Node->getConstantOperandVal(1); const auto &TLI = CurDAG->getTargetLoweringInfo(); MVT PtrVT = TLI.getPointerTy(CurDAG->getDataLayout()); switch (IntNo) { case Intrinsic::wasm_tls_base: { MachineSDNode *TLSBase = CurDAG->getMachineNode( GlobalGetIns, DL, PtrVT, MVT::Other, CurDAG->getTargetExternalSymbol("__tls_base", PtrVT), Node->getOperand(0)); ReplaceNode(Node, TLSBase); return; } case Intrinsic::wasm_catch: { int Tag = Node->getConstantOperandVal(2); SDValue SymNode = getTagSymNode(Tag, CurDAG); MachineSDNode *Catch = CurDAG->getMachineNode(WebAssembly::CATCH, DL, { PtrVT, // exception pointer MVT::Other // outchain type }, { SymNode, // exception symbol Node->getOperand(0) // inchain }); ReplaceNode(Node, Catch); return; } } break; } case ISD::INTRINSIC_VOID: { unsigned IntNo = Node->getConstantOperandVal(1); switch (IntNo) { case Intrinsic::wasm_throw: { int Tag = Node->getConstantOperandVal(2); SDValue SymNode = getTagSymNode(Tag, CurDAG); MachineSDNode *Throw = CurDAG->getMachineNode(WebAssembly::THROW, DL, MVT::Other, // outchain type { SymNode, // exception symbol Node->getOperand(3), // thrown value Node->getOperand(0) // inchain }); ReplaceNode(Node, Throw); return; } + case Intrinsic::wasm_rethrow: { + // RETHROW's BB argument will be populated in LateEHPrepare. Just use a + // '0' as a placeholder for now. + MachineSDNode *Rethrow = CurDAG->getMachineNode( + WebAssembly::RETHROW, DL, + MVT::Other, // outchain type + { + CurDAG->getConstant(0, DL, MVT::i32), // placeholder + Node->getOperand(0) // inchain + }); + ReplaceNode(Node, Rethrow); + return; + } } break; } case WebAssemblyISD::CALL: case WebAssemblyISD::RET_CALL: { // CALL has both variable operands and variable results, but ISel only // supports one or the other. Split calls into two nodes glued together, one // for the operands and one for the results. These two nodes will be // recombined in a custom inserter hook into a single MachineInstr. SmallVector Ops; for (size_t i = 1; i < Node->getNumOperands(); ++i) { SDValue Op = Node->getOperand(i); // Remove the wrapper when the call target is a function, an external // symbol (which will be lowered to a library function), or an alias of // a function. If the target is not a function/external symbol, we // shouldn't remove the wrapper, because we cannot call it directly and // instead we want it to be loaded with a CONST instruction and called // with a call_indirect later. 
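      // For example, a direct 'call @callee' or a call to an external library
      // symbol stays a direct call, while a call through a non-function global
      // address keeps the wrapper and becomes CONST + call_indirect.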
if (i == 1 && Op->getOpcode() == WebAssemblyISD::Wrapper) { SDValue NewOp = Op->getOperand(0); if (auto *GlobalOp = dyn_cast(NewOp.getNode())) { if (isa( GlobalOp->getGlobal()->stripPointerCastsAndAliases())) Op = NewOp; } else if (isa(NewOp.getNode())) { Op = NewOp; } } Ops.push_back(Op); } // Add the chain last Ops.push_back(Node->getOperand(0)); MachineSDNode *CallParams = CurDAG->getMachineNode(WebAssembly::CALL_PARAMS, DL, MVT::Glue, Ops); unsigned Results = Node->getOpcode() == WebAssemblyISD::CALL ? WebAssembly::CALL_RESULTS : WebAssembly::RET_CALL_RESULTS; SDValue Link(CallParams, 0); MachineSDNode *CallResults = CurDAG->getMachineNode(Results, DL, Node->getVTList(), Link); ReplaceNode(Node, CallResults); return; } default: break; } // Select the default instruction. SelectCode(Node); } bool WebAssemblyDAGToDAGISel::SelectInlineAsmMemoryOperand( const SDValue &Op, InlineAsm::ConstraintCode ConstraintID, std::vector &OutOps) { switch (ConstraintID) { case InlineAsm::ConstraintCode::m: // We just support simple memory operands that just have a single address // operand and need no special handling. OutOps.push_back(Op); return false; default: break; } return true; } bool WebAssemblyDAGToDAGISel::SelectAddrAddOperands(MVT OffsetType, SDValue N, SDValue &Offset, SDValue &Addr) { assert(N.getNumOperands() == 2 && "Attempting to fold in a non-binary op"); // WebAssembly constant offsets are performed as unsigned with infinite // precision, so we need to check for NoUnsignedWrap so that we don't fold an // offset for an add that needs wrapping. if (N.getOpcode() == ISD::ADD && !N.getNode()->getFlags().hasNoUnsignedWrap()) return false; // Folds constants in an add into the offset. for (size_t i = 0; i < 2; ++i) { SDValue Op = N.getOperand(i); SDValue OtherOp = N.getOperand(i == 0 ? 1 : 0); if (ConstantSDNode *CN = dyn_cast(Op)) { Offset = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(N), OffsetType); Addr = OtherOp; return true; } } return false; } bool WebAssemblyDAGToDAGISel::SelectAddrOperands(MVT AddrType, unsigned ConstOpc, SDValue N, SDValue &Offset, SDValue &Addr) { SDLoc DL(N); // Fold target global addresses into the offset. if (!TM.isPositionIndependent()) { SDValue Op(N); if (Op.getOpcode() == WebAssemblyISD::Wrapper) Op = Op.getOperand(0); if (Op.getOpcode() == ISD::TargetGlobalAddress) { Offset = Op; Addr = SDValue( CurDAG->getMachineNode(ConstOpc, DL, AddrType, CurDAG->getTargetConstant(0, DL, AddrType)), 0); return true; } } // Fold anything inside an add into the offset. if (N.getOpcode() == ISD::ADD && SelectAddrAddOperands(AddrType, N, Offset, Addr)) return true; // Likewise, treat an 'or' node as an 'add' if the or'ed bits are known to be // zero and fold them into the offset too. if (N.getOpcode() == ISD::OR) { bool OrIsAdd; if (ConstantSDNode *CN = dyn_cast(N.getOperand(1))) { OrIsAdd = CurDAG->MaskedValueIsZero(N->getOperand(0), CN->getAPIntValue()); } else { KnownBits Known0 = CurDAG->computeKnownBits(N->getOperand(0), 0); KnownBits Known1 = CurDAG->computeKnownBits(N->getOperand(1), 0); OrIsAdd = (~Known0.Zero & ~Known1.Zero) == 0; } if (OrIsAdd && SelectAddrAddOperands(AddrType, N, Offset, Addr)) return true; } // Fold constant addresses into the offset. if (ConstantSDNode *CN = dyn_cast(N)) { Offset = CurDAG->getTargetConstant(CN->getZExtValue(), DL, AddrType); Addr = SDValue( CurDAG->getMachineNode(ConstOpc, DL, AddrType, CurDAG->getTargetConstant(0, DL, AddrType)), 0); return true; } // Else it's a plain old load/store with no offset. 
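  // e.g. a bare pointer operand reaches this point: offset 0 and the value
  // itself is the address (whereas an '(add $p, 16)' would have been folded
  // above into offset 16 with address $p; '$p' is an illustrative name).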
Offset = CurDAG->getTargetConstant(0, DL, AddrType); Addr = N; return true; } bool WebAssemblyDAGToDAGISel::SelectAddrOperands32(SDValue Op, SDValue &Offset, SDValue &Addr) { return SelectAddrOperands(MVT::i32, WebAssembly::CONST_I32, Op, Offset, Addr); } bool WebAssemblyDAGToDAGISel::SelectAddrOperands64(SDValue Op, SDValue &Offset, SDValue &Addr) { return SelectAddrOperands(MVT::i64, WebAssembly::CONST_I64, Op, Offset, Addr); } /// This pass converts a legalized DAG into a WebAssembly-specific DAG, ready /// for instruction scheduling. FunctionPass *llvm::createWebAssemblyISelDag(WebAssemblyTargetMachine &TM, CodeGenOptLevel OptLevel) { return new WebAssemblyDAGToDAGISelLegacy(TM, OptLevel); } diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrControl.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrControl.td index be6547007aaf..261277f8a02c 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrControl.td +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrControl.td @@ -1,168 +1,167 @@ //===- WebAssemblyInstrControl.td-WebAssembly control-flow ------*- tablegen -*- // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// /// /// \file /// WebAssembly control-flow code-gen constructs. /// //===----------------------------------------------------------------------===// let isBranch = 1, isTerminator = 1, hasCtrlDep = 1 in { // The condition operand is a boolean value which WebAssembly represents as i32. defm BR_IF : I<(outs), (ins bb_op:$dst, I32:$cond), (outs), (ins bb_op:$dst), [(brcond I32:$cond, bb:$dst)], "br_if \t$dst, $cond", "br_if \t$dst", 0x0d>; let isCodeGenOnly = 1 in defm BR_UNLESS : I<(outs), (ins bb_op:$dst, I32:$cond), (outs), (ins bb_op:$dst), []>; let isBarrier = 1 in defm BR : NRI<(outs), (ins bb_op:$dst), [(br bb:$dst)], "br \t$dst", 0x0c>; } // isBranch = 1, isTerminator = 1, hasCtrlDep = 1 def : Pat<(brcond (i32 (setne I32:$cond, 0)), bb:$dst), (BR_IF bb_op:$dst, I32:$cond)>; def : Pat<(brcond (i32 (seteq I32:$cond, 0)), bb:$dst), (BR_UNLESS bb_op:$dst, I32:$cond)>; def : Pat<(brcond (i32 (xor bool_node:$cond, (i32 1))), bb:$dst), (BR_UNLESS bb_op:$dst, I32:$cond)>; // A list of branch targets enclosed in {} and separated by comma. // Used by br_table only. def BrListAsmOperand : AsmOperandClass { let Name = "BrList"; } let OperandNamespace = "WebAssembly", OperandType = "OPERAND_BRLIST" in def brlist : Operand { let ParserMatchClass = BrListAsmOperand; let PrintMethod = "printBrList"; } // Duplicating a BR_TABLE is almost never a good idea. In particular, it can // lead to some nasty irreducibility due to tail merging when the br_table is in // a loop. let isTerminator = 1, hasCtrlDep = 1, isBarrier = 1, isNotDuplicable = 1 in { defm BR_TABLE_I32 : I<(outs), (ins I32:$index, variable_ops), (outs), (ins brlist:$brl), [(WebAssemblybr_table I32:$index)], "br_table \t$index", "br_table \t$brl", 0x0e>; // TODO: SelectionDAG's lowering insists on using a pointer as the index for // jump tables, so in practice we don't ever use BR_TABLE_I64 in wasm32 mode // currently. 
defm BR_TABLE_I64 : I<(outs), (ins I64:$index, variable_ops), (outs), (ins brlist:$brl), [(WebAssemblybr_table I64:$index)], "br_table \t$index", "br_table \t$brl", 0x0e>; } // isTerminator = 1, hasCtrlDep = 1, isBarrier = 1, isNotDuplicable = 1 // This is technically a control-flow instruction, since all it affects is the // IP. defm NOP : NRI<(outs), (ins), [], "nop", 0x01>; // Placemarkers to indicate the start or end of a block or loop scope. // These use/clobber VALUE_STACK to prevent them from being moved into the // middle of an expression tree. let Uses = [VALUE_STACK], Defs = [VALUE_STACK] in { defm BLOCK : NRI<(outs), (ins Signature:$sig), [], "block \t$sig", 0x02>; defm LOOP : NRI<(outs), (ins Signature:$sig), [], "loop \t$sig", 0x03>; defm IF : I<(outs), (ins Signature:$sig, I32:$cond), (outs), (ins Signature:$sig), [], "if \t$sig, $cond", "if \t$sig", 0x04>; defm ELSE : NRI<(outs), (ins), [], "else", 0x05>; // END_BLOCK, END_LOOP, END_IF and END_FUNCTION are represented with the same // opcode in wasm. defm END_BLOCK : NRI<(outs), (ins), [], "end_block", 0x0b>; defm END_LOOP : NRI<(outs), (ins), [], "end_loop", 0x0b>; defm END_IF : NRI<(outs), (ins), [], "end_if", 0x0b>; // Generic instruction, for disassembler. let IsCanonical = 1 in defm END : NRI<(outs), (ins), [], "end", 0x0b>; let isTerminator = 1, isBarrier = 1 in defm END_FUNCTION : NRI<(outs), (ins), [], "end_function", 0x0b>; } // Uses = [VALUE_STACK], Defs = [VALUE_STACK] let hasCtrlDep = 1, isBarrier = 1 in { let isTerminator = 1 in { let isReturn = 1 in { defm RETURN : I<(outs), (ins variable_ops), (outs), (ins), [(WebAssemblyreturn)], "return", "return", 0x0f>; // Equivalent to RETURN, for use at the end of a function when wasm // semantics return by falling off the end of the block. let isCodeGenOnly = 1 in defm FALLTHROUGH_RETURN : I<(outs), (ins variable_ops), (outs), (ins), []>; } // isReturn = 1 let IsCanonical = 1, isTrap = 1 in defm UNREACHABLE : NRI<(outs), (ins), [(trap)], "unreachable", 0x00>; } // isTerminator = 1 // debugtrap explicitly returns despite trapping because it is supposed to just // get the attention of the debugger. Unfortunately, because UNREACHABLE is a // terminator, lowering debugtrap to UNREACHABLE can create an invalid // MachineBasicBlock when there is additional code after it. Lower it to this // non-terminator version instead. // TODO: Actually execute the debugger statement when running on the Web let isTrap = 1 in defm DEBUG_UNREACHABLE : NRI<(outs), (ins), [(debugtrap)], "unreachable", 0x00>; } // hasCtrlDep = 1, isBarrier = 1 //===----------------------------------------------------------------------===// // Exception handling instructions //===----------------------------------------------------------------------===// let Predicates = [HasExceptionHandling] in { // Throwing an exception: throw / rethrow let isTerminator = 1, hasCtrlDep = 1, isBarrier = 1 in { defm THROW : I<(outs), (ins tag_op:$tag, variable_ops), (outs), (ins tag_op:$tag), [], "throw \t$tag", "throw \t$tag", 0x08>; -defm RETHROW : NRI<(outs), (ins i32imm:$depth), [], "rethrow \t$depth", 0x09>; +// $ehpad is the EH pad where the exception to rethrow has been caught. +defm RETHROW : NRI<(outs), (ins bb_op:$ehpad), [], "rethrow \t$ehpad", 0x09>; } // isTerminator = 1, hasCtrlDep = 1, isBarrier = 1 -// The depth argument will be computed in CFGStackify. We set it to 0 here for -// now. 
-def : Pat<(int_wasm_rethrow), (RETHROW 0)>;

// Region within which an exception is caught: try / end_try
let Uses = [VALUE_STACK], Defs = [VALUE_STACK] in {
defm TRY : NRI<(outs), (ins Signature:$sig), [], "try \t$sig", 0x06>;
defm END_TRY : NRI<(outs), (ins), [], "end_try", 0x0b>;
} // Uses = [VALUE_STACK], Defs = [VALUE_STACK]

// Catching an exception: catch / catch_all
let hasCtrlDep = 1, hasSideEffects = 1 in {
let variadicOpsAreDefs = 1 in
defm CATCH : I<(outs), (ins tag_op:$tag, variable_ops),
               (outs), (ins tag_op:$tag), [],
               "catch", "catch \t$tag", 0x07>;
defm CATCH_ALL : NRI<(outs), (ins), [], "catch_all", 0x19>;
}

// Delegating an exception: delegate
let isTerminator = 1, hasCtrlDep = 1, hasSideEffects = 1 in
defm DELEGATE : NRI<(outs), (ins bb_op:$dst), [], "delegate \t $dst", 0x18>;

// Pseudo instructions: cleanupret / catchret
let isTerminator = 1, hasSideEffects = 1, isBarrier = 1, hasCtrlDep = 1,
    isPseudo = 1, isEHScopeReturn = 1 in {
-  defm CLEANUPRET : NRI<(outs), (ins), [(cleanupret)], "cleanupret", 0>;
+  defm CLEANUPRET : NRI<(outs), (ins bb_op:$ehpad), [(cleanupret bb:$ehpad)],
+                        "cleanupret", 0>;
  defm CATCHRET : NRI<(outs), (ins bb_op:$dst, bb_op:$from),
                      [(catchret bb:$dst, bb:$from)], "catchret", 0>;
} // isTerminator = 1, hasSideEffects = 1, isBarrier = 1, hasCtrlDep = 1,
  // isPseudo = 1, isEHScopeReturn = 1

} // Predicates = [HasExceptionHandling]

diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyLateEHPrepare.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyLateEHPrepare.cpp
index 94037b9ab189..b8f3bcb57f6b 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyLateEHPrepare.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyLateEHPrepare.cpp
@@ -1,322 +1,350 @@
//=== WebAssemblyLateEHPrepare.cpp - WebAssembly Exception Preparation -===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief Does various transformations for exception handling.
/// //===----------------------------------------------------------------------===// #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" #include "WebAssembly.h" #include "WebAssemblySubtarget.h" #include "WebAssemblyUtilities.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/WasmEHFuncInfo.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Target/TargetMachine.h" using namespace llvm; #define DEBUG_TYPE "wasm-late-eh-prepare" namespace { class WebAssemblyLateEHPrepare final : public MachineFunctionPass { StringRef getPassName() const override { return "WebAssembly Late Prepare Exception"; } bool runOnMachineFunction(MachineFunction &MF) override; bool removeUnreachableEHPads(MachineFunction &MF); void recordCatchRetBBs(MachineFunction &MF); bool hoistCatches(MachineFunction &MF); bool addCatchAlls(MachineFunction &MF); bool replaceFuncletReturns(MachineFunction &MF); bool removeUnnecessaryUnreachables(MachineFunction &MF); bool restoreStackPointer(MachineFunction &MF); MachineBasicBlock *getMatchingEHPad(MachineInstr *MI); SmallPtrSet CatchRetBBs; public: static char ID; // Pass identification, replacement for typeid WebAssemblyLateEHPrepare() : MachineFunctionPass(ID) {} }; } // end anonymous namespace char WebAssemblyLateEHPrepare::ID = 0; INITIALIZE_PASS(WebAssemblyLateEHPrepare, DEBUG_TYPE, "WebAssembly Late Exception Preparation", false, false) FunctionPass *llvm::createWebAssemblyLateEHPrepare() { return new WebAssemblyLateEHPrepare(); } // Returns the nearest EH pad that dominates this instruction. This does not use // dominator analysis; it just does BFS on its predecessors until arriving at an // EH pad. This assumes valid EH scopes so the first EH pad it arrives in all // possible search paths should be the same. // Returns nullptr in case it does not find any EH pad in the search, or finds // multiple different EH pads. MachineBasicBlock * WebAssemblyLateEHPrepare::getMatchingEHPad(MachineInstr *MI) { MachineFunction *MF = MI->getParent()->getParent(); SmallVector WL; SmallPtrSet Visited; WL.push_back(MI->getParent()); MachineBasicBlock *EHPad = nullptr; while (!WL.empty()) { MachineBasicBlock *MBB = WL.pop_back_val(); if (!Visited.insert(MBB).second) continue; if (MBB->isEHPad()) { if (EHPad && EHPad != MBB) return nullptr; EHPad = MBB; continue; } if (MBB == &MF->front()) return nullptr; for (auto *Pred : MBB->predecessors()) if (!CatchRetBBs.count(Pred)) // We don't go into child scopes WL.push_back(Pred); } return EHPad; } // Erase the specified BBs if the BB does not have any remaining predecessors, // and also all its dead children. 
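// (A worklist is used: a block is erased only once all of its predecessors are
// gone, and its successors are then reconsidered, so whole dead subtrees are
// removed in one call.)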
template static void eraseDeadBBsAndChildren(const Container &MBBs) { SmallVector WL(MBBs.begin(), MBBs.end()); SmallPtrSet Deleted; while (!WL.empty()) { MachineBasicBlock *MBB = WL.pop_back_val(); if (Deleted.count(MBB) || !MBB->pred_empty()) continue; SmallVector Succs(MBB->successors()); WL.append(MBB->succ_begin(), MBB->succ_end()); for (auto *Succ : Succs) MBB->removeSuccessor(Succ); // To prevent deleting the same BB multiple times, which can happen when // 'MBBs' contain both a parent and a child Deleted.insert(MBB); MBB->eraseFromParent(); } } bool WebAssemblyLateEHPrepare::runOnMachineFunction(MachineFunction &MF) { LLVM_DEBUG(dbgs() << "********** Late EH Prepare **********\n" "********** Function: " << MF.getName() << '\n'); if (MF.getTarget().getMCAsmInfo()->getExceptionHandlingType() != ExceptionHandling::Wasm) return false; bool Changed = false; if (MF.getFunction().hasPersonalityFn()) { Changed |= removeUnreachableEHPads(MF); recordCatchRetBBs(MF); Changed |= hoistCatches(MF); Changed |= addCatchAlls(MF); Changed |= replaceFuncletReturns(MF); } Changed |= removeUnnecessaryUnreachables(MF); if (MF.getFunction().hasPersonalityFn()) Changed |= restoreStackPointer(MF); return Changed; } // Remove unreachable EH pads and its children. If they remain, CFG // stackification can be tricky. bool WebAssemblyLateEHPrepare::removeUnreachableEHPads(MachineFunction &MF) { SmallVector ToDelete; for (auto &MBB : MF) if (MBB.isEHPad() && MBB.pred_empty()) ToDelete.push_back(&MBB); eraseDeadBBsAndChildren(ToDelete); return !ToDelete.empty(); } // Record which BB ends with catchret instruction, because this will be replaced // with 'br's later. This set of catchret BBs is necessary in 'getMatchingEHPad' // function. void WebAssemblyLateEHPrepare::recordCatchRetBBs(MachineFunction &MF) { CatchRetBBs.clear(); for (auto &MBB : MF) { auto Pos = MBB.getFirstTerminator(); if (Pos == MBB.end()) continue; MachineInstr *TI = &*Pos; if (TI->getOpcode() == WebAssembly::CATCHRET) CatchRetBBs.insert(&MBB); } } // Hoist catch instructions to the beginning of their matching EH pad BBs in // case, // (1) catch instruction is not the first instruction in EH pad. // ehpad: // some_other_instruction // ... // %exn = catch 0 // (2) catch instruction is in a non-EH pad BB. For example, // ehpad: // br bb0 // bb0: // %exn = catch 0 bool WebAssemblyLateEHPrepare::hoistCatches(MachineFunction &MF) { bool Changed = false; SmallVector Catches; for (auto &MBB : MF) for (auto &MI : MBB) if (WebAssembly::isCatch(MI.getOpcode())) Catches.push_back(&MI); for (auto *Catch : Catches) { MachineBasicBlock *EHPad = getMatchingEHPad(Catch); assert(EHPad && "No matching EH pad for catch"); auto InsertPos = EHPad->begin(); // Skip EH_LABELs in the beginning of an EH pad if present. We don't use // these labels at the moment, but other targets also seem to have an // EH_LABEL instruction in the beginning of an EH pad. while (InsertPos != EHPad->end() && InsertPos->isEHLabel()) InsertPos++; if (InsertPos == Catch) continue; Changed = true; EHPad->insert(InsertPos, Catch->removeFromParent()); } return Changed; } // Add catch_all to beginning of cleanup pads. bool WebAssemblyLateEHPrepare::addCatchAlls(MachineFunction &MF) { bool Changed = false; const auto &TII = *MF.getSubtarget().getInstrInfo(); for (auto &MBB : MF) { if (!MBB.isEHPad()) continue; auto InsertPos = MBB.begin(); // Skip EH_LABELs in the beginning of an EH pad if present. 
while (InsertPos != MBB.end() && InsertPos->isEHLabel()) InsertPos++; // This runs after hoistCatches(), so we assume that if there is a catch, // that should be the first non-EH-label instruction in an EH pad. if (InsertPos == MBB.end() || !WebAssembly::isCatch(InsertPos->getOpcode())) { Changed = true; BuildMI(MBB, InsertPos, InsertPos == MBB.end() ? DebugLoc() : InsertPos->getDebugLoc(), TII.get(WebAssembly::CATCH_ALL)); } } return Changed; } // Replace pseudo-instructions catchret and cleanupret with br and rethrow // respectively. bool WebAssemblyLateEHPrepare::replaceFuncletReturns(MachineFunction &MF) { bool Changed = false; const auto &TII = *MF.getSubtarget().getInstrInfo(); for (auto &MBB : MF) { auto Pos = MBB.getFirstTerminator(); if (Pos == MBB.end()) continue; MachineInstr *TI = &*Pos; switch (TI->getOpcode()) { case WebAssembly::CATCHRET: { // Replace a catchret with a branch MachineBasicBlock *TBB = TI->getOperand(0).getMBB(); if (!MBB.isLayoutSuccessor(TBB)) BuildMI(MBB, TI, TI->getDebugLoc(), TII.get(WebAssembly::BR)) .addMBB(TBB); TI->eraseFromParent(); Changed = true; break; } + case WebAssembly::RETHROW: + // These RETHROWs here were lowered from llvm.wasm.rethrow() intrinsics, + // generated in Clang for when an exception is not caught by the given + // type (e.g. catch (int)). + // + // RETHROW's BB argument is the EH pad where the exception to rethrow has + // been caught. (Until this point, RETHROW has just a '0' as a placeholder + // argument.) For these llvm.wasm.rethrow()s, we can safely assume the + // exception comes from the nearest dominating EH pad, because catch.start + // EH pad is structured like this: + // + // catch.start: + // catchpad ... + // %matches = compare ehselector with typeid + // br i1 %matches, label %catch, label %rethrow + // + // rethrow: + // ;; rethrows the exception caught in 'catch.start' + // call @llvm.wasm.rethrow() + TI->removeOperand(0); + TI->addOperand(MachineOperand::CreateMBB(getMatchingEHPad(TI))); + Changed = true; + break; case WebAssembly::CLEANUPRET: { - // Replace a cleanupret with a rethrow. For C++ support, currently - // rethrow's immediate argument is always 0 (= the latest exception). + // CLEANUPRETs have the EH pad BB the exception to rethrow has been caught + // as an argument. Use it and change the instruction opcode to 'RETHROW' + // to make rethrowing instructions consistent. + // + // This is because we cannot safely assume that it is always the nearest + // dominating EH pad, in case there are code transformations such as + // inlining. BuildMI(MBB, TI, TI->getDebugLoc(), TII.get(WebAssembly::RETHROW)) - .addImm(0); + .addMBB(TI->getOperand(0).getMBB()); TI->eraseFromParent(); Changed = true; break; } } } return Changed; } // Remove unnecessary unreachables after a throw or rethrow. bool WebAssemblyLateEHPrepare::removeUnnecessaryUnreachables( MachineFunction &MF) { bool Changed = false; for (auto &MBB : MF) { for (auto &MI : MBB) { if (MI.getOpcode() != WebAssembly::THROW && MI.getOpcode() != WebAssembly::RETHROW) continue; Changed = true; // The instruction after the throw should be an unreachable or a branch to // another BB that should eventually lead to an unreachable. Delete it // because throw itself is a terminator, and also delete successors if // any. 
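      // For example (illustrative MIR shape):
      //   THROW @__cpp_exception, ...
      //   UNREACHABLE   ;; erased here, together with successors that become
      //                 ;; dead as a result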
MBB.erase(std::next(MI.getIterator()), MBB.end()); SmallVector Succs(MBB.successors()); for (auto *Succ : Succs) if (!Succ->isEHPad()) MBB.removeSuccessor(Succ); eraseDeadBBsAndChildren(Succs); } } return Changed; } // After the stack is unwound due to a thrown exception, the __stack_pointer // global can point to an invalid address. This inserts instructions that // restore __stack_pointer global. bool WebAssemblyLateEHPrepare::restoreStackPointer(MachineFunction &MF) { const auto *FrameLowering = static_cast( MF.getSubtarget().getFrameLowering()); if (!FrameLowering->needsPrologForEH(MF)) return false; bool Changed = false; for (auto &MBB : MF) { if (!MBB.isEHPad()) continue; Changed = true; // Insert __stack_pointer restoring instructions at the beginning of each EH // pad, after the catch instruction. Here it is safe to assume that SP32 // holds the latest value of __stack_pointer, because the only exception for // this case is when a function uses the red zone, but that only happens // with leaf functions, and we don't restore __stack_pointer in leaf // functions anyway. auto InsertPos = MBB.begin(); // Skip EH_LABELs in the beginning of an EH pad if present. while (InsertPos != MBB.end() && InsertPos->isEHLabel()) InsertPos++; assert(InsertPos != MBB.end() && WebAssembly::isCatch(InsertPos->getOpcode()) && "catch/catch_all should be present in every EH pad at this point"); ++InsertPos; // Skip the catch instruction FrameLowering->writeSPToGlobal(FrameLowering->getSPReg(MF), MF, MBB, InsertPos, MBB.begin()->getDebugLoc()); } return Changed; } diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td index 5a8177e2b360..9b13447754e4 100644 --- a/llvm/lib/Target/X86/X86InstrCompiler.td +++ b/llvm/lib/Target/X86/X86InstrCompiler.td @@ -1,2205 +1,2206 @@ //===- X86InstrCompiler.td - Compiler Pseudos and Patterns -*- tablegen -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file describes the various pseudo instructions used by the compiler, // as well as Pat patterns used during instruction selection. // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // Pattern Matching Support def GetLo32XForm : SDNodeXFormgetZExtValue(), SDLoc(N)); }]>; //===----------------------------------------------------------------------===// // Random Pseudo Instructions. // PIC base construction. This expands to code that looks like this: // call $next_inst // popl %destreg" let hasSideEffects = 0, isNotDuplicable = 1, Uses = [ESP, SSP], SchedRW = [WriteJump] in def MOVPC32r : Ii32<0xE8, Pseudo, (outs GR32:$reg), (ins i32imm:$label), "", []>; // ADJCALLSTACKDOWN/UP implicitly use/def ESP because they may be expanded into // a stack adjustment and the codegen must know that they may modify the stack // pointer before prolog-epilog rewriting occurs. // Pessimistically assume ADJCALLSTACKDOWN / ADJCALLSTACKUP will become // sub / add which can clobber EFLAGS. 
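// (Hence EFLAGS appears in the Defs lists of the pseudos below.)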
let Defs = [ESP, EFLAGS, SSP], Uses = [ESP, SSP], SchedRW = [WriteALU] in { def ADJCALLSTACKDOWN32 : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2, i32imm:$amt3), "#ADJCALLSTACKDOWN", []>, Requires<[NotLP64]>; def ADJCALLSTACKUP32 : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2), "#ADJCALLSTACKUP", [(X86callseq_end timm:$amt1, timm:$amt2)]>, Requires<[NotLP64]>; } def : Pat<(X86callseq_start timm:$amt1, timm:$amt2), (ADJCALLSTACKDOWN32 i32imm:$amt1, i32imm:$amt2, 0)>, Requires<[NotLP64]>; // ADJCALLSTACKDOWN/UP implicitly use/def RSP because they may be expanded into // a stack adjustment and the codegen must know that they may modify the stack // pointer before prolog-epilog rewriting occurs. // Pessimistically assume ADJCALLSTACKDOWN / ADJCALLSTACKUP will become // sub / add which can clobber EFLAGS. let Defs = [RSP, EFLAGS, SSP], Uses = [RSP, SSP], SchedRW = [WriteALU] in { def ADJCALLSTACKDOWN64 : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2, i32imm:$amt3), "#ADJCALLSTACKDOWN", []>, Requires<[IsLP64]>; def ADJCALLSTACKUP64 : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2), "#ADJCALLSTACKUP", [(X86callseq_end timm:$amt1, timm:$amt2)]>, Requires<[IsLP64]>; } def : Pat<(X86callseq_start timm:$amt1, timm:$amt2), (ADJCALLSTACKDOWN64 i32imm:$amt1, i32imm:$amt2, 0)>, Requires<[IsLP64]>; let SchedRW = [WriteSystem] in { // x86-64 va_start lowering magic. let hasSideEffects = 1, mayStore = 1, Defs = [EFLAGS] in { def VASTART_SAVE_XMM_REGS : I<0, Pseudo, (outs), (ins GR8:$al, i8mem:$regsavefi, variable_ops), "#VASTART_SAVE_XMM_REGS $al, $regsavefi", [(X86vastart_save_xmm_regs GR8:$al, addr:$regsavefi), (implicit EFLAGS)]>; } let usesCustomInserter = 1, Defs = [EFLAGS] in { // The VAARG_64 and VAARG_X32 pseudo-instructions take the address of the // va_list, and place the address of the next argument into a register. let Defs = [EFLAGS] in { def VAARG_64 : I<0, Pseudo, (outs GR64:$dst), (ins i8mem:$ap, i32imm:$size, i8imm:$mode, i32imm:$align), "#VAARG_64 $dst, $ap, $size, $mode, $align", [(set GR64:$dst, (X86vaarg64 addr:$ap, timm:$size, timm:$mode, timm:$align)), (implicit EFLAGS)]>, Requires<[In64BitMode, IsLP64]>; def VAARG_X32 : I<0, Pseudo, (outs GR32:$dst), (ins i8mem:$ap, i32imm:$size, i8imm:$mode, i32imm:$align), "#VAARG_X32 $dst, $ap, $size, $mode, $align", [(set GR32:$dst, (X86vaargx32 addr:$ap, timm:$size, timm:$mode, timm:$align)), (implicit EFLAGS)]>, Requires<[In64BitMode, NotLP64]>; } // When using segmented stacks these are lowered into instructions which first // check if the current stacklet has enough free memory. If it does, memory is // allocated by bumping the stack pointer. Otherwise memory is allocated from // the heap. let Defs = [EAX, ESP, EFLAGS], Uses = [ESP] in def SEG_ALLOCA_32 : I<0, Pseudo, (outs GR32:$dst), (ins GR32:$size), "# variable sized alloca for segmented stacks", [(set GR32:$dst, (X86SegAlloca GR32:$size))]>, Requires<[NotLP64]>; let Defs = [RAX, RSP, EFLAGS], Uses = [RSP] in def SEG_ALLOCA_64 : I<0, Pseudo, (outs GR64:$dst), (ins GR64:$size), "# variable sized alloca for segmented stacks", [(set GR64:$dst, (X86SegAlloca GR64:$size))]>, Requires<[In64BitMode]>; // To protect against stack clash, dynamic allocation should perform a memory // probe at each page. 
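// (Like SEG_ALLOCA above, the PROBED_ALLOCA pseudos below are expanded via a
// custom inserter; they sit inside the same 'usesCustomInserter' block opened
// above.)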
let Defs = [EAX, ESP, EFLAGS], Uses = [ESP] in def PROBED_ALLOCA_32 : I<0, Pseudo, (outs GR32:$dst), (ins GR32:$size), "# variable sized alloca with probing", [(set GR32:$dst, (X86ProbedAlloca GR32:$size))]>, Requires<[NotLP64]>; let Defs = [RAX, RSP, EFLAGS], Uses = [RSP] in def PROBED_ALLOCA_64 : I<0, Pseudo, (outs GR64:$dst), (ins GR64:$size), "# variable sized alloca with probing", [(set GR64:$dst, (X86ProbedAlloca GR64:$size))]>, Requires<[In64BitMode]>; } let hasNoSchedulingInfo = 1 in def STACKALLOC_W_PROBING : I<0, Pseudo, (outs), (ins i64imm:$stacksize), "# fixed size alloca with probing", []>; // Dynamic stack allocation yields a _chkstk or _alloca call for all Windows // targets. These calls are needed to probe the stack when allocating more than // 4k bytes in one go. Touching the stack at 4K increments is necessary to // ensure that the guard pages used by the OS virtual memory manager are // allocated in correct sequence. // The main point of having separate instruction are extra unmodelled effects // (compared to ordinary calls) like stack pointer change. let Defs = [EAX, ESP, EFLAGS], Uses = [ESP] in def DYN_ALLOCA_32 : I<0, Pseudo, (outs), (ins GR32:$size), "# dynamic stack allocation", [(X86DynAlloca GR32:$size)]>, Requires<[NotLP64]>; let Defs = [RAX, RSP, EFLAGS], Uses = [RSP] in def DYN_ALLOCA_64 : I<0, Pseudo, (outs), (ins GR64:$size), "# dynamic stack allocation", [(X86DynAlloca GR64:$size)]>, Requires<[In64BitMode]>; } // SchedRW // These instructions XOR the frame pointer into a GPR. They are used in some // stack protection schemes. These are post-RA pseudos because we only know the // frame register after register allocation. let Constraints = "$src = $dst", isMoveImm = 1, isPseudo = 1, Defs = [EFLAGS] in { def XOR32_FP : I<0, Pseudo, (outs GR32:$dst), (ins GR32:$src), "xorl\t$$FP, $src", []>, Requires<[NotLP64]>, Sched<[WriteALU]>; def XOR64_FP : I<0, Pseudo, (outs GR64:$dst), (ins GR64:$src), "xorq\t$$FP $src", []>, Requires<[In64BitMode]>, Sched<[WriteALU]>; } //===----------------------------------------------------------------------===// // EH Pseudo Instructions // let SchedRW = [WriteSystem] in { let isTerminator = 1, isReturn = 1, isBarrier = 1, hasCtrlDep = 1, isCodeGenOnly = 1 in { def EH_RETURN : I<0xC3, RawFrm, (outs), (ins GR32:$addr), "ret\t#eh_return, addr: $addr", [(X86ehret GR32:$addr)]>, Sched<[WriteJumpLd]>; } let isTerminator = 1, isReturn = 1, isBarrier = 1, hasCtrlDep = 1, isCodeGenOnly = 1 in { def EH_RETURN64 : I<0xC3, RawFrm, (outs), (ins GR64:$addr), "ret\t#eh_return, addr: $addr", [(X86ehret GR64:$addr)]>, Sched<[WriteJumpLd]>; } let isTerminator = 1, hasSideEffects = 1, isBarrier = 1, hasCtrlDep = 1, isCodeGenOnly = 1, isReturn = 1, isEHScopeReturn = 1 in { - def CLEANUPRET : I<0, Pseudo, (outs), (ins), "# CLEANUPRET", [(cleanupret)]>; + def CLEANUPRET : I<0, Pseudo, (outs), (ins), "# CLEANUPRET", + [(cleanupret bb)]>; // CATCHRET needs a custom inserter for SEH. 
let usesCustomInserter = 1 in def CATCHRET : I<0, Pseudo, (outs), (ins brtarget32:$dst, brtarget32:$from), "# CATCHRET", [(catchret bb:$dst, bb:$from)]>; } let hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1, usesCustomInserter = 1 in { def EH_SjLj_SetJmp32 : I<0, Pseudo, (outs GR32:$dst), (ins i32mem:$buf), "#EH_SJLJ_SETJMP32", [(set GR32:$dst, (X86eh_sjlj_setjmp addr:$buf))]>, Requires<[Not64BitMode]>; def EH_SjLj_SetJmp64 : I<0, Pseudo, (outs GR32:$dst), (ins i64mem:$buf), "#EH_SJLJ_SETJMP64", [(set GR32:$dst, (X86eh_sjlj_setjmp addr:$buf))]>, Requires<[In64BitMode]>; let isTerminator = 1 in { def EH_SjLj_LongJmp32 : I<0, Pseudo, (outs), (ins i32mem:$buf), "#EH_SJLJ_LONGJMP32", [(X86eh_sjlj_longjmp addr:$buf)]>, Requires<[Not64BitMode]>; def EH_SjLj_LongJmp64 : I<0, Pseudo, (outs), (ins i64mem:$buf), "#EH_SJLJ_LONGJMP64", [(X86eh_sjlj_longjmp addr:$buf)]>, Requires<[In64BitMode]>; } } let isBranch = 1, isTerminator = 1, isCodeGenOnly = 1 in { def EH_SjLj_Setup : I<0, Pseudo, (outs), (ins brtarget:$dst), "#EH_SjLj_Setup\t$dst", []>; } } // SchedRW //===----------------------------------------------------------------------===// // Pseudo instructions used by unwind info. // let isPseudo = 1, SchedRW = [WriteSystem] in { def SEH_PushReg : I<0, Pseudo, (outs), (ins i32imm:$reg), "#SEH_PushReg $reg", []>; def SEH_SaveReg : I<0, Pseudo, (outs), (ins i32imm:$reg, i32imm:$dst), "#SEH_SaveReg $reg, $dst", []>; def SEH_SaveXMM : I<0, Pseudo, (outs), (ins i32imm:$reg, i32imm:$dst), "#SEH_SaveXMM $reg, $dst", []>; def SEH_StackAlloc : I<0, Pseudo, (outs), (ins i32imm:$size), "#SEH_StackAlloc $size", []>; def SEH_StackAlign : I<0, Pseudo, (outs), (ins i32imm:$align), "#SEH_StackAlign $align", []>; def SEH_SetFrame : I<0, Pseudo, (outs), (ins i32imm:$reg, i32imm:$offset), "#SEH_SetFrame $reg, $offset", []>; def SEH_PushFrame : I<0, Pseudo, (outs), (ins i1imm:$mode), "#SEH_PushFrame $mode", []>; def SEH_EndPrologue : I<0, Pseudo, (outs), (ins), "#SEH_EndPrologue", []>; def SEH_Epilogue : I<0, Pseudo, (outs), (ins), "#SEH_Epilogue", []>; } //===----------------------------------------------------------------------===// // Pseudo instructions used by KCFI. //===----------------------------------------------------------------------===// let Defs = [R10, R11, EFLAGS] in { def KCFI_CHECK : PseudoI< (outs), (ins GR64:$ptr, i32imm:$type), []>, Sched<[]>; } //===----------------------------------------------------------------------===// // Pseudo instructions used by address sanitizer. //===----------------------------------------------------------------------===// let Defs = [R10, R11, EFLAGS] in { def ASAN_CHECK_MEMACCESS : PseudoI< (outs), (ins GR64PLTSafe:$addr, i32imm:$accessinfo), [(int_asan_check_memaccess GR64PLTSafe:$addr, (i32 timm:$accessinfo))]>, Sched<[]>; } //===----------------------------------------------------------------------===// // Pseudo instructions used by segmented stacks. // // This is lowered into a RET instruction by MCInstLower. We need // this so that we don't have to have a MachineBasicBlock which ends // with a RET and also has successors. let isPseudo = 1, SchedRW = [WriteJumpLd] in { def MORESTACK_RET: I<0, Pseudo, (outs), (ins), "", []>; // This instruction is lowered to a RET followed by a MOV. The two // instructions are not generated on a higher level since then the // verifier sees a MachineBasicBlock ending with a non-terminator. 
def MORESTACK_RET_RESTORE_R10 : I<0, Pseudo, (outs), (ins), "", []>; } //===----------------------------------------------------------------------===// // Alias Instructions //===----------------------------------------------------------------------===// // Alias instruction mapping movr0 to xor. // FIXME: remove when we can teach regalloc that xor reg, reg is ok. let Defs = [EFLAGS], isReMaterializable = 1, isAsCheapAsAMove = 1, isPseudo = 1, isMoveImm = 1, AddedComplexity = 10 in def MOV32r0 : I<0, Pseudo, (outs GR32:$dst), (ins), "", [(set GR32:$dst, 0)]>, Sched<[WriteZero]>; // Other widths can also make use of the 32-bit xor, which may have a smaller // encoding and avoid partial register updates. let AddedComplexity = 10 in { def : Pat<(i8 0), (EXTRACT_SUBREG (MOV32r0), sub_8bit)>; def : Pat<(i16 0), (EXTRACT_SUBREG (MOV32r0), sub_16bit)>; def : Pat<(i64 0), (SUBREG_TO_REG (i64 0), (MOV32r0), sub_32bit)>; } let Predicates = [OptForSize, Not64BitMode], AddedComplexity = 10 in { let SchedRW = [WriteALU] in { // Pseudo instructions for materializing 1 and -1 using XOR+INC/DEC, // which only require 3 bytes compared to MOV32ri which requires 5. let Defs = [EFLAGS], isReMaterializable = 1, isPseudo = 1 in { def MOV32r1 : I<0, Pseudo, (outs GR32:$dst), (ins), "", [(set GR32:$dst, 1)]>; def MOV32r_1 : I<0, Pseudo, (outs GR32:$dst), (ins), "", [(set GR32:$dst, -1)]>; } } // SchedRW // MOV16ri is 4 bytes, so the instructions above are smaller. def : Pat<(i16 1), (EXTRACT_SUBREG (MOV32r1), sub_16bit)>; def : Pat<(i16 -1), (EXTRACT_SUBREG (MOV32r_1), sub_16bit)>; } let isReMaterializable = 1, isPseudo = 1, AddedComplexity = 5, SchedRW = [WriteALU] in { // AddedComplexity higher than MOV64ri but lower than MOV32r0 and MOV32r1. def MOV32ImmSExti8 : I<0, Pseudo, (outs GR32:$dst), (ins i32i8imm:$src), "", [(set GR32:$dst, i32immSExt8:$src)]>, Requires<[OptForMinSize, NotWin64WithoutFP]>; def MOV64ImmSExti8 : I<0, Pseudo, (outs GR64:$dst), (ins i64i8imm:$src), "", [(set GR64:$dst, i64immSExt8:$src)]>, Requires<[OptForMinSize, NotWin64WithoutFP]>; } // Materialize i64 constant where top 32-bits are zero. This could theoretically // use MOV32ri with a SUBREG_TO_REG to represent the zero-extension, however // that would make it more difficult to rematerialize. let AddedComplexity = 1, isReMaterializable = 1, isAsCheapAsAMove = 1, isPseudo = 1, SchedRW = [WriteMove] in def MOV32ri64 : I<0, Pseudo, (outs GR64:$dst), (ins i64i32imm:$src), "", [(set GR64:$dst, i64immZExt32:$src)]>; // This 64-bit pseudo-move can also be used for labels in the x86-64 small code // model. def mov64imm32 : ComplexPattern; def : Pat<(i64 mov64imm32:$src), (MOV32ri64 mov64imm32:$src)>; // Use sbb to materialize carry bit. let Uses = [EFLAGS], Defs = [EFLAGS], isPseudo = 1, SchedRW = [WriteADC], hasSideEffects = 0 in { // FIXME: These are pseudo ops that should be replaced with Pat<> patterns. // However, Pat<> can't replicate the destination reg into the inputs of the // result. 
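// (These expand to 'sbb reg, reg' with the same register as both source and
// destination, which a Pat<> cannot express.)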
def SETB_C32r : I<0, Pseudo, (outs GR32:$dst), (ins), "", []>; def SETB_C64r : I<0, Pseudo, (outs GR64:$dst), (ins), "", []>; } // isCodeGenOnly //===----------------------------------------------------------------------===// // String Pseudo Instructions // let SchedRW = [WriteMicrocoded] in { let Defs = [ECX,EDI,ESI], Uses = [ECX,EDI,ESI], isCodeGenOnly = 1 in { def REP_MOVSB_32 : I<0xA4, RawFrm, (outs), (ins), "{rep;movsb (%esi), %es:(%edi)|rep movsb es:[edi], [esi]}", [(X86rep_movs i8)]>, REP, AdSize32, Requires<[NotLP64]>; def REP_MOVSW_32 : I<0xA5, RawFrm, (outs), (ins), "{rep;movsw (%esi), %es:(%edi)|rep movsw es:[edi], [esi]}", [(X86rep_movs i16)]>, REP, AdSize32, OpSize16, Requires<[NotLP64]>; def REP_MOVSD_32 : I<0xA5, RawFrm, (outs), (ins), "{rep;movsl (%esi), %es:(%edi)|rep movsd es:[edi], [esi]}", [(X86rep_movs i32)]>, REP, AdSize32, OpSize32, Requires<[NotLP64]>; def REP_MOVSQ_32 : RI<0xA5, RawFrm, (outs), (ins), "{rep;movsq (%esi), %es:(%edi)|rep movsq es:[edi], [esi]}", [(X86rep_movs i64)]>, REP, AdSize32, Requires<[NotLP64, In64BitMode]>; } let Defs = [RCX,RDI,RSI], Uses = [RCX,RDI,RSI], isCodeGenOnly = 1 in { def REP_MOVSB_64 : I<0xA4, RawFrm, (outs), (ins), "{rep;movsb (%rsi), %es:(%rdi)|rep movsb es:[rdi], [rsi]}", [(X86rep_movs i8)]>, REP, AdSize64, Requires<[IsLP64]>; def REP_MOVSW_64 : I<0xA5, RawFrm, (outs), (ins), "{rep;movsw (%rsi), %es:(%rdi)|rep movsw es:[rdi], [rsi]}", [(X86rep_movs i16)]>, REP, AdSize64, OpSize16, Requires<[IsLP64]>; def REP_MOVSD_64 : I<0xA5, RawFrm, (outs), (ins), "{rep;movsl (%rsi), %es:(%rdi)|rep movsdi es:[rdi], [rsi]}", [(X86rep_movs i32)]>, REP, AdSize64, OpSize32, Requires<[IsLP64]>; def REP_MOVSQ_64 : RI<0xA5, RawFrm, (outs), (ins), "{rep;movsq (%rsi), %es:(%rdi)|rep movsq es:[rdi], [rsi]}", [(X86rep_movs i64)]>, REP, AdSize64, Requires<[IsLP64]>; } // FIXME: Should use "(X86rep_stos AL)" as the pattern. 
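// [Illustrative C++ sketch, not part of this file] The REP_MOVS*/REP_STOS*
// pseudos above and below are selected when memcpy/memset-style intrinsics
// are expanded inline as string operations; whether that happens depends on
// the known size and subtarget tuning, otherwise a library call is emitted.
#include <cstring>
void fill_and_copy(char *dst, const char *src, char *zeroed) {
  std::memcpy(dst, src, 256);    // candidate for a rep movs expansion (X86rep_movs)
  std::memset(zeroed, 0, 256);   // candidate for a rep stos expansion (X86rep_stos)
}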
let Defs = [ECX,EDI], isCodeGenOnly = 1 in { let Uses = [AL,ECX,EDI] in def REP_STOSB_32 : I<0xAA, RawFrm, (outs), (ins), "{rep;stosb %al, %es:(%edi)|rep stosb es:[edi], al}", [(X86rep_stos i8)]>, REP, AdSize32, Requires<[NotLP64]>; let Uses = [AX,ECX,EDI] in def REP_STOSW_32 : I<0xAB, RawFrm, (outs), (ins), "{rep;stosw %ax, %es:(%edi)|rep stosw es:[edi], ax}", [(X86rep_stos i16)]>, REP, AdSize32, OpSize16, Requires<[NotLP64]>; let Uses = [EAX,ECX,EDI] in def REP_STOSD_32 : I<0xAB, RawFrm, (outs), (ins), "{rep;stosl %eax, %es:(%edi)|rep stosd es:[edi], eax}", [(X86rep_stos i32)]>, REP, AdSize32, OpSize32, Requires<[NotLP64]>; let Uses = [RAX,RCX,RDI] in def REP_STOSQ_32 : RI<0xAB, RawFrm, (outs), (ins), "{rep;stosq %rax, %es:(%edi)|rep stosq es:[edi], rax}", [(X86rep_stos i64)]>, REP, AdSize32, Requires<[NotLP64, In64BitMode]>; } let Defs = [RCX,RDI], isCodeGenOnly = 1 in { let Uses = [AL,RCX,RDI] in def REP_STOSB_64 : I<0xAA, RawFrm, (outs), (ins), "{rep;stosb %al, %es:(%rdi)|rep stosb es:[rdi], al}", [(X86rep_stos i8)]>, REP, AdSize64, Requires<[IsLP64]>; let Uses = [AX,RCX,RDI] in def REP_STOSW_64 : I<0xAB, RawFrm, (outs), (ins), "{rep;stosw %ax, %es:(%rdi)|rep stosw es:[rdi], ax}", [(X86rep_stos i16)]>, REP, AdSize64, OpSize16, Requires<[IsLP64]>; let Uses = [RAX,RCX,RDI] in def REP_STOSD_64 : I<0xAB, RawFrm, (outs), (ins), "{rep;stosl %eax, %es:(%rdi)|rep stosd es:[rdi], eax}", [(X86rep_stos i32)]>, REP, AdSize64, OpSize32, Requires<[IsLP64]>; let Uses = [RAX,RCX,RDI] in def REP_STOSQ_64 : RI<0xAB, RawFrm, (outs), (ins), "{rep;stosq %rax, %es:(%rdi)|rep stosq es:[rdi], rax}", [(X86rep_stos i64)]>, REP, AdSize64, Requires<[IsLP64]>; } } // SchedRW //===----------------------------------------------------------------------===// // Thread Local Storage Instructions // let SchedRW = [WriteSystem] in { // ELF TLS Support // All calls clobber the non-callee saved registers. ESP is marked as // a use to prevent stack-pointer assignments that appear immediately // before calls from potentially appearing dead. let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, FP7, ST0, ST1, ST2, ST3, ST4, ST5, ST6, ST7, MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7, XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS, DF], usesCustomInserter = 1, Uses = [ESP, SSP] in { def TLS_addr32 : I<0, Pseudo, (outs), (ins i32mem:$sym), "# TLS_addr32", [(X86tlsaddr tls32addr:$sym)]>, Requires<[Not64BitMode]>; def TLS_base_addr32 : I<0, Pseudo, (outs), (ins i32mem:$sym), "# TLS_base_addr32", [(X86tlsbaseaddr tls32baseaddr:$sym)]>, Requires<[Not64BitMode]>; } // All calls clobber the non-callee saved registers. RSP is marked as // a use to prevent stack-pointer assignments that appear immediately // before calls from potentially appearing dead. 
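// [Illustrative C++ sketch, not part of this file] The TLS_addr*/TLS_base_addr*
// pseudos model the call to __tls_get_addr made for general-dynamic TLS
// accesses, which is why they list the full set of call-clobbered registers.
// A thread_local variable in code built for a shared library is the typical
// source; simpler TLS models may avoid the call entirely.
thread_local int tls_counter = 0;
int *tls_counter_addr() { return &tls_counter; }  // may select TLS_addr32/TLS_addr64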
let Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11, FP0, FP1, FP2, FP3, FP4, FP5, FP6, FP7, ST0, ST1, ST2, ST3, ST4, ST5, ST6, ST7, MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7, XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS, DF], usesCustomInserter = 1, Uses = [RSP, SSP] in { def TLS_addr64 : I<0, Pseudo, (outs), (ins i64mem:$sym), "# TLS_addr64", [(X86tlsaddr tls64addr:$sym)]>, Requires<[In64BitMode, IsLP64]>; def TLS_base_addr64 : I<0, Pseudo, (outs), (ins i64mem:$sym), "# TLS_base_addr64", [(X86tlsbaseaddr tls64baseaddr:$sym)]>, Requires<[In64BitMode, IsLP64]>; def TLS_addrX32 : I<0, Pseudo, (outs), (ins i32mem:$sym), "# TLS_addrX32", [(X86tlsaddr tls32addr:$sym)]>, Requires<[In64BitMode, NotLP64]>; def TLS_base_addrX32 : I<0, Pseudo, (outs), (ins i32mem:$sym), "# TLS_base_addrX32", [(X86tlsbaseaddr tls32baseaddr:$sym)]>, Requires<[In64BitMode, NotLP64]>; } // TLSDESC only clobbers EAX and EFLAGS. ESP is marked as a use to prevent // stack-pointer assignments that appear immediately before calls from // potentially appearing dead. let Defs = [EAX, EFLAGS], usesCustomInserter = 1, Uses = [RSP, SSP] in { def TLS_desc32 : I<0, Pseudo, (outs), (ins i32mem:$sym), "# TLS_desc32", [(X86tlsdesc tls32addr:$sym)]>; def TLS_desc64 : I<0, Pseudo, (outs), (ins i64mem:$sym), "# TLS_desc64", [(X86tlsdesc tls64addr:$sym)]>; } // Darwin TLS Support // For i386, the address of the thunk is passed on the stack, on return the // address of the variable is in %eax. %ecx is trashed during the function // call. All other registers are preserved. let Defs = [EAX, ECX, EFLAGS, DF], Uses = [ESP, SSP], usesCustomInserter = 1 in def TLSCall_32 : I<0, Pseudo, (outs), (ins i32mem:$sym), "# TLSCall_32", [(X86TLSCall addr:$sym)]>, Requires<[Not64BitMode]>; // For x86_64, the address of the thunk is passed in %rdi, but the // pseudo directly use the symbol, so do not add an implicit use of // %rdi. The lowering will do the right thing with RDI. // On return the address of the variable is in %rax. All other // registers are preserved. let Defs = [RAX, EFLAGS, DF], Uses = [RSP, SSP], usesCustomInserter = 1 in def TLSCall_64 : I<0, Pseudo, (outs), (ins i64mem:$sym), "# TLSCall_64", [(X86TLSCall addr:$sym)]>, Requires<[In64BitMode]>; } // SchedRW //===----------------------------------------------------------------------===// // Conditional Move Pseudo Instructions // CMOV* - Used to implement the SELECT DAG operation. Expanded after // instruction selection into a branch sequence. multiclass CMOVrr_PSEUDO { def CMOV#NAME : I<0, Pseudo, (outs RC:$dst), (ins RC:$t, RC:$f, i8imm:$cond), "#CMOV_"#NAME#" PSEUDO!", [(set RC:$dst, (VT (X86cmov RC:$t, RC:$f, timm:$cond, EFLAGS)))]>; } let usesCustomInserter = 1, hasNoSchedulingInfo = 1, Uses = [EFLAGS] in { // X86 doesn't have 8-bit conditional moves. Use a customInserter to // emit control flow. An alternative to this is to mark i8 SELECT as Promote, // however that requires promoting the operands, and can induce additional // i8 register pressure. defm _GR8 : CMOVrr_PSEUDO; let Predicates = [NoCMOV] in { defm _GR32 : CMOVrr_PSEUDO; defm _GR16 : CMOVrr_PSEUDO; } // Predicates = [NoCMOV] // fcmov doesn't handle all possible EFLAGS, provide a fallback if there is no // SSE1/SSE2. 
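// [Illustrative C++ sketch, not part of this file] The CMOV_* pseudos back the
// SELECT node for operands the hardware cmov cannot handle directly; the
// custom inserter expands them into a short branch sequence. Selects like the
// ones below are typical sources (exact lowering depends on the optimizer and
// subtarget).
unsigned char pick8(bool c, unsigned char a, unsigned char b) {
  return c ? a : b;   // i8 select: no 8-bit cmov exists, may use CMOV_GR8
}
long double pickld(bool c, long double a, long double b) {
  return c ? a : b;   // x87 select: may use CMOV_RFP80 when fcmov is unsuitable
}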
let Predicates = [FPStackf32] in defm _RFP32 : CMOVrr_PSEUDO; let Predicates = [FPStackf64] in defm _RFP64 : CMOVrr_PSEUDO; defm _RFP80 : CMOVrr_PSEUDO; let Predicates = [HasMMX] in defm _VR64 : CMOVrr_PSEUDO; let Predicates = [HasSSE1,NoAVX512] in defm _FR32 : CMOVrr_PSEUDO; let Predicates = [HasSSE2,NoAVX512] in { defm _FR16 : CMOVrr_PSEUDO; defm _FR64 : CMOVrr_PSEUDO; } let Predicates = [HasAVX512] in { defm _FR16X : CMOVrr_PSEUDO; defm _FR32X : CMOVrr_PSEUDO; defm _FR64X : CMOVrr_PSEUDO; } let Predicates = [NoVLX] in { defm _VR128 : CMOVrr_PSEUDO; defm _VR256 : CMOVrr_PSEUDO; } let Predicates = [HasVLX] in { defm _VR128X : CMOVrr_PSEUDO; defm _VR256X : CMOVrr_PSEUDO; } defm _VR512 : CMOVrr_PSEUDO; defm _VK1 : CMOVrr_PSEUDO; defm _VK2 : CMOVrr_PSEUDO; defm _VK4 : CMOVrr_PSEUDO; defm _VK8 : CMOVrr_PSEUDO; defm _VK16 : CMOVrr_PSEUDO; defm _VK32 : CMOVrr_PSEUDO; defm _VK64 : CMOVrr_PSEUDO; } // usesCustomInserter = 1, hasNoSchedulingInfo = 1, Uses = [EFLAGS] def : Pat<(f128 (X86cmov VR128:$t, VR128:$f, timm:$cond, EFLAGS)), (CMOV_VR128 VR128:$t, VR128:$f, timm:$cond)>; let Predicates = [NoVLX] in { def : Pat<(v16i8 (X86cmov VR128:$t, VR128:$f, timm:$cond, EFLAGS)), (CMOV_VR128 VR128:$t, VR128:$f, timm:$cond)>; def : Pat<(v8i16 (X86cmov VR128:$t, VR128:$f, timm:$cond, EFLAGS)), (CMOV_VR128 VR128:$t, VR128:$f, timm:$cond)>; def : Pat<(v4i32 (X86cmov VR128:$t, VR128:$f, timm:$cond, EFLAGS)), (CMOV_VR128 VR128:$t, VR128:$f, timm:$cond)>; def : Pat<(v4f32 (X86cmov VR128:$t, VR128:$f, timm:$cond, EFLAGS)), (CMOV_VR128 VR128:$t, VR128:$f, timm:$cond)>; def : Pat<(v2f64 (X86cmov VR128:$t, VR128:$f, timm:$cond, EFLAGS)), (CMOV_VR128 VR128:$t, VR128:$f, timm:$cond)>; def : Pat<(v32i8 (X86cmov VR256:$t, VR256:$f, timm:$cond, EFLAGS)), (CMOV_VR256 VR256:$t, VR256:$f, timm:$cond)>; def : Pat<(v16i16 (X86cmov VR256:$t, VR256:$f, timm:$cond, EFLAGS)), (CMOV_VR256 VR256:$t, VR256:$f, timm:$cond)>; def : Pat<(v8i32 (X86cmov VR256:$t, VR256:$f, timm:$cond, EFLAGS)), (CMOV_VR256 VR256:$t, VR256:$f, timm:$cond)>; def : Pat<(v8f32 (X86cmov VR256:$t, VR256:$f, timm:$cond, EFLAGS)), (CMOV_VR256 VR256:$t, VR256:$f, timm:$cond)>; def : Pat<(v4f64 (X86cmov VR256:$t, VR256:$f, timm:$cond, EFLAGS)), (CMOV_VR256 VR256:$t, VR256:$f, timm:$cond)>; } let Predicates = [HasVLX] in { def : Pat<(v16i8 (X86cmov VR128X:$t, VR128X:$f, timm:$cond, EFLAGS)), (CMOV_VR128X VR128X:$t, VR128X:$f, timm:$cond)>; def : Pat<(v8i16 (X86cmov VR128X:$t, VR128X:$f, timm:$cond, EFLAGS)), (CMOV_VR128X VR128X:$t, VR128X:$f, timm:$cond)>; def : Pat<(v8f16 (X86cmov VR128X:$t, VR128X:$f, timm:$cond, EFLAGS)), (CMOV_VR128X VR128X:$t, VR128X:$f, timm:$cond)>; def : Pat<(v4i32 (X86cmov VR128X:$t, VR128X:$f, timm:$cond, EFLAGS)), (CMOV_VR128X VR128X:$t, VR128X:$f, timm:$cond)>; def : Pat<(v4f32 (X86cmov VR128X:$t, VR128X:$f, timm:$cond, EFLAGS)), (CMOV_VR128X VR128X:$t, VR128X:$f, timm:$cond)>; def : Pat<(v2f64 (X86cmov VR128X:$t, VR128X:$f, timm:$cond, EFLAGS)), (CMOV_VR128X VR128X:$t, VR128X:$f, timm:$cond)>; def : Pat<(v32i8 (X86cmov VR256X:$t, VR256X:$f, timm:$cond, EFLAGS)), (CMOV_VR256X VR256X:$t, VR256X:$f, timm:$cond)>; def : Pat<(v16i16 (X86cmov VR256X:$t, VR256X:$f, timm:$cond, EFLAGS)), (CMOV_VR256X VR256X:$t, VR256X:$f, timm:$cond)>; def : Pat<(v16f16 (X86cmov VR256X:$t, VR256X:$f, timm:$cond, EFLAGS)), (CMOV_VR256X VR256X:$t, VR256X:$f, timm:$cond)>; def : Pat<(v8i32 (X86cmov VR256X:$t, VR256X:$f, timm:$cond, EFLAGS)), (CMOV_VR256X VR256X:$t, VR256X:$f, timm:$cond)>; def : Pat<(v8f32 (X86cmov VR256X:$t, VR256X:$f, timm:$cond, EFLAGS)), 
(CMOV_VR256X VR256X:$t, VR256X:$f, timm:$cond)>; def : Pat<(v4f64 (X86cmov VR256X:$t, VR256X:$f, timm:$cond, EFLAGS)), (CMOV_VR256X VR256X:$t, VR256X:$f, timm:$cond)>; } def : Pat<(v64i8 (X86cmov VR512:$t, VR512:$f, timm:$cond, EFLAGS)), (CMOV_VR512 VR512:$t, VR512:$f, timm:$cond)>; def : Pat<(v32i16 (X86cmov VR512:$t, VR512:$f, timm:$cond, EFLAGS)), (CMOV_VR512 VR512:$t, VR512:$f, timm:$cond)>; def : Pat<(v32f16 (X86cmov VR512:$t, VR512:$f, timm:$cond, EFLAGS)), (CMOV_VR512 VR512:$t, VR512:$f, timm:$cond)>; def : Pat<(v16i32 (X86cmov VR512:$t, VR512:$f, timm:$cond, EFLAGS)), (CMOV_VR512 VR512:$t, VR512:$f, timm:$cond)>; def : Pat<(v16f32 (X86cmov VR512:$t, VR512:$f, timm:$cond, EFLAGS)), (CMOV_VR512 VR512:$t, VR512:$f, timm:$cond)>; def : Pat<(v8f64 (X86cmov VR512:$t, VR512:$f, timm:$cond, EFLAGS)), (CMOV_VR512 VR512:$t, VR512:$f, timm:$cond)>; //===----------------------------------------------------------------------===// // Normal-Instructions-With-Lock-Prefix Pseudo Instructions //===----------------------------------------------------------------------===// // FIXME: Use normal instructions and add lock prefix dynamically. // Memory barriers let isCodeGenOnly = 1, Defs = [EFLAGS] in def OR32mi8Locked : Ii8<0x83, MRM1m, (outs), (ins i32mem:$dst, i32i8imm:$zero), "or{l}\t{$zero, $dst|$dst, $zero}", []>, Requires<[Not64BitMode]>, OpSize32, LOCK, Sched<[WriteALURMW]>; // RegOpc corresponds to the mr version of the instruction // ImmOpc corresponds to the mi version of the instruction // ImmOpc8 corresponds to the mi8 version of the instruction // ImmMod corresponds to the instruction format of the mi and mi8 versions multiclass LOCK_ArithBinOp RegOpc, bits<8> ImmOpc, bits<8> ImmOpc8, Format ImmMod, SDNode Op, string mnemonic> { let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1, SchedRW = [WriteALURMW] in { def NAME#8mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4}, RegOpc{3}, RegOpc{2}, RegOpc{1}, 0 }, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2), !strconcat(mnemonic, "{b}\t", "{$src2, $dst|$dst, $src2}"), [(set EFLAGS, (Op addr:$dst, GR8:$src2))]>, LOCK; def NAME#16mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4}, RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 }, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2), !strconcat(mnemonic, "{w}\t", "{$src2, $dst|$dst, $src2}"), [(set EFLAGS, (Op addr:$dst, GR16:$src2))]>, OpSize16, LOCK; def NAME#32mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4}, RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 }, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2), !strconcat(mnemonic, "{l}\t", "{$src2, $dst|$dst, $src2}"), [(set EFLAGS, (Op addr:$dst, GR32:$src2))]>, OpSize32, LOCK; def NAME#64mr : RI<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4}, RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 }, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2), !strconcat(mnemonic, "{q}\t", "{$src2, $dst|$dst, $src2}"), [(set EFLAGS, (Op addr:$dst, GR64:$src2))]>, LOCK; // NOTE: These are order specific, we want the mi8 forms to be listed // first so that they are slightly preferred to the mi forms. 
def NAME#16mi8 : Ii8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4}, ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 }, ImmMod, (outs), (ins i16mem :$dst, i16i8imm :$src2), !strconcat(mnemonic, "{w}\t", "{$src2, $dst|$dst, $src2}"), [(set EFLAGS, (Op addr:$dst, i16immSExt8:$src2))]>, OpSize16, LOCK; def NAME#32mi8 : Ii8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4}, ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 }, ImmMod, (outs), (ins i32mem :$dst, i32i8imm :$src2), !strconcat(mnemonic, "{l}\t", "{$src2, $dst|$dst, $src2}"), [(set EFLAGS, (Op addr:$dst, i32immSExt8:$src2))]>, OpSize32, LOCK; def NAME#64mi8 : RIi8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4}, ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 }, ImmMod, (outs), (ins i64mem :$dst, i64i8imm :$src2), !strconcat(mnemonic, "{q}\t", "{$src2, $dst|$dst, $src2}"), [(set EFLAGS, (Op addr:$dst, i64immSExt8:$src2))]>, LOCK; def NAME#8mi : Ii8<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4}, ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 0 }, ImmMod, (outs), (ins i8mem :$dst, i8imm :$src2), !strconcat(mnemonic, "{b}\t", "{$src2, $dst|$dst, $src2}"), [(set EFLAGS, (Op addr:$dst, (i8 imm:$src2)))]>, LOCK; def NAME#16mi : Ii16<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4}, ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 }, ImmMod, (outs), (ins i16mem :$dst, i16imm :$src2), !strconcat(mnemonic, "{w}\t", "{$src2, $dst|$dst, $src2}"), [(set EFLAGS, (Op addr:$dst, (i16 imm:$src2)))]>, OpSize16, LOCK; def NAME#32mi : Ii32<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4}, ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 }, ImmMod, (outs), (ins i32mem :$dst, i32imm :$src2), !strconcat(mnemonic, "{l}\t", "{$src2, $dst|$dst, $src2}"), [(set EFLAGS, (Op addr:$dst, (i32 imm:$src2)))]>, OpSize32, LOCK; def NAME#64mi32 : RIi32S<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4}, ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 }, ImmMod, (outs), (ins i64mem :$dst, i64i32imm :$src2), !strconcat(mnemonic, "{q}\t", "{$src2, $dst|$dst, $src2}"), [(set EFLAGS, (Op addr:$dst, i64immSExt32:$src2))]>, LOCK; } } defm LOCK_ADD : LOCK_ArithBinOp<0x00, 0x80, 0x83, MRM0m, X86lock_add, "add">; defm LOCK_SUB : LOCK_ArithBinOp<0x28, 0x80, 0x83, MRM5m, X86lock_sub, "sub">; defm LOCK_OR : LOCK_ArithBinOp<0x08, 0x80, 0x83, MRM1m, X86lock_or , "or">; defm LOCK_AND : LOCK_ArithBinOp<0x20, 0x80, 0x83, MRM4m, X86lock_and, "and">; defm LOCK_XOR : LOCK_ArithBinOp<0x30, 0x80, 0x83, MRM6m, X86lock_xor, "xor">; let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1, SchedRW = [WriteALURMW] in { let Predicates = [UseIncDec] in { def LOCK_INC8m : I<0xFE, MRM0m, (outs), (ins i8mem :$dst), "inc{b}\t$dst", [(set EFLAGS, (X86lock_add_nocf addr:$dst, (i8 1)))]>, LOCK; def LOCK_INC16m : I<0xFF, MRM0m, (outs), (ins i16mem:$dst), "inc{w}\t$dst", [(set EFLAGS, (X86lock_add_nocf addr:$dst, (i16 1)))]>, OpSize16, LOCK; def LOCK_INC32m : I<0xFF, MRM0m, (outs), (ins i32mem:$dst), "inc{l}\t$dst", [(set EFLAGS, (X86lock_add_nocf addr:$dst, (i32 1)))]>, OpSize32, LOCK; def LOCK_DEC8m : I<0xFE, MRM1m, (outs), (ins i8mem :$dst), "dec{b}\t$dst", [(set EFLAGS, (X86lock_sub_nocf addr:$dst, (i8 1)))]>, LOCK; def LOCK_DEC16m : I<0xFF, MRM1m, (outs), (ins i16mem:$dst), "dec{w}\t$dst", [(set EFLAGS, (X86lock_sub_nocf addr:$dst, (i16 1)))]>, OpSize16, LOCK; def LOCK_DEC32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst), "dec{l}\t$dst", [(set EFLAGS, (X86lock_sub_nocf addr:$dst, (i32 1)))]>, OpSize32, LOCK; } let Predicates = [UseIncDec, In64BitMode] in { def LOCK_INC64m : RI<0xFF, MRM0m, (outs), (ins i64mem:$dst), "inc{q}\t$dst", [(set EFLAGS, (X86lock_add_nocf addr:$dst, (i64 1)))]>, LOCK; def 
LOCK_DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst", [(set EFLAGS, (X86lock_sub_nocf addr:$dst, (i64 1)))]>, LOCK; } } let Predicates = [UseIncDec] in { // Additional patterns for -1 constant. def : Pat<(X86lock_add addr:$dst, (i8 -1)), (LOCK_DEC8m addr:$dst)>; def : Pat<(X86lock_add addr:$dst, (i16 -1)), (LOCK_DEC16m addr:$dst)>; def : Pat<(X86lock_add addr:$dst, (i32 -1)), (LOCK_DEC32m addr:$dst)>; def : Pat<(X86lock_sub addr:$dst, (i8 -1)), (LOCK_INC8m addr:$dst)>; def : Pat<(X86lock_sub addr:$dst, (i16 -1)), (LOCK_INC16m addr:$dst)>; def : Pat<(X86lock_sub addr:$dst, (i32 -1)), (LOCK_INC32m addr:$dst)>; } let Predicates = [UseIncDec, In64BitMode] in { // Additional patterns for -1 constant. def : Pat<(X86lock_add addr:$dst, (i64 -1)), (LOCK_DEC64m addr:$dst)>; def : Pat<(X86lock_sub addr:$dst, (i64 -1)), (LOCK_INC64m addr:$dst)>; } // Atomic bit test. def X86LBTest : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisPtrTy<1>, SDTCisVT<2, i8>, SDTCisVT<3, i32>]>; def x86bts : SDNode<"X86ISD::LBTS", X86LBTest, [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>; def x86btc : SDNode<"X86ISD::LBTC", X86LBTest, [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>; def x86btr : SDNode<"X86ISD::LBTR", X86LBTest, [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>; def X86LBTestRM : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<1>, SDTCisInt<2>]>; def x86_rm_bts : SDNode<"X86ISD::LBTS_RM", X86LBTestRM, [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>; def x86_rm_btc : SDNode<"X86ISD::LBTC_RM", X86LBTestRM, [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>; def x86_rm_btr : SDNode<"X86ISD::LBTR_RM", X86LBTestRM, [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>; multiclass ATOMIC_LOGIC_OP { let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1, SchedRW = [WriteBitTestSetRegRMW] in { def 16m : Ii8<0xBA, Form, (outs), (ins i16mem:$src1, i8imm:$src2), !strconcat(s, "{w}\t{$src2, $src1|$src1, $src2}"), [(set EFLAGS, (!cast("x86" # s) addr:$src1, timm:$src2, (i32 16)))]>, OpSize16, TB, LOCK; def 32m : Ii8<0xBA, Form, (outs), (ins i32mem:$src1, i8imm:$src2), !strconcat(s, "{l}\t{$src2, $src1|$src1, $src2}"), [(set EFLAGS, (!cast("x86" # s) addr:$src1, timm:$src2, (i32 32)))]>, OpSize32, TB, LOCK; def 64m : RIi8<0xBA, Form, (outs), (ins i64mem:$src1, i8imm:$src2), !strconcat(s, "{q}\t{$src2, $src1|$src1, $src2}"), [(set EFLAGS, (!cast("x86" # s) addr:$src1, timm:$src2, (i32 64)))]>, TB, LOCK; } } multiclass ATOMIC_LOGIC_OP_RM Opc8, string s> { let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1, SchedRW = [WriteBitTestSetRegRMW] in { def 16rm : I("x86_rm_" # s) addr:$src1, GR16:$src2))]>, OpSize16, TB, LOCK; def 32rm : I("x86_rm_" # s) addr:$src1, GR32:$src2))]>, OpSize32, TB, LOCK; def 64rm : RI("x86_rm_" # s) addr:$src1, GR64:$src2))]>, TB, LOCK; } } defm LOCK_BTS : ATOMIC_LOGIC_OP; defm LOCK_BTC : ATOMIC_LOGIC_OP; defm LOCK_BTR : ATOMIC_LOGIC_OP; defm LOCK_BTS_RM : ATOMIC_LOGIC_OP_RM<0xAB, "bts">; defm LOCK_BTC_RM : ATOMIC_LOGIC_OP_RM<0xBB, "btc">; defm LOCK_BTR_RM : ATOMIC_LOGIC_OP_RM<0xB3, "btr">; // Atomic compare and swap. 
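// [Illustrative C++ sketch, not part of this file] std::atomic read-modify-write
// operations are the usual source of the LOCK-prefixed patterns above, and
// compare_exchange maps onto the cmpxchg-based LCMPXCHG* definitions that
// follow. The per-line annotations are typical outcomes, not guarantees.
#include <atomic>
bool atomic_shapes(std::atomic<int> &a, std::atomic<unsigned> &bits) {
  a.fetch_add(1);                             // candidate for LOCK_INC32m / lock add
  int expected = 0;
  a.compare_exchange_strong(expected, 42);    // candidate for lock cmpxchg (LCMPXCHG32)
  return bits.fetch_or(1u << 5) & (1u << 5);  // only the affected bit of the old value
                                              // is used: the shape lock bts (LOCK_BTS*)
                                              // is built for
}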
multiclass LCMPXCHG_BinOp Opc8, bits<8> Opc, Format Form, string mnemonic, SDPatternOperator frag> { let isCodeGenOnly = 1, SchedRW = [WriteCMPXCHGRMW] in { let Defs = [AL, EFLAGS], Uses = [AL] in def NAME#8 : I, TB, LOCK; let Defs = [AX, EFLAGS], Uses = [AX] in def NAME#16 : I, TB, OpSize16, LOCK; let Defs = [EAX, EFLAGS], Uses = [EAX] in def NAME#32 : I, TB, OpSize32, LOCK; let Defs = [RAX, EFLAGS], Uses = [RAX] in def NAME#64 : RI, TB, LOCK; } } let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX], Predicates = [HasCX8], SchedRW = [WriteCMPXCHGRMW], isCodeGenOnly = 1, usesCustomInserter = 1 in { def LCMPXCHG8B : I<0xC7, MRM1m, (outs), (ins i64mem:$ptr), "cmpxchg8b\t$ptr", [(X86cas8 addr:$ptr)]>, TB, LOCK; } let Defs = [RAX, RDX, EFLAGS], Uses = [RAX, RBX, RCX, RDX], Predicates = [HasCX16,In64BitMode], SchedRW = [WriteCMPXCHGRMW], isCodeGenOnly = 1, mayLoad = 1, mayStore = 1, hasSideEffects = 0 in { def LCMPXCHG16B : RI<0xC7, MRM1m, (outs), (ins i128mem:$ptr), "cmpxchg16b\t$ptr", []>, TB, LOCK; } // This pseudo must be used when the frame uses RBX as // the base pointer. Indeed, in such situation RBX is a reserved // register and the register allocator will ignore any use/def of // it. In other words, the register will not fix the clobbering of // RBX that will happen when setting the arguments for the instrucion. // // Unlike the actual related instruction, we mark that this one // defines RBX (instead of using RBX). // The rationale is that we will define RBX during the expansion of // the pseudo. The argument feeding RBX is rbx_input. // // The additional argument, $rbx_save, is a temporary register used to // save the value of RBX across the actual instruction. // // To make sure the register assigned to $rbx_save does not interfere with // the definition of the actual instruction, we use a definition $dst which // is tied to $rbx_save. That way, the live-range of $rbx_save spans across // the instruction and we are sure we will have a valid register to restore // the value of RBX. let Defs = [RAX, RDX, RBX, EFLAGS], Uses = [RAX, RCX, RDX], Predicates = [HasCX16,In64BitMode], SchedRW = [WriteCMPXCHGRMW], isCodeGenOnly = 1, isPseudo = 1, mayLoad = 1, mayStore = 1, hasSideEffects = 0, Constraints = "$rbx_save = $dst" in { def LCMPXCHG16B_SAVE_RBX : I<0, Pseudo, (outs GR64:$dst), (ins i128mem:$ptr, GR64:$rbx_input, GR64:$rbx_save), "", []>; } // Pseudo instruction that doesn't read/write RBX. Will be turned into either // LCMPXCHG16B_SAVE_RBX or LCMPXCHG16B via a custom inserter. let Defs = [RAX, RDX, EFLAGS], Uses = [RAX, RCX, RDX], Predicates = [HasCX16,In64BitMode], SchedRW = [WriteCMPXCHGRMW], isCodeGenOnly = 1, isPseudo = 1, mayLoad = 1, mayStore = 1, hasSideEffects = 0, usesCustomInserter = 1 in { def LCMPXCHG16B_NO_RBX : I<0, Pseudo, (outs), (ins i128mem:$ptr, GR64:$rbx_input), "", [(X86cas16 addr:$ptr, GR64:$rbx_input)]>; } // This pseudo must be used when the frame uses RBX/EBX as // the base pointer. // cf comment for LCMPXCHG16B_SAVE_RBX. let Defs = [EBX], Uses = [ECX, EAX], Predicates = [HasMWAITX], SchedRW = [WriteSystem], isCodeGenOnly = 1, isPseudo = 1, Constraints = "$rbx_save = $dst" in { def MWAITX_SAVE_RBX : I<0, Pseudo, (outs GR64:$dst), (ins GR32:$ebx_input, GR64:$rbx_save), "mwaitx", []>; } // Pseudo mwaitx instruction to use for custom insertion. 
let Predicates = [HasMWAITX], SchedRW = [WriteSystem], isCodeGenOnly = 1, isPseudo = 1, usesCustomInserter = 1 in { def MWAITX : I<0, Pseudo, (outs), (ins GR32:$ecx, GR32:$eax, GR32:$ebx), "mwaitx", [(int_x86_mwaitx GR32:$ecx, GR32:$eax, GR32:$ebx)]>; } defm LCMPXCHG : LCMPXCHG_BinOp<0xB0, 0xB1, MRMDestMem, "cmpxchg", X86cas>; // Atomic exchange and add multiclass ATOMIC_RMW_BINOP opc8, bits<8> opc, string mnemonic, string frag> { let Constraints = "$val = $dst", Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1, SchedRW = [WriteALURMW] in { def NAME#8 : I(frag # "_i8") addr:$ptr, GR8:$val))]>; def NAME#16 : I(frag # "_i16") addr:$ptr, GR16:$val))]>, OpSize16; def NAME#32 : I(frag # "_i32") addr:$ptr, GR32:$val))]>, OpSize32; def NAME#64 : RI(frag # "_i64") addr:$ptr, GR64:$val))]>; } } defm LXADD : ATOMIC_RMW_BINOP<0xc0, 0xc1, "xadd", "atomic_load_add">, TB, LOCK; /* The following multiclass tries to make sure that in code like * x.store (immediate op x.load(acquire), release) * and * x.store (register op x.load(acquire), release) * an operation directly on memory is generated instead of wasting a register. * It is not automatic as atomic_store/load are only lowered to MOV instructions * extremely late to prevent them from being accidentally reordered in the backend * (see below the RELEASE_MOV* / ACQUIRE_MOV* pseudo-instructions) */ multiclass RELEASE_BINOP_MI { def : Pat<(atomic_store_8 (op (atomic_load_8 addr:$dst), (i8 imm:$src)), addr:$dst), (!cast(Name#"8mi") addr:$dst, imm:$src)>; def : Pat<(atomic_store_16 (op (atomic_load_16 addr:$dst), (i16 imm:$src)), addr:$dst), (!cast(Name#"16mi") addr:$dst, imm:$src)>; def : Pat<(atomic_store_32 (op (atomic_load_32 addr:$dst), (i32 imm:$src)), addr:$dst), (!cast(Name#"32mi") addr:$dst, imm:$src)>; def : Pat<(atomic_store_64 (op (atomic_load_64 addr:$dst), (i64immSExt32:$src)), addr:$dst), (!cast(Name#"64mi32") addr:$dst, (i64immSExt32:$src))>; def : Pat<(atomic_store_8 (op (atomic_load_8 addr:$dst), (i8 GR8:$src)), addr:$dst), (!cast(Name#"8mr") addr:$dst, GR8:$src)>; def : Pat<(atomic_store_16 (op (atomic_load_16 addr:$dst), (i16 GR16:$src)), addr:$dst), (!cast(Name#"16mr") addr:$dst, GR16:$src)>; def : Pat<(atomic_store_32 (op (atomic_load_32 addr:$dst), (i32 GR32:$src)), addr:$dst), (!cast(Name#"32mr") addr:$dst, GR32:$src)>; def : Pat<(atomic_store_64 (op (atomic_load_64 addr:$dst), (i64 GR64:$src)), addr:$dst), (!cast(Name#"64mr") addr:$dst, GR64:$src)>; } defm : RELEASE_BINOP_MI<"ADD", add>; defm : RELEASE_BINOP_MI<"AND", and>; defm : RELEASE_BINOP_MI<"OR", or>; defm : RELEASE_BINOP_MI<"XOR", xor>; defm : RELEASE_BINOP_MI<"SUB", sub>; // Atomic load + floating point patterns. // FIXME: This could also handle SIMD operations with *ps and *pd instructions. 
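// [Illustrative C++ sketch, not part of this file] Two shapes these patterns
// target: the RELEASE_BINOP_MI patterns above fold an acquire-load / op /
// release-store sequence into one memory-destination instruction, and the FP
// multiclass below folds an atomic integer load that is bitcast to float into
// the memory operand of addss/subss and friends. Exact selection depends on
// the optimizer.
#include <atomic>
#include <cstring>
void bump(std::atomic<int> &x) {
  x.store(x.load(std::memory_order_acquire) + 5, std::memory_order_release);
  // shape targeted by RELEASE_BINOP_MI: may become "addl $5, (mem)"
}
float acc(float f, const std::atomic<unsigned> &bits) {
  unsigned u = bits.load(std::memory_order_relaxed);
  float g;
  std::memcpy(&g, &u, sizeof g);   // bitconvert of an atomic i32 load
  return f + g;                    // shape targeted by ATOMIC_LOAD_FP_BINOP_MI
}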
multiclass ATOMIC_LOAD_FP_BINOP_MI { def : Pat<(op FR32:$src1, (bitconvert (i32 (atomic_load_32 addr:$src2)))), (!cast(Name#"SSrm") FR32:$src1, addr:$src2)>, Requires<[UseSSE1]>; def : Pat<(op FR32:$src1, (bitconvert (i32 (atomic_load_32 addr:$src2)))), (!cast("V"#Name#"SSrm") FR32:$src1, addr:$src2)>, Requires<[UseAVX]>; def : Pat<(op FR32X:$src1, (bitconvert (i32 (atomic_load_32 addr:$src2)))), (!cast("V"#Name#"SSZrm") FR32X:$src1, addr:$src2)>, Requires<[HasAVX512]>; def : Pat<(op FR64:$src1, (bitconvert (i64 (atomic_load_64 addr:$src2)))), (!cast(Name#"SDrm") FR64:$src1, addr:$src2)>, Requires<[UseSSE1]>; def : Pat<(op FR64:$src1, (bitconvert (i64 (atomic_load_64 addr:$src2)))), (!cast("V"#Name#"SDrm") FR64:$src1, addr:$src2)>, Requires<[UseAVX]>; def : Pat<(op FR64X:$src1, (bitconvert (i64 (atomic_load_64 addr:$src2)))), (!cast("V"#Name#"SDZrm") FR64X:$src1, addr:$src2)>, Requires<[HasAVX512]>; } defm : ATOMIC_LOAD_FP_BINOP_MI<"ADD", fadd>; defm : ATOMIC_LOAD_FP_BINOP_MI<"SUB", fsub>; defm : ATOMIC_LOAD_FP_BINOP_MI<"MUL", fmul>; defm : ATOMIC_LOAD_FP_BINOP_MI<"DIV", fdiv>; multiclass RELEASE_UNOP { def : Pat<(atomic_store_8 dag8, addr:$dst), (!cast(Name#8m) addr:$dst)>; def : Pat<(atomic_store_16 dag16, addr:$dst), (!cast(Name#16m) addr:$dst)>; def : Pat<(atomic_store_32 dag32, addr:$dst), (!cast(Name#32m) addr:$dst)>; def : Pat<(atomic_store_64 dag64, addr:$dst), (!cast(Name#64m) addr:$dst)>; } let Predicates = [UseIncDec] in { defm : RELEASE_UNOP<"INC", (add (atomic_load_8 addr:$dst), (i8 1)), (add (atomic_load_16 addr:$dst), (i16 1)), (add (atomic_load_32 addr:$dst), (i32 1)), (add (atomic_load_64 addr:$dst), (i64 1))>; defm : RELEASE_UNOP<"DEC", (add (atomic_load_8 addr:$dst), (i8 -1)), (add (atomic_load_16 addr:$dst), (i16 -1)), (add (atomic_load_32 addr:$dst), (i32 -1)), (add (atomic_load_64 addr:$dst), (i64 -1))>; } defm : RELEASE_UNOP<"NEG", (ineg (i8 (atomic_load_8 addr:$dst))), (ineg (i16 (atomic_load_16 addr:$dst))), (ineg (i32 (atomic_load_32 addr:$dst))), (ineg (i64 (atomic_load_64 addr:$dst)))>; defm : RELEASE_UNOP<"NOT", (not (i8 (atomic_load_8 addr:$dst))), (not (i16 (atomic_load_16 addr:$dst))), (not (i32 (atomic_load_32 addr:$dst))), (not (i64 (atomic_load_64 addr:$dst)))>; def : Pat<(atomic_store_8 (i8 imm:$src), addr:$dst), (MOV8mi addr:$dst, imm:$src)>; def : Pat<(atomic_store_16 (i16 imm:$src), addr:$dst), (MOV16mi addr:$dst, imm:$src)>; def : Pat<(atomic_store_32 (i32 imm:$src), addr:$dst), (MOV32mi addr:$dst, imm:$src)>; def : Pat<(atomic_store_64 (i64immSExt32:$src), addr:$dst), (MOV64mi32 addr:$dst, i64immSExt32:$src)>; def : Pat<(atomic_store_8 GR8:$src, addr:$dst), (MOV8mr addr:$dst, GR8:$src)>; def : Pat<(atomic_store_16 GR16:$src, addr:$dst), (MOV16mr addr:$dst, GR16:$src)>; def : Pat<(atomic_store_32 GR32:$src, addr:$dst), (MOV32mr addr:$dst, GR32:$src)>; def : Pat<(atomic_store_64 GR64:$src, addr:$dst), (MOV64mr addr:$dst, GR64:$src)>; def : Pat<(i8 (atomic_load_8 addr:$src)), (MOV8rm addr:$src)>; def : Pat<(i16 (atomic_load_16 addr:$src)), (MOV16rm addr:$src)>; def : Pat<(i32 (atomic_load_32 addr:$src)), (MOV32rm addr:$src)>; def : Pat<(i64 (atomic_load_64 addr:$src)), (MOV64rm addr:$src)>; // Floating point loads/stores. 
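// [Illustrative C++ sketch, not part of this file] Atomic float/double loads
// and stores typically reach isel as an atomic integer access plus a
// bitconvert; the patterns below let that pair select movss/movsd directly so
// the value never bounces through a GPR. Acquire loads and release stores
// need no extra fencing on x86.
#include <atomic>
float load_f(const std::atomic<float> &f) { return f.load(std::memory_order_acquire); }
void store_f(std::atomic<float> &f, float v) { f.store(v, std::memory_order_release); }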
def : Pat<(atomic_store_32 (i32 (bitconvert (f32 FR32:$src))), addr:$dst), (MOVSSmr addr:$dst, FR32:$src)>, Requires<[UseSSE1]>; def : Pat<(atomic_store_32 (i32 (bitconvert (f32 FR32:$src))), addr:$dst), (VMOVSSmr addr:$dst, FR32:$src)>, Requires<[UseAVX]>; def : Pat<(atomic_store_32 (i32 (bitconvert (f32 FR32:$src))), addr:$dst), (VMOVSSZmr addr:$dst, FR32:$src)>, Requires<[HasAVX512]>; def : Pat<(atomic_store_64 (i64 (bitconvert (f64 FR64:$src))), addr:$dst), (MOVSDmr addr:$dst, FR64:$src)>, Requires<[UseSSE2]>; def : Pat<(atomic_store_64 (i64 (bitconvert (f64 FR64:$src))), addr:$dst), (VMOVSDmr addr:$dst, FR64:$src)>, Requires<[UseAVX]>; def : Pat<(atomic_store_64 (i64 (bitconvert (f64 FR64:$src))), addr:$dst), (VMOVSDmr addr:$dst, FR64:$src)>, Requires<[HasAVX512]>; def : Pat<(f32 (bitconvert (i32 (atomic_load_32 addr:$src)))), (MOVSSrm_alt addr:$src)>, Requires<[UseSSE1]>; def : Pat<(f32 (bitconvert (i32 (atomic_load_32 addr:$src)))), (VMOVSSrm_alt addr:$src)>, Requires<[UseAVX]>; def : Pat<(f32 (bitconvert (i32 (atomic_load_32 addr:$src)))), (VMOVSSZrm_alt addr:$src)>, Requires<[HasAVX512]>; def : Pat<(f64 (bitconvert (i64 (atomic_load_64 addr:$src)))), (MOVSDrm_alt addr:$src)>, Requires<[UseSSE2]>; def : Pat<(f64 (bitconvert (i64 (atomic_load_64 addr:$src)))), (VMOVSDrm_alt addr:$src)>, Requires<[UseAVX]>; def : Pat<(f64 (bitconvert (i64 (atomic_load_64 addr:$src)))), (VMOVSDZrm_alt addr:$src)>, Requires<[HasAVX512]>; //===----------------------------------------------------------------------===// // DAG Pattern Matching Rules //===----------------------------------------------------------------------===// // Use AND/OR to store 0/-1 in memory when optimizing for minsize. This saves // binary size compared to a regular MOV, but it introduces an unnecessary // load, so is not suitable for regular or optsize functions. let Predicates = [OptForMinSize] in { def : Pat<(simple_store (i16 0), addr:$dst), (AND16mi addr:$dst, 0)>; def : Pat<(simple_store (i32 0), addr:$dst), (AND32mi addr:$dst, 0)>; def : Pat<(simple_store (i64 0), addr:$dst), (AND64mi32 addr:$dst, 0)>; def : Pat<(simple_store (i16 -1), addr:$dst), (OR16mi addr:$dst, -1)>; def : Pat<(simple_store (i32 -1), addr:$dst), (OR32mi addr:$dst, -1)>; def : Pat<(simple_store (i64 -1), addr:$dst), (OR64mi32 addr:$dst, -1)>; } // In kernel code model, we can get the address of a label // into a register with 'movq'. FIXME: This is a hack, the 'imm' predicate of // the MOV64ri32 should accept these. def : Pat<(i64 (X86Wrapper tconstpool :$dst)), (MOV64ri32 tconstpool :$dst)>, Requires<[KernelCode]>; def : Pat<(i64 (X86Wrapper tjumptable :$dst)), (MOV64ri32 tjumptable :$dst)>, Requires<[KernelCode]>; def : Pat<(i64 (X86Wrapper tglobaladdr :$dst)), (MOV64ri32 tglobaladdr :$dst)>, Requires<[KernelCode]>; def : Pat<(i64 (X86Wrapper texternalsym:$dst)), (MOV64ri32 texternalsym:$dst)>, Requires<[KernelCode]>; def : Pat<(i64 (X86Wrapper mcsym:$dst)), (MOV64ri32 mcsym:$dst)>, Requires<[KernelCode]>; def : Pat<(i64 (X86Wrapper tblockaddress:$dst)), (MOV64ri32 tblockaddress:$dst)>, Requires<[KernelCode]>; // If we have small model and -static mode, it is safe to store global addresses // directly as immediates. FIXME: This is really a hack, the 'imm' predicate // for MOV64mi32 should handle this sort of thing. 
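// [Illustrative C++ sketch, not part of this file] Under the small code model
// with non-PIC (static) linking, a global's address fits in a sign-extended
// 32-bit immediate, so storing it needs no scratch register; the patterns
// below use MOV64mi32 for that. The assembly shown is one plausible outcome.
int g_value;
int *g_ptr;
void capture() { g_ptr = &g_value; }  // may become: movq $g_value, g_ptr(%rip)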
def : Pat<(store (i64 (X86Wrapper tconstpool:$src)), addr:$dst), (MOV64mi32 addr:$dst, tconstpool:$src)>, Requires<[NearData, IsNotPIC]>; def : Pat<(store (i64 (X86Wrapper tjumptable:$src)), addr:$dst), (MOV64mi32 addr:$dst, tjumptable:$src)>, Requires<[NearData, IsNotPIC]>; def : Pat<(store (i64 (X86Wrapper tglobaladdr:$src)), addr:$dst), (MOV64mi32 addr:$dst, tglobaladdr:$src)>, Requires<[NearData, IsNotPIC]>; def : Pat<(store (i64 (X86Wrapper texternalsym:$src)), addr:$dst), (MOV64mi32 addr:$dst, texternalsym:$src)>, Requires<[NearData, IsNotPIC]>; def : Pat<(store (i64 (X86Wrapper mcsym:$src)), addr:$dst), (MOV64mi32 addr:$dst, mcsym:$src)>, Requires<[NearData, IsNotPIC]>; def : Pat<(store (i64 (X86Wrapper tblockaddress:$src)), addr:$dst), (MOV64mi32 addr:$dst, tblockaddress:$src)>, Requires<[NearData, IsNotPIC]>; def : Pat<(i32 (X86RecoverFrameAlloc mcsym:$dst)), (MOV32ri mcsym:$dst)>; def : Pat<(i64 (X86RecoverFrameAlloc mcsym:$dst)), (MOV64ri mcsym:$dst)>; // Calls // tls has some funny stuff here... // This corresponds to movabs $foo@tpoff, %rax def : Pat<(i64 (X86Wrapper tglobaltlsaddr :$dst)), (MOV64ri32 tglobaltlsaddr :$dst)>; // This corresponds to add $foo@tpoff, %rax def : Pat<(add GR64:$src1, (X86Wrapper tglobaltlsaddr :$dst)), (ADD64ri32 GR64:$src1, tglobaltlsaddr :$dst)>; // Direct PC relative function call for small code model. 32-bit displacement // sign extended to 64-bit. def : Pat<(X86call (i64 tglobaladdr:$dst)), (CALL64pcrel32 tglobaladdr:$dst)>; def : Pat<(X86call (i64 texternalsym:$dst)), (CALL64pcrel32 texternalsym:$dst)>; def : Pat<(X86call_rvmarker (i64 tglobaladdr:$rvfunc), (i64 texternalsym:$dst)), (CALL64pcrel32_RVMARKER tglobaladdr:$rvfunc, texternalsym:$dst)>; def : Pat<(X86call_rvmarker (i64 tglobaladdr:$rvfunc), (i64 tglobaladdr:$dst)), (CALL64pcrel32_RVMARKER tglobaladdr:$rvfunc, tglobaladdr:$dst)>; // Tailcall stuff. The TCRETURN instructions execute after the epilog, so they // can never use callee-saved registers. That is the purpose of the GR64_TC // register classes. // // The only volatile register that is never used by the calling convention is // %r11. This happens when calling a vararg function with 6 arguments. // // Match an X86tcret that uses less than 7 volatile registers. def : Pat<(X86tcret ptr_rc_tailcall:$dst, timm:$off), (TCRETURNri ptr_rc_tailcall:$dst, timm:$off)>, Requires<[Not64BitMode, NotUseIndirectThunkCalls]>; // FIXME: This is disabled for 32-bit PIC mode because the global base // register which is part of the address mode may be assigned a // callee-saved register. // Similar to X86tcret_6regs, here we only have 1 register left def : Pat<(X86tcret_1reg (load addr:$dst), timm:$off), (TCRETURNmi addr:$dst, timm:$off)>, Requires<[Not64BitMode, IsNotPIC, NotUseIndirectThunkCalls]>; def : Pat<(X86tcret (i32 tglobaladdr:$dst), timm:$off), (TCRETURNdi tglobaladdr:$dst, timm:$off)>, Requires<[NotLP64]>; def : Pat<(X86tcret (i32 texternalsym:$dst), timm:$off), (TCRETURNdi texternalsym:$dst, timm:$off)>, Requires<[NotLP64]>; def : Pat<(X86tcret ptr_rc_tailcall:$dst, timm:$off), (TCRETURNri64 ptr_rc_tailcall:$dst, timm:$off)>, Requires<[In64BitMode, NotUseIndirectThunkCalls]>; // Don't fold loads into X86tcret requiring more than 6 regs. // There wouldn't be enough scratch registers for base+index. 
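// [Illustrative C++ sketch, not part of this file] Calls in tail position that
// are eligible for tail-call optimization are what the X86tcret patterns
// select: direct callees go through TCRETURNdi/TCRETURNdi64, values in
// registers through TCRETURNri/TCRETURNri64, and a call through a loaded
// function pointer may be folded into TCRETURNmi64 subject to the register
// limits noted above.
int callee(int);
int (*dispatch_table[16])(int);
int tail_direct(int x) { return callee(x + 1); }                           // direct tail call
int tail_indirect(unsigned i, int x) { return dispatch_table[i & 15](x); } // load-folding candidate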
def : Pat<(X86tcret_6regs (load addr:$dst), timm:$off), (TCRETURNmi64 addr:$dst, timm:$off)>, Requires<[In64BitMode, NotUseIndirectThunkCalls]>; def : Pat<(X86tcret ptr_rc_tailcall:$dst, timm:$off), (INDIRECT_THUNK_TCRETURN64 ptr_rc_tailcall:$dst, timm:$off)>, Requires<[In64BitMode, UseIndirectThunkCalls]>; def : Pat<(X86tcret ptr_rc_tailcall:$dst, timm:$off), (INDIRECT_THUNK_TCRETURN32 ptr_rc_tailcall:$dst, timm:$off)>, Requires<[Not64BitMode, UseIndirectThunkCalls]>; def : Pat<(X86tcret (i64 tglobaladdr:$dst), timm:$off), (TCRETURNdi64 tglobaladdr:$dst, timm:$off)>, Requires<[IsLP64]>; def : Pat<(X86tcret (i64 texternalsym:$dst), timm:$off), (TCRETURNdi64 texternalsym:$dst, timm:$off)>, Requires<[IsLP64]>; // Normal calls, with various flavors of addresses. def : Pat<(X86call (i32 tglobaladdr:$dst)), (CALLpcrel32 tglobaladdr:$dst)>; def : Pat<(X86call (i32 texternalsym:$dst)), (CALLpcrel32 texternalsym:$dst)>; def : Pat<(X86call (i32 imm:$dst)), (CALLpcrel32 imm:$dst)>, Requires<[CallImmAddr]>; // Comparisons. // TEST R,R is smaller than CMP R,0 def : Pat<(X86cmp GR8:$src1, 0), (TEST8rr GR8:$src1, GR8:$src1)>; def : Pat<(X86cmp GR16:$src1, 0), (TEST16rr GR16:$src1, GR16:$src1)>; def : Pat<(X86cmp GR32:$src1, 0), (TEST32rr GR32:$src1, GR32:$src1)>; def : Pat<(X86cmp GR64:$src1, 0), (TEST64rr GR64:$src1, GR64:$src1)>; // zextload bool -> zextload byte // i1 stored in one byte in zero-extended form. // Upper bits cleanup should be executed before Store. def : Pat<(zextloadi8i1 addr:$src), (MOV8rm addr:$src)>; def : Pat<(zextloadi16i1 addr:$src), (EXTRACT_SUBREG (MOVZX32rm8 addr:$src), sub_16bit)>; def : Pat<(zextloadi32i1 addr:$src), (MOVZX32rm8 addr:$src)>; def : Pat<(zextloadi64i1 addr:$src), (SUBREG_TO_REG (i64 0), (MOVZX32rm8 addr:$src), sub_32bit)>; // extload bool -> extload byte // When extloading from 16-bit and smaller memory locations into 64-bit // registers, use zero-extending loads so that the entire 64-bit register is // defined, avoiding partial-register updates. def : Pat<(extloadi8i1 addr:$src), (MOV8rm addr:$src)>; def : Pat<(extloadi16i1 addr:$src), (EXTRACT_SUBREG (MOVZX32rm8 addr:$src), sub_16bit)>; def : Pat<(extloadi32i1 addr:$src), (MOVZX32rm8 addr:$src)>; def : Pat<(extloadi16i8 addr:$src), (EXTRACT_SUBREG (MOVZX32rm8 addr:$src), sub_16bit)>; def : Pat<(extloadi32i8 addr:$src), (MOVZX32rm8 addr:$src)>; def : Pat<(extloadi32i16 addr:$src), (MOVZX32rm16 addr:$src)>; // For other extloads, use subregs, since the high contents of the register are // defined after an extload. // NOTE: The extloadi64i32 pattern needs to be first as it will try to form // 32-bit loads for 4 byte aligned i8/i16 loads. def : Pat<(extloadi64i32 addr:$src), (SUBREG_TO_REG (i64 0), (MOV32rm addr:$src), sub_32bit)>; def : Pat<(extloadi64i1 addr:$src), (SUBREG_TO_REG (i64 0), (MOVZX32rm8 addr:$src), sub_32bit)>; def : Pat<(extloadi64i8 addr:$src), (SUBREG_TO_REG (i64 0), (MOVZX32rm8 addr:$src), sub_32bit)>; def : Pat<(extloadi64i16 addr:$src), (SUBREG_TO_REG (i64 0), (MOVZX32rm16 addr:$src), sub_32bit)>; // anyext. Define these to do an explicit zero-extend to // avoid partial-register updates. def : Pat<(i16 (anyext GR8 :$src)), (EXTRACT_SUBREG (MOVZX32rr8 GR8 :$src), sub_16bit)>; def : Pat<(i32 (anyext GR8 :$src)), (MOVZX32rr8 GR8 :$src)>; // Except for i16 -> i32 since isel expect i16 ops to be promoted to i32. 
def : Pat<(i32 (anyext GR16:$src)), (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR16:$src, sub_16bit)>; def : Pat<(i64 (anyext GR8 :$src)), (SUBREG_TO_REG (i64 0), (MOVZX32rr8 GR8 :$src), sub_32bit)>; def : Pat<(i64 (anyext GR16:$src)), (SUBREG_TO_REG (i64 0), (MOVZX32rr16 GR16 :$src), sub_32bit)>; def : Pat<(i64 (anyext GR32:$src)), (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$src, sub_32bit)>; def : Pat<(i32 (anyext_sdiv GR8:$src)), (MOVSX32rr8 GR8:$src)>; // In the case of a 32-bit def that is known to implicitly zero-extend, // we can use a SUBREG_TO_REG. def : Pat<(i64 (zext def32:$src)), (SUBREG_TO_REG (i64 0), GR32:$src, sub_32bit)>; def : Pat<(i64 (and (anyext def32:$src), 0x00000000FFFFFFFF)), (SUBREG_TO_REG (i64 0), GR32:$src, sub_32bit)>; //===----------------------------------------------------------------------===// // Pattern match OR as ADD //===----------------------------------------------------------------------===// // If safe, we prefer to pattern match OR as ADD at isel time. ADD can be // 3-addressified into an LEA instruction to avoid copies. However, we also // want to finally emit these instructions as an or at the end of the code // generator to make the generated code easier to read. To do this, we select // into "disjoint bits" pseudo ops. // (or x1, x2) -> (add x1, x2) if two operands are known not to share bits. // Try this before the selecting to OR. let SchedRW = [WriteALU] in { let isConvertibleToThreeAddress = 1, isPseudo = 1, Constraints = "$src1 = $dst", Defs = [EFLAGS] in { let isCommutable = 1 in { def ADD8rr_DB : I<0, Pseudo, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2), "", // orb/addb REG, REG [(set GR8:$dst, (or_is_add GR8:$src1, GR8:$src2))]>; def ADD16rr_DB : I<0, Pseudo, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), "", // orw/addw REG, REG [(set GR16:$dst, (or_is_add GR16:$src1, GR16:$src2))]>; def ADD32rr_DB : I<0, Pseudo, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), "", // orl/addl REG, REG [(set GR32:$dst, (or_is_add GR32:$src1, GR32:$src2))]>; def ADD64rr_DB : I<0, Pseudo, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), "", // orq/addq REG, REG [(set GR64:$dst, (or_is_add GR64:$src1, GR64:$src2))]>; } // isCommutable def ADD8ri_DB : I<0, Pseudo, (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2), "", // orb/addb REG, imm8 [(set GR8:$dst, (or_is_add GR8:$src1, imm:$src2))]>; def ADD16ri_DB : I<0, Pseudo, (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2), "", // orw/addw REG, imm [(set GR16:$dst, (or_is_add GR16:$src1, imm:$src2))]>; def ADD32ri_DB : I<0, Pseudo, (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2), "", // orl/addl REG, imm [(set GR32:$dst, (or_is_add GR32:$src1, imm:$src2))]>; def ADD64ri32_DB : I<0, Pseudo, (outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2), "", // orq/addq REG, imm [(set GR64:$dst, (or_is_add GR64:$src1, i64immSExt32:$src2))]>; } } // AddedComplexity, SchedRW //===----------------------------------------------------------------------===// // Pattern match XOR as ADD //===----------------------------------------------------------------------===// // Prefer to pattern match XOR with min_signed_value as ADD at isel time. // ADD can be 3-addressified into an LEA instruction to avoid copies. 
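// [Illustrative C++ sketch, not part of this file] XOR with the minimum signed
// value only flips the sign bit, which modulo 2^n is the same as adding it,
// so the patterns below rewrite it as ADD and make it eligible for LEA
// three-address formation.
unsigned flip_sign_bit(unsigned x) {
  return x ^ 0x80000000u;   // same machine result as x + 0x80000000u
}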
let AddedComplexity = 5 in {
def : Pat<(xor GR8:$src1, -128),
          (ADD8ri GR8:$src1, -128)>;
def : Pat<(xor GR16:$src1, -32768),
          (ADD16ri GR16:$src1, -32768)>;
def : Pat<(xor GR32:$src1, -2147483648),
          (ADD32ri GR32:$src1, -2147483648)>;
}

//===----------------------------------------------------------------------===//
// Some peepholes
//===----------------------------------------------------------------------===//

// Odd encoding trick: -128 fits into an 8-bit immediate field while
// +128 doesn't, so in this special case use a sub instead of an add.
let Predicates = [NoNDD] in {
  def : Pat<(add GR16:$src1, 128),
            (SUB16ri GR16:$src1, -128)>;
  def : Pat<(add GR32:$src1, 128),
            (SUB32ri GR32:$src1, -128)>;
  def : Pat<(add GR64:$src1, 128),
            (SUB64ri32 GR64:$src1, -128)>;
  def : Pat<(X86add_flag_nocf GR16:$src1, 128),
            (SUB16ri GR16:$src1, -128)>;
  def : Pat<(X86add_flag_nocf GR32:$src1, 128),
            (SUB32ri GR32:$src1, -128)>;
  def : Pat<(X86add_flag_nocf GR64:$src1, 128),
            (SUB64ri32 GR64:$src1, -128)>;
}
let Predicates = [HasNDD] in {
  def : Pat<(add GR16:$src1, 128),
            (SUB16ri_ND GR16:$src1, -128)>;
  def : Pat<(add GR32:$src1, 128),
            (SUB32ri_ND GR32:$src1, -128)>;
  def : Pat<(add GR64:$src1, 128),
            (SUB64ri32_ND GR64:$src1, -128)>;
  def : Pat<(X86add_flag_nocf GR16:$src1, 128),
            (SUB16ri_ND GR16:$src1, -128)>;
  def : Pat<(X86add_flag_nocf GR32:$src1, 128),
            (SUB32ri_ND GR32:$src1, -128)>;
  def : Pat<(X86add_flag_nocf GR64:$src1, 128),
            (SUB64ri32_ND GR64:$src1, -128)>;
}
def : Pat<(store (add (loadi16 addr:$dst), 128), addr:$dst),
          (SUB16mi addr:$dst, -128)>;
def : Pat<(store (add (loadi32 addr:$dst), 128), addr:$dst),
          (SUB32mi addr:$dst, -128)>;
def : Pat<(store (add (loadi64 addr:$dst), 128), addr:$dst),
          (SUB64mi32 addr:$dst, -128)>;
let Predicates = [HasNDD] in {
  def : Pat<(add (loadi16 addr:$src), 128),
            (SUB16mi_ND addr:$src, -128)>;
  def : Pat<(add (loadi32 addr:$src), 128),
            (SUB32mi_ND addr:$src, -128)>;
  def : Pat<(add (loadi64 addr:$src), 128),
            (SUB64mi32_ND addr:$src, -128)>;
}

// The same trick applies for 32-bit immediate fields in 64-bit
// instructions.
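// [Illustrative C++ sketch, not part of this file] Concrete shapes for the two
// peepholes: +128 does not fit in a sign-extended 8-bit immediate but -128
// does, and +0x80000000 does not fit in a sign-extended 32-bit immediate but
// -0x80000000 does, so both may be emitted as a sub of the negated constant.
int add128(int x) { return x + 128; }                      // candidate for subl $-128
long long add2g(long long x) { return x + 0x80000000LL; }  // candidate for subq $-2147483648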
let Predicates = [NoNDD] in { def : Pat<(add GR64:$src1, 0x0000000080000000), (SUB64ri32 GR64:$src1, 0xffffffff80000000)>; def : Pat<(X86add_flag_nocf GR64:$src1, 0x0000000080000000), (SUB64ri32 GR64:$src1, 0xffffffff80000000)>; } let Predicates = [HasNDD] in { def : Pat<(add GR64:$src1, 0x0000000080000000), (SUB64ri32_ND GR64:$src1, 0xffffffff80000000)>; def : Pat<(X86add_flag_nocf GR64:$src1, 0x0000000080000000), (SUB64ri32_ND GR64:$src1, 0xffffffff80000000)>; } def : Pat<(store (add (loadi64 addr:$dst), 0x0000000080000000), addr:$dst), (SUB64mi32 addr:$dst, 0xffffffff80000000)>; let Predicates = [HasNDD] in { def : Pat<(add(loadi64 addr:$src), 0x0000000080000000), (SUB64mi32_ND addr:$src, 0xffffffff80000000)>; } // Depositing value to 8/16 bit subreg: def : Pat<(or (and GR64:$dst, -256), (i64 (zextloadi8 addr:$src))), (INSERT_SUBREG (i64 (COPY $dst)), (MOV8rm i8mem:$src), sub_8bit)>; def : Pat<(or (and GR32:$dst, -256), (i32 (zextloadi8 addr:$src))), (INSERT_SUBREG (i32 (COPY $dst)), (MOV8rm i8mem:$src), sub_8bit)>; def : Pat<(or (and GR64:$dst, -65536), (i64 (zextloadi16 addr:$src))), (INSERT_SUBREG (i64 (COPY $dst)), (MOV16rm i16mem:$src), sub_16bit)>; def : Pat<(or (and GR32:$dst, -65536), (i32 (zextloadi16 addr:$src))), (INSERT_SUBREG (i32 (COPY $dst)), (MOV16rm i16mem:$src), sub_16bit)>; // To avoid needing to materialize an immediate in a register, use a 32-bit and // with implicit zero-extension instead of a 64-bit and if the immediate has at // least 32 bits of leading zeros. If in addition the last 32 bits can be // represented with a sign extension of a 8 bit constant, use that. // This can also reduce instruction size by eliminating the need for the REX // prefix. // AddedComplexity is needed to give priority over i64immSExt8 and i64immSExt32. let AddedComplexity = 1 in { let Predicates = [NoNDD] in { def : Pat<(and GR64:$src, i64immZExt32:$imm), (SUBREG_TO_REG (i64 0), (AND32ri (EXTRACT_SUBREG GR64:$src, sub_32bit), (i32 (GetLo32XForm imm:$imm))), sub_32bit)>; } let Predicates = [HasNDD] in { def : Pat<(and GR64:$src, i64immZExt32:$imm), (SUBREG_TO_REG (i64 0), (AND32ri_ND (EXTRACT_SUBREG GR64:$src, sub_32bit), (i32 (GetLo32XForm imm:$imm))), sub_32bit)>; } } // AddedComplexity = 1 // AddedComplexity is needed due to the increased complexity on the // i64immZExt32SExt8 and i64immZExt32 patterns above. Applying this to all // the MOVZX patterns keeps thems together in DAGIsel tables. let AddedComplexity = 1 in { // r & (2^16-1) ==> movz def : Pat<(and GR32:$src1, 0xffff), (MOVZX32rr16 (EXTRACT_SUBREG GR32:$src1, sub_16bit))>; // r & (2^8-1) ==> movz def : Pat<(and GR32:$src1, 0xff), (MOVZX32rr8 (EXTRACT_SUBREG GR32:$src1, sub_8bit))>; // r & (2^8-1) ==> movz def : Pat<(and GR16:$src1, 0xff), (EXTRACT_SUBREG (MOVZX32rr8 (EXTRACT_SUBREG GR16:$src1, sub_8bit)), sub_16bit)>; // r & (2^32-1) ==> movz def : Pat<(and GR64:$src, 0x00000000FFFFFFFF), (SUBREG_TO_REG (i64 0), (MOV32rr (EXTRACT_SUBREG GR64:$src, sub_32bit)), sub_32bit)>; // r & (2^16-1) ==> movz def : Pat<(and GR64:$src, 0xffff), (SUBREG_TO_REG (i64 0), (MOVZX32rr16 (i16 (EXTRACT_SUBREG GR64:$src, sub_16bit))), sub_32bit)>; // r & (2^8-1) ==> movz def : Pat<(and GR64:$src, 0xff), (SUBREG_TO_REG (i64 0), (MOVZX32rr8 (i8 (EXTRACT_SUBREG GR64:$src, sub_8bit))), sub_32bit)>; } // AddedComplexity = 1 // Try to use BTS/BTR/BTC for single bit operations on the upper 32-bits. 
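// [Illustrative C++ sketch, not part of this file] Setting, clearing, or
// toggling a single bit above bit 31 of a 64-bit value would otherwise need
// the mask materialized with a movabs; bts/btr/btc encode the bit index as an
// 8-bit immediate instead. Per the OptForSize predicate below this is only
// done when optimizing for size.
unsigned long long set_bit40(unsigned long long x)   { return x |  (1ULL << 40); }  // bts candidate
unsigned long long clear_bit40(unsigned long long x) { return x & ~(1ULL << 40); }  // btr candidate
unsigned long long flip_bit40(unsigned long long x)  { return x ^  (1ULL << 40); }  // btc candidate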
def BTRXForm : SDNodeXFormgetAPIntValue().countr_one(), SDLoc(N)); }]>; def BTCBTSXForm : SDNodeXFormgetAPIntValue().countr_zero(), SDLoc(N)); }]>; def BTRMask64 : ImmLeaf(Imm) && !isInt<32>(Imm) && isPowerOf2_64(~Imm); }]>; def BTCBTSMask64 : ImmLeaf(Imm) && isPowerOf2_64(Imm); }]>; // For now only do this for optsize. let AddedComplexity = 1, Predicates=[OptForSize] in { def : Pat<(and GR64:$src1, BTRMask64:$mask), (BTR64ri8 GR64:$src1, (BTRXForm imm:$mask))>; def : Pat<(or GR64:$src1, BTCBTSMask64:$mask), (BTS64ri8 GR64:$src1, (BTCBTSXForm imm:$mask))>; def : Pat<(xor GR64:$src1, BTCBTSMask64:$mask), (BTC64ri8 GR64:$src1, (BTCBTSXForm imm:$mask))>; } // sext_inreg patterns def : Pat<(sext_inreg GR32:$src, i16), (MOVSX32rr16 (EXTRACT_SUBREG GR32:$src, sub_16bit))>; def : Pat<(sext_inreg GR32:$src, i8), (MOVSX32rr8 (EXTRACT_SUBREG GR32:$src, sub_8bit))>; def : Pat<(sext_inreg GR16:$src, i8), (EXTRACT_SUBREG (MOVSX32rr8 (EXTRACT_SUBREG GR16:$src, sub_8bit)), sub_16bit)>; def : Pat<(sext_inreg GR64:$src, i32), (MOVSX64rr32 (EXTRACT_SUBREG GR64:$src, sub_32bit))>; def : Pat<(sext_inreg GR64:$src, i16), (MOVSX64rr16 (EXTRACT_SUBREG GR64:$src, sub_16bit))>; def : Pat<(sext_inreg GR64:$src, i8), (MOVSX64rr8 (EXTRACT_SUBREG GR64:$src, sub_8bit))>; // sext, sext_load, zext, zext_load def: Pat<(i16 (sext GR8:$src)), (EXTRACT_SUBREG (MOVSX32rr8 GR8:$src), sub_16bit)>; def: Pat<(sextloadi16i8 addr:$src), (EXTRACT_SUBREG (MOVSX32rm8 addr:$src), sub_16bit)>; def: Pat<(i16 (zext GR8:$src)), (EXTRACT_SUBREG (MOVZX32rr8 GR8:$src), sub_16bit)>; def: Pat<(zextloadi16i8 addr:$src), (EXTRACT_SUBREG (MOVZX32rm8 addr:$src), sub_16bit)>; // trunc patterns def : Pat<(i16 (trunc GR32:$src)), (EXTRACT_SUBREG GR32:$src, sub_16bit)>; def : Pat<(i8 (trunc GR32:$src)), (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)), sub_8bit)>, Requires<[Not64BitMode]>; def : Pat<(i8 (trunc GR16:$src)), (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)), sub_8bit)>, Requires<[Not64BitMode]>; def : Pat<(i32 (trunc GR64:$src)), (EXTRACT_SUBREG GR64:$src, sub_32bit)>; def : Pat<(i16 (trunc GR64:$src)), (EXTRACT_SUBREG GR64:$src, sub_16bit)>; def : Pat<(i8 (trunc GR64:$src)), (EXTRACT_SUBREG GR64:$src, sub_8bit)>; def : Pat<(i8 (trunc GR32:$src)), (EXTRACT_SUBREG GR32:$src, sub_8bit)>, Requires<[In64BitMode]>; def : Pat<(i8 (trunc GR16:$src)), (EXTRACT_SUBREG GR16:$src, sub_8bit)>, Requires<[In64BitMode]>; def immff00_ffff : ImmLeaf= 0xff00 && Imm <= 0xffff; }]>; // h-register tricks def : Pat<(i8 (trunc (srl_su GR16:$src, (i8 8)))), (EXTRACT_SUBREG GR16:$src, sub_8bit_hi)>, Requires<[Not64BitMode]>; def : Pat<(i8 (trunc (srl_su (i32 (anyext GR16:$src)), (i8 8)))), (EXTRACT_SUBREG GR16:$src, sub_8bit_hi)>, Requires<[Not64BitMode]>; def : Pat<(i8 (trunc (srl_su GR32:$src, (i8 8)))), (EXTRACT_SUBREG GR32:$src, sub_8bit_hi)>, Requires<[Not64BitMode]>; def : Pat<(srl GR16:$src, (i8 8)), (EXTRACT_SUBREG (MOVZX32rr8_NOREX (EXTRACT_SUBREG GR16:$src, sub_8bit_hi)), sub_16bit)>; def : Pat<(i32 (zext (srl_su GR16:$src, (i8 8)))), (MOVZX32rr8_NOREX (EXTRACT_SUBREG GR16:$src, sub_8bit_hi))>; def : Pat<(i32 (anyext (srl_su GR16:$src, (i8 8)))), (MOVZX32rr8_NOREX (EXTRACT_SUBREG GR16:$src, sub_8bit_hi))>; def : Pat<(and (srl_su GR32:$src, (i8 8)), (i32 255)), (MOVZX32rr8_NOREX (EXTRACT_SUBREG GR32:$src, sub_8bit_hi))>; def : Pat<(srl (and_su GR32:$src, immff00_ffff), (i8 8)), (MOVZX32rr8_NOREX (EXTRACT_SUBREG GR32:$src, sub_8bit_hi))>; // h-register tricks. 
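// [Illustrative C++ sketch, not part of this file] Extracting bits 8..15 of a
// value is what the h-register patterns around here recognize: the shift plus
// truncate below can become a single read of %ah/%bh/%ch/%dh (or a movzbl of
// the high byte) instead of a real shift.
unsigned char second_byte(unsigned x) {
  return (unsigned char)(x >> 8);   // may select the sub_8bit_hi extraction
}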
// For now, be conservative on x86-64 and use an h-register extract only if the // value is immediately zero-extended or stored, which are somewhat common // cases. This uses a bunch of code to prevent a register requiring a REX prefix // from being allocated in the same instruction as the h register, as there's // currently no way to describe this requirement to the register allocator. // h-register extract and zero-extend. def : Pat<(and (srl_su GR64:$src, (i8 8)), (i64 255)), (SUBREG_TO_REG (i64 0), (MOVZX32rr8_NOREX (EXTRACT_SUBREG GR64:$src, sub_8bit_hi)), sub_32bit)>; def : Pat<(i64 (zext (srl_su GR16:$src, (i8 8)))), (SUBREG_TO_REG (i64 0), (MOVZX32rr8_NOREX (EXTRACT_SUBREG GR16:$src, sub_8bit_hi)), sub_32bit)>; def : Pat<(i64 (anyext (srl_su GR16:$src, (i8 8)))), (SUBREG_TO_REG (i64 0), (MOVZX32rr8_NOREX (EXTRACT_SUBREG GR16:$src, sub_8bit_hi)), sub_32bit)>; // h-register extract and store. def : Pat<(store (i8 (trunc_su (srl_su GR64:$src, (i8 8)))), addr:$dst), (MOV8mr_NOREX addr:$dst, (EXTRACT_SUBREG GR64:$src, sub_8bit_hi))>; def : Pat<(store (i8 (trunc_su (srl_su GR32:$src, (i8 8)))), addr:$dst), (MOV8mr_NOREX addr:$dst, (EXTRACT_SUBREG GR32:$src, sub_8bit_hi))>, Requires<[In64BitMode]>; def : Pat<(store (i8 (trunc_su (srl_su GR16:$src, (i8 8)))), addr:$dst), (MOV8mr_NOREX addr:$dst, (EXTRACT_SUBREG GR16:$src, sub_8bit_hi))>, Requires<[In64BitMode]>; // Special pattern to catch the last step of __builtin_parity handling. Our // goal is to use an xor of an h-register with the corresponding l-register. // The above patterns would handle this on non 64-bit targets, but for 64-bit // we need to be more careful. We're using a NOREX instruction here in case // register allocation fails to keep the two registers together. So we need to // make sure we can't accidentally mix R8-R15 with an h-register. def : Pat<(X86xor_flag (i8 (trunc GR32:$src)), (i8 (trunc (srl_su GR32:$src, (i8 8))))), (XOR8rr_NOREX (EXTRACT_SUBREG GR32:$src, sub_8bit), (EXTRACT_SUBREG GR32:$src, sub_8bit_hi))>; // (shl x, 1) ==> (add x, x) // Note that if x is undef (immediate or otherwise), we could theoretically // end up with the two uses of x getting different values, producing a result // where the least significant bit is not 0. However, the probability of this // happening is considered low enough that this is officially not a // "real problem". let Predicates = [NoNDD] in { def : Pat<(shl GR8 :$src1, (i8 1)), (ADD8rr GR8 :$src1, GR8 :$src1)>; def : Pat<(shl GR16:$src1, (i8 1)), (ADD16rr GR16:$src1, GR16:$src1)>; def : Pat<(shl GR32:$src1, (i8 1)), (ADD32rr GR32:$src1, GR32:$src1)>; def : Pat<(shl GR64:$src1, (i8 1)), (ADD64rr GR64:$src1, GR64:$src1)>; } let Predicates = [HasNDD] in { def : Pat<(shl GR8 :$src1, (i8 1)), (ADD8rr_ND GR8 :$src1, GR8 :$src1)>; def : Pat<(shl GR16:$src1, (i8 1)), (ADD16rr_ND GR16:$src1, GR16:$src1)>; def : Pat<(shl GR32:$src1, (i8 1)), (ADD32rr_ND GR32:$src1, GR32:$src1)>; def : Pat<(shl GR64:$src1, (i8 1)), (ADD64rr_ND GR64:$src1, GR64:$src1)>; } // Shift amount is implicitly masked. 
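// [Illustrative C++ sketch, not part of this file] Hardware shifts by %cl
// already reduce the count modulo 32 (modulo 64 for 64-bit operands), so the
// explicit mask in the portable source idiom below is redundant and the
// patterns that follow drop it.
unsigned shl_portable(unsigned x, unsigned n) {
  return x << (n & 31);   // selects the same "shll %cl" as an unmasked shift
}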
multiclass MaskedShiftAmountPats { // (shift x (and y, 31)) ==> (shift x, y) // (shift x (and y, 63)) ==> (shift x, y) let Predicates = [NoNDD] in { def : Pat<(frag GR8:$src1, (shiftMask32 CL)), (!cast(NAME # "8rCL") GR8:$src1)>; def : Pat<(frag GR16:$src1, (shiftMask32 CL)), (!cast(NAME # "16rCL") GR16:$src1)>; def : Pat<(frag GR32:$src1, (shiftMask32 CL)), (!cast(NAME # "32rCL") GR32:$src1)>; def : Pat<(frag GR64:$src1, (shiftMask64 CL)), (!cast(NAME # "64rCL") GR64:$src1)>; } let Predicates = [HasNDD] in { def : Pat<(frag GR8:$src1, (shiftMask32 CL)), (!cast(NAME # "8rCL_ND") GR8:$src1)>; def : Pat<(frag GR16:$src1, (shiftMask32 CL)), (!cast(NAME # "16rCL_ND") GR16:$src1)>; def : Pat<(frag GR32:$src1, (shiftMask32 CL)), (!cast(NAME # "32rCL_ND") GR32:$src1)>; def : Pat<(frag GR64:$src1, (shiftMask64 CL)), (!cast(NAME # "64rCL_ND") GR64:$src1)>; } def : Pat<(store (frag (loadi8 addr:$dst), (shiftMask32 CL)), addr:$dst), (!cast(NAME # "8mCL") addr:$dst)>; def : Pat<(store (frag (loadi16 addr:$dst), (shiftMask32 CL)), addr:$dst), (!cast(NAME # "16mCL") addr:$dst)>; def : Pat<(store (frag (loadi32 addr:$dst), (shiftMask32 CL)), addr:$dst), (!cast(NAME # "32mCL") addr:$dst)>; def : Pat<(store (frag (loadi64 addr:$dst), (shiftMask64 CL)), addr:$dst), (!cast(NAME # "64mCL") addr:$dst)>; let Predicates = [HasNDD] in { def : Pat<(frag (loadi8 addr:$src), (shiftMask32 CL)), (!cast(NAME # "8mCL_ND") addr:$src)>; def : Pat<(frag (loadi16 addr:$src), (shiftMask32 CL)), (!cast(NAME # "16mCL_ND") addr:$src)>; def : Pat<(frag (loadi32 addr:$src), (shiftMask32 CL)), (!cast(NAME # "32mCL_ND") addr:$src)>; def : Pat<(frag (loadi64 addr:$src), (shiftMask64 CL)), (!cast(NAME # "64mCL_ND") addr:$src)>; } } defm SHL : MaskedShiftAmountPats; defm SHR : MaskedShiftAmountPats; defm SAR : MaskedShiftAmountPats; // ROL/ROR instructions allow a stronger mask optimization than shift for 8- and // 16-bit. We can remove a mask of any (bitwidth - 1) on the rotation amount // because over-rotating produces the same result. This is noted in the Intel // docs with: "tempCOUNT <- (COUNT & COUNTMASK) MOD SIZE". Masking the rotation // amount could affect EFLAGS results, but that does not matter because we are // not tracking flags for these nodes. 
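// [Illustrative C++ sketch, not part of this file] Rotates tolerate an even
// looser mask than shifts: any (bitwidth - 1) mask on the amount can be
// dropped because over-rotating wraps around. The portable rotate idiom below
// is a typical source of these nodes.
unsigned short rotl16(unsigned short v, unsigned c) {
  c &= 15;
  return (unsigned short)((v << c) | (v >> ((16 - c) & 15)));  // may select rolw %cl
}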
multiclass MaskedRotateAmountPats { // (rot x (and y, BitWidth - 1)) ==> (rot x, y) let Predicates = [NoNDD] in { def : Pat<(frag GR8:$src1, (shiftMask8 CL)), (!cast(NAME # "8rCL") GR8:$src1)>; def : Pat<(frag GR16:$src1, (shiftMask16 CL)), (!cast(NAME # "16rCL") GR16:$src1)>; def : Pat<(frag GR32:$src1, (shiftMask32 CL)), (!cast(NAME # "32rCL") GR32:$src1)>; def : Pat<(frag GR64:$src1, (shiftMask64 CL)), (!cast(NAME # "64rCL") GR64:$src1)>; } let Predicates = [HasNDD] in { def : Pat<(frag GR8:$src1, (shiftMask8 CL)), (!cast(NAME # "8rCL_ND") GR8:$src1)>; def : Pat<(frag GR16:$src1, (shiftMask16 CL)), (!cast(NAME # "16rCL_ND") GR16:$src1)>; def : Pat<(frag GR32:$src1, (shiftMask32 CL)), (!cast(NAME # "32rCL_ND") GR32:$src1)>; def : Pat<(frag GR64:$src1, (shiftMask64 CL)), (!cast(NAME # "64rCL_ND") GR64:$src1)>; } def : Pat<(store (frag (loadi8 addr:$dst), (shiftMask8 CL)), addr:$dst), (!cast(NAME # "8mCL") addr:$dst)>; def : Pat<(store (frag (loadi16 addr:$dst), (shiftMask16 CL)), addr:$dst), (!cast(NAME # "16mCL") addr:$dst)>; def : Pat<(store (frag (loadi32 addr:$dst), (shiftMask32 CL)), addr:$dst), (!cast(NAME # "32mCL") addr:$dst)>; def : Pat<(store (frag (loadi64 addr:$dst), (shiftMask64 CL)), addr:$dst), (!cast(NAME # "64mCL") addr:$dst)>; let Predicates = [HasNDD] in { def : Pat<(frag (loadi8 addr:$src), (shiftMask8 CL)), (!cast(NAME # "8mCL_ND") addr:$src)>; def : Pat<(frag (loadi16 addr:$src), (shiftMask16 CL)), (!cast(NAME # "16mCL_ND") addr:$src)>; def : Pat<(frag (loadi32 addr:$src), (shiftMask32 CL)), (!cast(NAME # "32mCL_ND") addr:$src)>; def : Pat<(frag (loadi64 addr:$src), (shiftMask64 CL)), (!cast(NAME # "64mCL_ND") addr:$src)>; } } defm ROL : MaskedRotateAmountPats; defm ROR : MaskedRotateAmountPats; multiclass MaskedShlrdAmountPats { let Predicates = [p] in { // Double "funnel" shift amount is implicitly masked. // (fshl/fshr x (and y, 31)) ==> (fshl/fshr x, y) (NOTE: modulo32) def : Pat<(X86fshl GR16:$src1, GR16:$src2, (shiftMask32 CL)), (!cast(SHLD16rrCL#suffix) GR16:$src1, GR16:$src2)>; def : Pat<(X86fshr GR16:$src2, GR16:$src1, (shiftMask32 CL)), (!cast(SHRD16rrCL#suffix) GR16:$src1, GR16:$src2)>; // (fshl/fshr x (and y, 31)) ==> (fshl/fshr x, y) def : Pat<(fshl GR32:$src1, GR32:$src2, (shiftMask32 CL)), (!cast(SHLD32rrCL#suffix) GR32:$src1, GR32:$src2)>; def : Pat<(fshr GR32:$src2, GR32:$src1, (shiftMask32 CL)), (!cast(SHRD32rrCL#suffix) GR32:$src1, GR32:$src2)>; // (fshl/fshr x (and y, 63)) ==> (fshl/fshr x, y) def : Pat<(fshl GR64:$src1, GR64:$src2, (shiftMask64 CL)), (!cast(SHLD64rrCL#suffix) GR64:$src1, GR64:$src2)>; def : Pat<(fshr GR64:$src2, GR64:$src1, (shiftMask64 CL)), (!cast(SHRD64rrCL#suffix) GR64:$src1, GR64:$src2)>; } } defm : MaskedShlrdAmountPats<"", NoNDD>; defm : MaskedShlrdAmountPats<"_ND", HasNDD>; // Use BTR/BTS/BTC for clearing/setting/toggling a bit in a variable location. multiclass OneBitPats { def : Pat<(and rc:$src1, (rotl -2, GR8:$src2)), (btr rc:$src1, (INSERT_SUBREG (vt (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>; def : Pat<(or rc:$src1, (shl 1, GR8:$src2)), (bts rc:$src1, (INSERT_SUBREG (vt (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>; def : Pat<(xor rc:$src1, (shl 1, GR8:$src2)), (btc rc:$src1, (INSERT_SUBREG (vt (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>; // Similar to above, but removing unneeded masking of the shift amount. 
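// Illustrative example (not from this patch): `x & rotl(-2, (n & 31))` clears
// bit (n & 31) of a 32-bit value; since the BT-family instructions already
// reduce a register bit index modulo the operand width, the mask on the shift
// amount is redundant and the patterns below select plain BTR/BTS/BTC.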
def : Pat<(and rc:$src1, (rotl -2, (mask GR8:$src2))), (btr rc:$src1, (INSERT_SUBREG (vt (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>; def : Pat<(or rc:$src1, (shl 1, (mask GR8:$src2))), (bts rc:$src1, (INSERT_SUBREG (vt (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>; def : Pat<(xor rc:$src1, (shl 1, (mask GR8:$src2))), (btc rc:$src1, (INSERT_SUBREG (vt (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>; } defm : OneBitPats; defm : OneBitPats; defm : OneBitPats; //===----------------------------------------------------------------------===// // EFLAGS-defining Patterns //===----------------------------------------------------------------------===// multiclass EFLAGSDefiningPats { let Predicates = [p] in { // add reg, reg def : Pat<(add GR8 :$src1, GR8 :$src2), (!cast(ADD8rr#suffix) GR8 :$src1, GR8 :$src2)>; def : Pat<(add GR16:$src1, GR16:$src2), (!cast(ADD16rr#suffix) GR16:$src1, GR16:$src2)>; def : Pat<(add GR32:$src1, GR32:$src2), (!cast(ADD32rr#suffix) GR32:$src1, GR32:$src2)>; def : Pat<(add GR64:$src1, GR64:$src2), (!cast(ADD64rr#suffix) GR64:$src1, GR64:$src2)>; // add reg, mem def : Pat<(add GR8:$src1, (loadi8 addr:$src2)), (!cast(ADD8rm#suffix) GR8:$src1, addr:$src2)>; def : Pat<(add GR16:$src1, (loadi16 addr:$src2)), (!cast(ADD16rm#suffix) GR16:$src1, addr:$src2)>; def : Pat<(add GR32:$src1, (loadi32 addr:$src2)), (!cast(ADD32rm#suffix) GR32:$src1, addr:$src2)>; def : Pat<(add GR64:$src1, (loadi64 addr:$src2)), (!cast(ADD64rm#suffix) GR64:$src1, addr:$src2)>; // add reg, imm def : Pat<(add GR8 :$src1, imm:$src2), (!cast(ADD8ri#suffix) GR8:$src1 , imm:$src2)>; def : Pat<(add GR16:$src1, imm:$src2), (!cast(ADD16ri#suffix) GR16:$src1, imm:$src2)>; def : Pat<(add GR32:$src1, imm:$src2), (!cast(ADD32ri#suffix) GR32:$src1, imm:$src2)>; def : Pat<(add GR64:$src1, i64immSExt32:$src2), (!cast(ADD64ri32#suffix) GR64:$src1, i64immSExt32:$src2)>; // sub reg, reg def : Pat<(sub GR8 :$src1, GR8 :$src2), (!cast(SUB8rr#suffix) GR8 :$src1, GR8 :$src2)>; def : Pat<(sub GR16:$src1, GR16:$src2), (!cast(SUB16rr#suffix) GR16:$src1, GR16:$src2)>; def : Pat<(sub GR32:$src1, GR32:$src2), (!cast(SUB32rr#suffix) GR32:$src1, GR32:$src2)>; def : Pat<(sub GR64:$src1, GR64:$src2), (!cast(SUB64rr#suffix) GR64:$src1, GR64:$src2)>; // sub reg, mem def : Pat<(sub GR8:$src1, (loadi8 addr:$src2)), (!cast(SUB8rm#suffix) GR8:$src1, addr:$src2)>; def : Pat<(sub GR16:$src1, (loadi16 addr:$src2)), (!cast(SUB16rm#suffix) GR16:$src1, addr:$src2)>; def : Pat<(sub GR32:$src1, (loadi32 addr:$src2)), (!cast(SUB32rm#suffix) GR32:$src1, addr:$src2)>; def : Pat<(sub GR64:$src1, (loadi64 addr:$src2)), (!cast(SUB64rm#suffix) GR64:$src1, addr:$src2)>; // sub reg, imm def : Pat<(sub GR8:$src1, imm:$src2), (!cast(SUB8ri#suffix) GR8:$src1, imm:$src2)>; def : Pat<(sub GR16:$src1, imm:$src2), (!cast(SUB16ri#suffix) GR16:$src1, imm:$src2)>; def : Pat<(sub GR32:$src1, imm:$src2), (!cast(SUB32ri#suffix) GR32:$src1, imm:$src2)>; def : Pat<(sub GR64:$src1, i64immSExt32:$src2), (!cast(SUB64ri32#suffix) GR64:$src1, i64immSExt32:$src2)>; // sub 0, reg def : Pat<(X86sub_flag 0, GR8 :$src), (!cast(NEG8r#suffix) GR8 :$src)>; def : Pat<(X86sub_flag 0, GR16:$src), (!cast(NEG16r#suffix) GR16:$src)>; def : Pat<(X86sub_flag 0, GR32:$src), (!cast(NEG32r#suffix) GR32:$src)>; def : Pat<(X86sub_flag 0, GR64:$src), (!cast(NEG64r#suffix) GR64:$src)>; // mul reg, reg def : Pat<(mul GR16:$src1, GR16:$src2), (!cast(IMUL16rr#suffix) GR16:$src1, GR16:$src2)>; def : Pat<(mul GR32:$src1, GR32:$src2), (!cast(IMUL32rr#suffix) GR32:$src1, GR32:$src2)>; def : Pat<(mul GR64:$src1, GR64:$src2), 
(!cast(IMUL64rr#suffix) GR64:$src1, GR64:$src2)>; // mul reg, mem def : Pat<(mul GR16:$src1, (loadi16 addr:$src2)), (!cast(IMUL16rm#suffix) GR16:$src1, addr:$src2)>; def : Pat<(mul GR32:$src1, (loadi32 addr:$src2)), (!cast(IMUL32rm#suffix) GR32:$src1, addr:$src2)>; def : Pat<(mul GR64:$src1, (loadi64 addr:$src2)), (!cast(IMUL64rm#suffix) GR64:$src1, addr:$src2)>; // or reg/reg. def : Pat<(or GR8 :$src1, GR8 :$src2), (!cast(OR8rr#suffix) GR8 :$src1, GR8 :$src2)>; def : Pat<(or GR16:$src1, GR16:$src2), (!cast(OR16rr#suffix) GR16:$src1, GR16:$src2)>; def : Pat<(or GR32:$src1, GR32:$src2), (!cast(OR32rr#suffix) GR32:$src1, GR32:$src2)>; def : Pat<(or GR64:$src1, GR64:$src2), (!cast(OR64rr#suffix) GR64:$src1, GR64:$src2)>; // or reg/mem def : Pat<(or GR8:$src1, (loadi8 addr:$src2)), (!cast(OR8rm#suffix) GR8:$src1, addr:$src2)>; def : Pat<(or GR16:$src1, (loadi16 addr:$src2)), (!cast(OR16rm#suffix) GR16:$src1, addr:$src2)>; def : Pat<(or GR32:$src1, (loadi32 addr:$src2)), (!cast(OR32rm#suffix) GR32:$src1, addr:$src2)>; def : Pat<(or GR64:$src1, (loadi64 addr:$src2)), (!cast(OR64rm#suffix) GR64:$src1, addr:$src2)>; // or reg/imm def : Pat<(or GR8:$src1 , imm:$src2), (!cast(OR8ri#suffix) GR8 :$src1, imm:$src2)>; def : Pat<(or GR16:$src1, imm:$src2), (!cast(OR16ri#suffix) GR16:$src1, imm:$src2)>; def : Pat<(or GR32:$src1, imm:$src2), (!cast(OR32ri#suffix) GR32:$src1, imm:$src2)>; def : Pat<(or GR64:$src1, i64immSExt32:$src2), (!cast(OR64ri32#suffix) GR64:$src1, i64immSExt32:$src2)>; // xor reg/reg def : Pat<(xor GR8 :$src1, GR8 :$src2), (!cast(XOR8rr#suffix) GR8 :$src1, GR8 :$src2)>; def : Pat<(xor GR16:$src1, GR16:$src2), (!cast(XOR16rr#suffix) GR16:$src1, GR16:$src2)>; def : Pat<(xor GR32:$src1, GR32:$src2), (!cast(XOR32rr#suffix) GR32:$src1, GR32:$src2)>; def : Pat<(xor GR64:$src1, GR64:$src2), (!cast(XOR64rr#suffix) GR64:$src1, GR64:$src2)>; // xor reg/mem def : Pat<(xor GR8:$src1, (loadi8 addr:$src2)), (!cast(XOR8rm#suffix) GR8:$src1, addr:$src2)>; def : Pat<(xor GR16:$src1, (loadi16 addr:$src2)), (!cast(XOR16rm#suffix) GR16:$src1, addr:$src2)>; def : Pat<(xor GR32:$src1, (loadi32 addr:$src2)), (!cast(XOR32rm#suffix) GR32:$src1, addr:$src2)>; def : Pat<(xor GR64:$src1, (loadi64 addr:$src2)), (!cast(XOR64rm#suffix) GR64:$src1, addr:$src2)>; // xor reg/imm def : Pat<(xor GR8:$src1, imm:$src2), (!cast(XOR8ri#suffix) GR8:$src1, imm:$src2)>; def : Pat<(xor GR16:$src1, imm:$src2), (!cast(XOR16ri#suffix) GR16:$src1, imm:$src2)>; def : Pat<(xor GR32:$src1, imm:$src2), (!cast(XOR32ri#suffix) GR32:$src1, imm:$src2)>; def : Pat<(xor GR64:$src1, i64immSExt32:$src2), (!cast(XOR64ri32#suffix) GR64:$src1, i64immSExt32:$src2)>; // and reg/reg def : Pat<(and GR8 :$src1, GR8 :$src2), (!cast(AND8rr#suffix) GR8 :$src1, GR8 :$src2)>; def : Pat<(and GR16:$src1, GR16:$src2), (!cast(AND16rr#suffix) GR16:$src1, GR16:$src2)>; def : Pat<(and GR32:$src1, GR32:$src2), (!cast(AND32rr#suffix) GR32:$src1, GR32:$src2)>; def : Pat<(and GR64:$src1, GR64:$src2), (!cast(AND64rr#suffix) GR64:$src1, GR64:$src2)>; // and reg/mem def : Pat<(and GR8:$src1, (loadi8 addr:$src2)), (!cast(AND8rm#suffix) GR8:$src1, addr:$src2)>; def : Pat<(and GR16:$src1, (loadi16 addr:$src2)), (!cast(AND16rm#suffix) GR16:$src1, addr:$src2)>; def : Pat<(and GR32:$src1, (loadi32 addr:$src2)), (!cast(AND32rm#suffix) GR32:$src1, addr:$src2)>; def : Pat<(and GR64:$src1, (loadi64 addr:$src2)), (!cast(AND64rm#suffix) GR64:$src1, addr:$src2)>; // and reg/imm def : Pat<(and GR8:$src1, imm:$src2), (!cast(AND8ri#suffix) GR8:$src1, imm:$src2)>; def : Pat<(and 
GR16:$src1, imm:$src2), (!cast(AND16ri#suffix) GR16:$src1, imm:$src2)>; def : Pat<(and GR32:$src1, imm:$src2), (!cast(AND32ri#suffix) GR32:$src1, imm:$src2)>; def : Pat<(and GR64:$src1, i64immSExt32:$src2), (!cast(AND64ri32#suffix) GR64:$src1, i64immSExt32:$src2)>; } // Increment/Decrement reg. // Do not make INC/DEC if it is slow let Predicates = [UseIncDec, p] in { def : Pat<(add GR8:$src, 1), (!cast(INC8r#suffix) GR8:$src)>; def : Pat<(add GR16:$src, 1), (!cast(INC16r#suffix) GR16:$src)>; def : Pat<(add GR32:$src, 1), (!cast(INC32r#suffix) GR32:$src)>; def : Pat<(add GR64:$src, 1), (!cast(INC64r#suffix) GR64:$src)>; def : Pat<(add GR8:$src, -1), (!cast(DEC8r#suffix) GR8:$src)>; def : Pat<(add GR16:$src, -1), (!cast(DEC16r#suffix) GR16:$src)>; def : Pat<(add GR32:$src, -1), (!cast(DEC32r#suffix) GR32:$src)>; def : Pat<(add GR64:$src, -1), (!cast(DEC64r#suffix) GR64:$src)>; def : Pat<(X86add_flag_nocf GR8:$src, -1), (!cast(DEC8r#suffix) GR8:$src)>; def : Pat<(X86add_flag_nocf GR16:$src, -1), (!cast(DEC16r#suffix) GR16:$src)>; def : Pat<(X86add_flag_nocf GR32:$src, -1), (!cast(DEC32r#suffix) GR32:$src)>; def : Pat<(X86add_flag_nocf GR64:$src, -1), (!cast(DEC64r#suffix) GR64:$src)>; def : Pat<(X86sub_flag_nocf GR8:$src, -1), (!cast(INC8r#suffix) GR8:$src)>; def : Pat<(X86sub_flag_nocf GR16:$src, -1), (!cast(INC16r#suffix) GR16:$src)>; def : Pat<(X86sub_flag_nocf GR32:$src, -1), (!cast(INC32r#suffix) GR32:$src)>; def : Pat<(X86sub_flag_nocf GR64:$src, -1), (!cast(INC64r#suffix) GR64:$src)>; def : Pat<(or_is_add GR8:$src, 1), (!cast(INC8r#suffix) GR8:$src)>; def : Pat<(or_is_add GR16:$src, 1), (!cast(INC16r#suffix) GR16:$src)>; def : Pat<(or_is_add GR32:$src, 1), (!cast(INC32r#suffix) GR32:$src)>; def : Pat<(or_is_add GR64:$src, 1), (!cast(INC64r#suffix) GR64:$src)>; } } defm : EFLAGSDefiningPats<"", NoNDD>; defm : EFLAGSDefiningPats<"_ND", HasNDD>; // mul reg, imm def : Pat<(mul GR16:$src1, imm:$src2), (IMUL16rri GR16:$src1, imm:$src2)>; def : Pat<(mul GR32:$src1, imm:$src2), (IMUL32rri GR32:$src1, imm:$src2)>; def : Pat<(mul GR64:$src1, i64immSExt32:$src2), (IMUL64rri32 GR64:$src1, i64immSExt32:$src2)>; // reg = mul mem, imm def : Pat<(mul (loadi16 addr:$src1), imm:$src2), (IMUL16rmi addr:$src1, imm:$src2)>; def : Pat<(mul (loadi32 addr:$src1), imm:$src2), (IMUL32rmi addr:$src1, imm:$src2)>; def : Pat<(mul (loadi64 addr:$src1), i64immSExt32:$src2), (IMUL64rmi32 addr:$src1, i64immSExt32:$src2)>; // Bit scan instruction patterns to match explicit zero-undef behavior. def : Pat<(cttz_zero_undef GR16:$src), (BSF16rr GR16:$src)>; def : Pat<(cttz_zero_undef GR32:$src), (BSF32rr GR32:$src)>; def : Pat<(cttz_zero_undef GR64:$src), (BSF64rr GR64:$src)>; def : Pat<(cttz_zero_undef (loadi16 addr:$src)), (BSF16rm addr:$src)>; def : Pat<(cttz_zero_undef (loadi32 addr:$src)), (BSF32rm addr:$src)>; def : Pat<(cttz_zero_undef (loadi64 addr:$src)), (BSF64rm addr:$src)>; // When HasMOVBE is enabled it is possible to get a non-legalized // register-register 16 bit bswap. This maps it to a ROL instruction. 
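// Illustrative example (not from this patch): for i16, bswap and a rotate by 8
// are the same operation (bswap i16 0xAABB == 0xBBAA == rol16(0xAABB, 8)), so
// the pattern below lowers the non-legalized 16-bit bswap to ROL16ri.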
let Predicates = [HasMOVBE] in { def : Pat<(bswap GR16:$src), (ROL16ri GR16:$src, (i8 8))>; } diff --git a/llvm/lib/Transforms/InstCombine/InstCombineNegator.cpp b/llvm/lib/Transforms/InstCombine/InstCombineNegator.cpp index e4895b59f4b4..cb052da79bb3 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineNegator.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineNegator.cpp @@ -1,576 +1,587 @@ //===- InstCombineNegator.cpp -----------------------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file implements sinking of negation into expression trees, // as long as that can be done without increasing instruction count. // //===----------------------------------------------------------------------===// #include "InstCombineInternal.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" #include "llvm/Analysis/TargetFolder.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/PatternMatch.h" #include "llvm/IR/Type.h" #include "llvm/IR/Use.h" #include "llvm/IR/User.h" #include "llvm/IR/Value.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/DebugCounter.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/InstCombine/InstCombiner.h" #include #include #include #include #include namespace llvm { class DataLayout; class LLVMContext; } // namespace llvm using namespace llvm; #define DEBUG_TYPE "instcombine" STATISTIC(NegatorTotalNegationsAttempted, "Negator: Number of negations attempted to be sinked"); STATISTIC(NegatorNumTreesNegated, "Negator: Number of negations successfully sinked"); STATISTIC(NegatorMaxDepthVisited, "Negator: Maximal traversal depth ever " "reached while attempting to sink negation"); STATISTIC(NegatorTimesDepthLimitReached, "Negator: How many times did the traversal depth limit was reached " "during sinking"); STATISTIC( NegatorNumValuesVisited, "Negator: Total number of values visited during attempts to sink negation"); STATISTIC(NegatorNumNegationsFoundInCache, "Negator: How many negations did we retrieve/reuse from cache"); STATISTIC(NegatorMaxTotalValuesVisited, "Negator: Maximal number of values ever visited while attempting to " "sink negation"); STATISTIC(NegatorNumInstructionsCreatedTotal, "Negator: Number of new negated instructions created, total"); STATISTIC(NegatorMaxInstructionsCreated, "Negator: Maximal number of new instructions created during negation " "attempt"); STATISTIC(NegatorNumInstructionsNegatedSuccess, "Negator: Number of new negated instructions created in successful " "negation sinking attempts"); DEBUG_COUNTER(NegatorCounter, "instcombine-negator", "Controls Negator transformations in InstCombine pass"); static cl::opt NegatorEnabled("instcombine-negator-enabled", cl::init(true), cl::desc("Should we attempt to sink negations?")); static cl::opt 
NegatorMaxDepth("instcombine-negator-max-depth", cl::init(NegatorDefaultMaxDepth), cl::desc("What is the maximal lookup depth when trying to " "check for viability of negation sinking.")); Negator::Negator(LLVMContext &C, const DataLayout &DL, bool IsTrulyNegation_) : Builder(C, TargetFolder(DL), IRBuilderCallbackInserter([&](Instruction *I) { ++NegatorNumInstructionsCreatedTotal; NewInstructions.push_back(I); })), IsTrulyNegation(IsTrulyNegation_) {} #if LLVM_ENABLE_STATS Negator::~Negator() { NegatorMaxTotalValuesVisited.updateMax(NumValuesVisitedInThisNegator); } #endif // Due to the InstCombine's worklist management, there are no guarantees that // each instruction we'll encounter has been visited by InstCombine already. // In particular, most importantly for us, that means we have to canonicalize // constants to RHS ourselves, since that is helpful sometimes. std::array Negator::getSortedOperandsOfBinOp(Instruction *I) { assert(I->getNumOperands() == 2 && "Only for binops!"); std::array Ops{I->getOperand(0), I->getOperand(1)}; if (I->isCommutative() && InstCombiner::getComplexity(I->getOperand(0)) < InstCombiner::getComplexity(I->getOperand(1))) std::swap(Ops[0], Ops[1]); return Ops; } // FIXME: can this be reworked into a worklist-based algorithm while preserving // the depth-first, early bailout traversal? [[nodiscard]] Value *Negator::visitImpl(Value *V, bool IsNSW, unsigned Depth) { // -(undef) -> undef. if (match(V, m_Undef())) return V; // In i1, negation can simply be ignored. if (V->getType()->isIntOrIntVectorTy(1)) return V; Value *X; // -(-(X)) -> X. if (match(V, m_Neg(m_Value(X)))) return X; // Integral constants can be freely negated. if (match(V, m_AnyIntegralConstant())) return ConstantExpr::getNeg(cast(V), /*HasNSW=*/false); // If we have a non-instruction, then give up. if (!isa(V)) return nullptr; // If we have started with a true negation (i.e. `sub 0, %y`), then if we've // got instruction that does not require recursive reasoning, we can still // negate it even if it has other uses, without increasing instruction count. if (!V->hasOneUse() && !IsTrulyNegation) return nullptr; auto *I = cast(V); unsigned BitWidth = I->getType()->getScalarSizeInBits(); // We must preserve the insertion point and debug info that is set in the // builder at the time this function is called. InstCombiner::BuilderTy::InsertPointGuard Guard(Builder); // And since we are trying to negate instruction I, that tells us about the // insertion point and the debug info that we need to keep. Builder.SetInsertPoint(I); // In some cases we can give the answer without further recursion. switch (I->getOpcode()) { case Instruction::Add: { std::array Ops = getSortedOperandsOfBinOp(I); // `inc` is always negatible. if (match(Ops[1], m_One())) return Builder.CreateNot(Ops[0], I->getName() + ".neg"); break; } case Instruction::Xor: // `not` is always negatible. if (match(I, m_Not(m_Value(X)))) return Builder.CreateAdd(X, ConstantInt::get(X->getType(), 1), I->getName() + ".neg"); break; case Instruction::AShr: case Instruction::LShr: { // Right-shift sign bit smear is negatible. const APInt *Op1Val; if (match(I->getOperand(1), m_APInt(Op1Val)) && *Op1Val == BitWidth - 1) { Value *BO = I->getOpcode() == Instruction::AShr ? 
Builder.CreateLShr(I->getOperand(0), I->getOperand(1)) : Builder.CreateAShr(I->getOperand(0), I->getOperand(1)); if (auto *NewInstr = dyn_cast(BO)) { NewInstr->copyIRFlags(I); NewInstr->setName(I->getName() + ".neg"); } return BO; } // While we could negate exact arithmetic shift: // ashr exact %x, C --> sdiv exact i8 %x, -1<getOperand(0)->getType()->isIntOrIntVectorTy(1)) return I->getOpcode() == Instruction::SExt ? Builder.CreateZExt(I->getOperand(0), I->getType(), I->getName() + ".neg") : Builder.CreateSExt(I->getOperand(0), I->getType(), I->getName() + ".neg"); break; case Instruction::Select: { // If both arms of the select are constants, we don't need to recurse. // Therefore, this transform is not limited by uses. auto *Sel = cast(I); Constant *TrueC, *FalseC; if (match(Sel->getTrueValue(), m_ImmConstant(TrueC)) && match(Sel->getFalseValue(), m_ImmConstant(FalseC))) { Constant *NegTrueC = ConstantExpr::getNeg(TrueC); Constant *NegFalseC = ConstantExpr::getNeg(FalseC); return Builder.CreateSelect(Sel->getCondition(), NegTrueC, NegFalseC, I->getName() + ".neg", /*MDFrom=*/I); } break; } case Instruction::Call: if (auto *CI = dyn_cast(I); CI && CI->hasOneUse()) return Builder.CreateIntrinsic(CI->getType(), CI->getIntrinsicID(), {CI->getRHS(), CI->getLHS()}); break; default: break; // Other instructions require recursive reasoning. } if (I->getOpcode() == Instruction::Sub && (I->hasOneUse() || match(I->getOperand(0), m_ImmConstant()))) { // `sub` is always negatible. // However, only do this either if the old `sub` doesn't stick around, or // it was subtracting from a constant. Otherwise, this isn't profitable. return Builder.CreateSub(I->getOperand(1), I->getOperand(0), I->getName() + ".neg", /* HasNUW */ false, IsNSW && I->hasNoSignedWrap()); } // Some other cases, while still don't require recursion, // are restricted to the one-use case. if (!V->hasOneUse()) return nullptr; switch (I->getOpcode()) { case Instruction::ZExt: { // Negation of zext of signbit is signbit splat: // 0 - (zext (i8 X u>> 7) to iN) --> sext (i8 X s>> 7) to iN Value *SrcOp = I->getOperand(0); unsigned SrcWidth = SrcOp->getType()->getScalarSizeInBits(); const APInt &FullShift = APInt(SrcWidth, SrcWidth - 1); if (IsTrulyNegation && match(SrcOp, m_LShr(m_Value(X), m_SpecificIntAllowPoison(FullShift)))) { Value *Ashr = Builder.CreateAShr(X, FullShift); return Builder.CreateSExt(Ashr, I->getType()); } break; } case Instruction::And: { Constant *ShAmt; // sub(y,and(lshr(x,C),1)) --> add(ashr(shl(x,(BW-1)-C),BW-1),y) if (match(I, m_And(m_OneUse(m_TruncOrSelf( m_LShr(m_Value(X), m_ImmConstant(ShAmt)))), m_One()))) { unsigned BW = X->getType()->getScalarSizeInBits(); Constant *BWMinusOne = ConstantInt::get(X->getType(), BW - 1); Value *R = Builder.CreateShl(X, Builder.CreateSub(BWMinusOne, ShAmt)); R = Builder.CreateAShr(R, BWMinusOne); return Builder.CreateTruncOrBitCast(R, I->getType()); } break; } case Instruction::SDiv: // `sdiv` is negatible if divisor is not undef/INT_MIN/1. // While this is normally not behind a use-check, // let's consider division to be special since it's costly. if (auto *Op1C = dyn_cast(I->getOperand(1))) { if (!Op1C->containsUndefOrPoisonElement() && Op1C->isNotMinSignedValue() && Op1C->isNotOneValue()) { Value *BO = Builder.CreateSDiv(I->getOperand(0), ConstantExpr::getNeg(Op1C), I->getName() + ".neg"); if (auto *NewInstr = dyn_cast(BO)) NewInstr->setIsExact(I->isExact()); return BO; } } break; } // Rest of the logic is recursive, so if it's time to give up then it's time. 
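// Illustrative example (not part of this patch): when negating
//   %m = mul i32 %x, 3
// the recursion negates the constant operand for free, so `sub i32 0, %m`
// can be rewritten as `mul i32 %x, -3` without increasing instruction count.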
if (Depth > NegatorMaxDepth) { LLVM_DEBUG(dbgs() << "Negator: reached maximal allowed traversal depth in " << *V << ". Giving up.\n"); ++NegatorTimesDepthLimitReached; return nullptr; } switch (I->getOpcode()) { case Instruction::Freeze: { // `freeze` is negatible if its operand is negatible. Value *NegOp = negate(I->getOperand(0), IsNSW, Depth + 1); if (!NegOp) // Early return. return nullptr; return Builder.CreateFreeze(NegOp, I->getName() + ".neg"); } case Instruction::PHI: { // `phi` is negatible if all the incoming values are negatible. auto *PHI = cast(I); SmallVector NegatedIncomingValues(PHI->getNumOperands()); for (auto I : zip(PHI->incoming_values(), NegatedIncomingValues)) { if (!(std::get<1>(I) = negate(std::get<0>(I), IsNSW, Depth + 1))) // Early return. return nullptr; } // All incoming values are indeed negatible. Create negated PHI node. PHINode *NegatedPHI = Builder.CreatePHI( PHI->getType(), PHI->getNumOperands(), PHI->getName() + ".neg"); for (auto I : zip(NegatedIncomingValues, PHI->blocks())) NegatedPHI->addIncoming(std::get<0>(I), std::get<1>(I)); return NegatedPHI; } case Instruction::Select: { if (isKnownNegation(I->getOperand(1), I->getOperand(2), /*NeedNSW=*/false, /*AllowPoison=*/false)) { // Of one hand of select is known to be negation of another hand, // just swap the hands around. auto *NewSelect = cast(I->clone()); // Just swap the operands of the select. NewSelect->swapValues(); // Don't swap prof metadata, we didn't change the branch behavior. NewSelect->setName(I->getName() + ".neg"); + // Poison-generating flags should be dropped + Value *TV = NewSelect->getTrueValue(); + Value *FV = NewSelect->getFalseValue(); + if (match(TV, m_Neg(m_Specific(FV)))) + cast(TV)->dropPoisonGeneratingFlags(); + else if (match(FV, m_Neg(m_Specific(TV)))) + cast(FV)->dropPoisonGeneratingFlags(); + else { + cast(TV)->dropPoisonGeneratingFlags(); + cast(FV)->dropPoisonGeneratingFlags(); + } Builder.Insert(NewSelect); return NewSelect; } // `select` is negatible if both hands of `select` are negatible. Value *NegOp1 = negate(I->getOperand(1), IsNSW, Depth + 1); if (!NegOp1) // Early return. return nullptr; Value *NegOp2 = negate(I->getOperand(2), IsNSW, Depth + 1); if (!NegOp2) return nullptr; // Do preserve the metadata! return Builder.CreateSelect(I->getOperand(0), NegOp1, NegOp2, I->getName() + ".neg", /*MDFrom=*/I); } case Instruction::ShuffleVector: { // `shufflevector` is negatible if both operands are negatible. auto *Shuf = cast(I); Value *NegOp0 = negate(I->getOperand(0), IsNSW, Depth + 1); if (!NegOp0) // Early return. return nullptr; Value *NegOp1 = negate(I->getOperand(1), IsNSW, Depth + 1); if (!NegOp1) return nullptr; return Builder.CreateShuffleVector(NegOp0, NegOp1, Shuf->getShuffleMask(), I->getName() + ".neg"); } case Instruction::ExtractElement: { // `extractelement` is negatible if source operand is negatible. auto *EEI = cast(I); Value *NegVector = negate(EEI->getVectorOperand(), IsNSW, Depth + 1); if (!NegVector) // Early return. return nullptr; return Builder.CreateExtractElement(NegVector, EEI->getIndexOperand(), I->getName() + ".neg"); } case Instruction::InsertElement: { // `insertelement` is negatible if both the source vector and // element-to-be-inserted are negatible. auto *IEI = cast(I); Value *NegVector = negate(IEI->getOperand(0), IsNSW, Depth + 1); if (!NegVector) // Early return. return nullptr; Value *NegNewElt = negate(IEI->getOperand(1), IsNSW, Depth + 1); if (!NegNewElt) // Early return. 
return nullptr; return Builder.CreateInsertElement(NegVector, NegNewElt, IEI->getOperand(2), I->getName() + ".neg"); } case Instruction::Trunc: { // `trunc` is negatible if its operand is negatible. Value *NegOp = negate(I->getOperand(0), /* IsNSW */ false, Depth + 1); if (!NegOp) // Early return. return nullptr; return Builder.CreateTrunc(NegOp, I->getType(), I->getName() + ".neg"); } case Instruction::Shl: { // `shl` is negatible if the first operand is negatible. IsNSW &= I->hasNoSignedWrap(); if (Value *NegOp0 = negate(I->getOperand(0), IsNSW, Depth + 1)) return Builder.CreateShl(NegOp0, I->getOperand(1), I->getName() + ".neg", /* HasNUW */ false, IsNSW); // Otherwise, `shl %x, C` can be interpreted as `mul %x, 1<getOperand(1), m_ImmConstant(Op1C)) || !IsTrulyNegation) return nullptr; return Builder.CreateMul( I->getOperand(0), Builder.CreateShl(Constant::getAllOnesValue(Op1C->getType()), Op1C), I->getName() + ".neg", /* HasNUW */ false, IsNSW); } case Instruction::Or: { if (!cast(I)->isDisjoint()) return nullptr; // Don't know how to handle `or` in general. std::array Ops = getSortedOperandsOfBinOp(I); // `or`/`add` are interchangeable when operands have no common bits set. // `inc` is always negatible. if (match(Ops[1], m_One())) return Builder.CreateNot(Ops[0], I->getName() + ".neg"); // Else, just defer to Instruction::Add handling. [[fallthrough]]; } case Instruction::Add: { // `add` is negatible if both of its operands are negatible. SmallVector NegatedOps, NonNegatedOps; for (Value *Op : I->operands()) { // Can we sink the negation into this operand? if (Value *NegOp = negate(Op, /* IsNSW */ false, Depth + 1)) { NegatedOps.emplace_back(NegOp); // Successfully negated operand! continue; } // Failed to sink negation into this operand. IFF we started from negation // and we manage to sink negation into one operand, we can still do this. if (!IsTrulyNegation) return nullptr; NonNegatedOps.emplace_back(Op); // Just record which operand that was. } assert((NegatedOps.size() + NonNegatedOps.size()) == 2 && "Internal consistency check failed."); // Did we manage to sink negation into both of the operands? if (NegatedOps.size() == 2) // Then we get to keep the `add`! return Builder.CreateAdd(NegatedOps[0], NegatedOps[1], I->getName() + ".neg"); assert(IsTrulyNegation && "We should have early-exited then."); // Completely failed to sink negation? if (NonNegatedOps.size() == 2) return nullptr; // 0-(a+b) --> (-a)-b return Builder.CreateSub(NegatedOps[0], NonNegatedOps[0], I->getName() + ".neg"); } case Instruction::Xor: { std::array Ops = getSortedOperandsOfBinOp(I); // `xor` is negatible if one of its operands is invertible. // FIXME: InstCombineInverter? But how to connect Inverter and Negator? if (auto *C = dyn_cast(Ops[1])) { if (IsTrulyNegation) { Value *Xor = Builder.CreateXor(Ops[0], ConstantExpr::getNot(C)); return Builder.CreateAdd(Xor, ConstantInt::get(Xor->getType(), 1), I->getName() + ".neg"); } } return nullptr; } case Instruction::Mul: { std::array Ops = getSortedOperandsOfBinOp(I); // `mul` is negatible if one of its operands is negatible. Value *NegatedOp, *OtherOp; // First try the second operand, in case it's a constant it will be best to // just invert it instead of sinking the `neg` deeper. if (Value *NegOp1 = negate(Ops[1], /* IsNSW */ false, Depth + 1)) { NegatedOp = NegOp1; OtherOp = Ops[0]; } else if (Value *NegOp0 = negate(Ops[0], /* IsNSW */ false, Depth + 1)) { NegatedOp = NegOp0; OtherOp = Ops[1]; } else // Can't negate either of them. 
return nullptr; return Builder.CreateMul(NegatedOp, OtherOp, I->getName() + ".neg", /* HasNUW */ false, IsNSW && I->hasNoSignedWrap()); } default: return nullptr; // Don't know, likely not negatible for free. } llvm_unreachable("Can't get here. We always return from switch."); } [[nodiscard]] Value *Negator::negate(Value *V, bool IsNSW, unsigned Depth) { NegatorMaxDepthVisited.updateMax(Depth); ++NegatorNumValuesVisited; #if LLVM_ENABLE_STATS ++NumValuesVisitedInThisNegator; #endif #ifndef NDEBUG // We can't ever have a Value with such an address. Value *Placeholder = reinterpret_cast(static_cast(-1)); #endif // Did we already try to negate this value? auto NegationsCacheIterator = NegationsCache.find(V); if (NegationsCacheIterator != NegationsCache.end()) { ++NegatorNumNegationsFoundInCache; Value *NegatedV = NegationsCacheIterator->second; assert(NegatedV != Placeholder && "Encountered a cycle during negation."); return NegatedV; } #ifndef NDEBUG // We did not find a cached result for negation of V. While there, // let's temporairly cache a placeholder value, with the idea that if later // during negation we fetch it from cache, we'll know we're in a cycle. NegationsCache[V] = Placeholder; #endif // No luck. Try negating it for real. Value *NegatedV = visitImpl(V, IsNSW, Depth); // And cache the (real) result for the future. NegationsCache[V] = NegatedV; return NegatedV; } [[nodiscard]] std::optional Negator::run(Value *Root, bool IsNSW) { Value *Negated = negate(Root, IsNSW, /*Depth=*/0); if (!Negated) { // We must cleanup newly-inserted instructions, to avoid any potential // endless combine looping. for (Instruction *I : llvm::reverse(NewInstructions)) I->eraseFromParent(); return std::nullopt; } return std::make_pair(ArrayRef(NewInstructions), Negated); } [[nodiscard]] Value *Negator::Negate(bool LHSIsZero, bool IsNSW, Value *Root, InstCombinerImpl &IC) { ++NegatorTotalNegationsAttempted; LLVM_DEBUG(dbgs() << "Negator: attempting to sink negation into " << *Root << "\n"); if (!NegatorEnabled || !DebugCounter::shouldExecute(NegatorCounter)) return nullptr; Negator N(Root->getContext(), IC.getDataLayout(), LHSIsZero); std::optional Res = N.run(Root, IsNSW); if (!Res) { // Negation failed. LLVM_DEBUG(dbgs() << "Negator: failed to sink negation into " << *Root << "\n"); return nullptr; } LLVM_DEBUG(dbgs() << "Negator: successfully sunk negation into " << *Root << "\n NEW: " << *Res->second << "\n"); ++NegatorNumTreesNegated; // We must temporarily unset the 'current' insertion point and DebugLoc of the // InstCombine's IRBuilder so that it won't interfere with the ones we have // already specified when producing negated instructions. InstCombiner::BuilderTy::InsertPointGuard Guard(IC.Builder); IC.Builder.ClearInsertionPoint(); IC.Builder.SetCurrentDebugLocation(DebugLoc()); // And finally, we must add newly-created instructions into the InstCombine's // worklist (in a proper order!) so it can attempt to combine them. LLVM_DEBUG(dbgs() << "Negator: Propagating " << Res->first.size() << " instrs to InstCombine\n"); NegatorMaxInstructionsCreated.updateMax(Res->first.size()); NegatorNumInstructionsNegatedSuccess += Res->first.size(); // They are in def-use order, so nothing fancy, just insert them in order. for (Instruction *I : Res->first) IC.Builder.Insert(I, I->getName()); // And return the new root. 
return Res->second; } diff --git a/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp b/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp index 86411320ab24..b05a33c68889 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp @@ -1,1654 +1,1655 @@ //===- InstCombinePHI.cpp -------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file implements the visitPHINode function. // //===----------------------------------------------------------------------===// #include "InstCombineInternal.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/PatternMatch.h" #include "llvm/Support/CommandLine.h" #include "llvm/Transforms/InstCombine/InstCombiner.h" #include "llvm/Transforms/Utils/Local.h" #include using namespace llvm; using namespace llvm::PatternMatch; #define DEBUG_TYPE "instcombine" static cl::opt MaxNumPhis("instcombine-max-num-phis", cl::init(512), cl::desc("Maximum number phis to handle in intptr/ptrint folding")); STATISTIC(NumPHIsOfInsertValues, "Number of phi-of-insertvalue turned into insertvalue-of-phis"); STATISTIC(NumPHIsOfExtractValues, "Number of phi-of-extractvalue turned into extractvalue-of-phi"); STATISTIC(NumPHICSEs, "Number of PHI's that got CSE'd"); /// The PHI arguments will be folded into a single operation with a PHI node /// as input. The debug location of the single operation will be the merged /// locations of the original PHI node arguments. void InstCombinerImpl::PHIArgMergedDebugLoc(Instruction *Inst, PHINode &PN) { auto *FirstInst = cast(PN.getIncomingValue(0)); Inst->setDebugLoc(FirstInst->getDebugLoc()); // We do not expect a CallInst here, otherwise, N-way merging of DebugLoc // will be inefficient. assert(!isa(Inst)); for (Value *V : drop_begin(PN.incoming_values())) { auto *I = cast(V); Inst->applyMergedLocation(Inst->getDebugLoc(), I->getDebugLoc()); } } // Replace Integer typed PHI PN if the PHI's value is used as a pointer value. // If there is an existing pointer typed PHI that produces the same value as PN, // replace PN and the IntToPtr operation with it. Otherwise, synthesize a new // PHI node: // // Case-1: // bb1: // int_init = PtrToInt(ptr_init) // br label %bb2 // bb2: // int_val = PHI([int_init, %bb1], [int_val_inc, %bb2] // ptr_val = PHI([ptr_init, %bb1], [ptr_val_inc, %bb2] // ptr_val2 = IntToPtr(int_val) // ... // use(ptr_val2) // ptr_val_inc = ... // inc_val_inc = PtrToInt(ptr_val_inc) // // ==> // bb1: // br label %bb2 // bb2: // ptr_val = PHI([ptr_init, %bb1], [ptr_val_inc, %bb2] // ... // use(ptr_val) // ptr_val_inc = ... // // Case-2: // bb1: // int_ptr = BitCast(ptr_ptr) // int_init = Load(int_ptr) // br label %bb2 // bb2: // int_val = PHI([int_init, %bb1], [int_val_inc, %bb2] // ptr_val2 = IntToPtr(int_val) // ... // use(ptr_val2) // ptr_val_inc = ... // inc_val_inc = PtrToInt(ptr_val_inc) // ==> // bb1: // ptr_init = Load(ptr_ptr) // br label %bb2 // bb2: // ptr_val = PHI([ptr_init, %bb1], [ptr_val_inc, %bb2] // ... // use(ptr_val) // ptr_val_inc = ... // ... 
// bool InstCombinerImpl::foldIntegerTypedPHI(PHINode &PN) { if (!PN.getType()->isIntegerTy()) return false; if (!PN.hasOneUse()) return false; auto *IntToPtr = dyn_cast(PN.user_back()); if (!IntToPtr) return false; // Check if the pointer is actually used as pointer: auto HasPointerUse = [](Instruction *IIP) { for (User *U : IIP->users()) { Value *Ptr = nullptr; if (LoadInst *LoadI = dyn_cast(U)) { Ptr = LoadI->getPointerOperand(); } else if (StoreInst *SI = dyn_cast(U)) { Ptr = SI->getPointerOperand(); } else if (GetElementPtrInst *GI = dyn_cast(U)) { Ptr = GI->getPointerOperand(); } if (Ptr && Ptr == IIP) return true; } return false; }; if (!HasPointerUse(IntToPtr)) return false; if (DL.getPointerSizeInBits(IntToPtr->getAddressSpace()) != DL.getTypeSizeInBits(IntToPtr->getOperand(0)->getType())) return false; SmallVector AvailablePtrVals; for (auto Incoming : zip(PN.blocks(), PN.incoming_values())) { BasicBlock *BB = std::get<0>(Incoming); Value *Arg = std::get<1>(Incoming); // First look backward: if (auto *PI = dyn_cast(Arg)) { AvailablePtrVals.emplace_back(PI->getOperand(0)); continue; } // Next look forward: Value *ArgIntToPtr = nullptr; for (User *U : Arg->users()) { if (isa(U) && U->getType() == IntToPtr->getType() && (DT.dominates(cast(U), BB) || cast(U)->getParent() == BB)) { ArgIntToPtr = U; break; } } if (ArgIntToPtr) { AvailablePtrVals.emplace_back(ArgIntToPtr); continue; } // If Arg is defined by a PHI, allow it. This will also create // more opportunities iteratively. if (isa(Arg)) { AvailablePtrVals.emplace_back(Arg); continue; } // For a single use integer load: auto *LoadI = dyn_cast(Arg); if (!LoadI) return false; if (!LoadI->hasOneUse()) return false; // Push the integer typed Load instruction into the available // value set, and fix it up later when the pointer typed PHI // is synthesized. AvailablePtrVals.emplace_back(LoadI); } // Now search for a matching PHI auto *BB = PN.getParent(); assert(AvailablePtrVals.size() == PN.getNumIncomingValues() && "Not enough available ptr typed incoming values"); PHINode *MatchingPtrPHI = nullptr; unsigned NumPhis = 0; for (PHINode &PtrPHI : BB->phis()) { // FIXME: consider handling this in AggressiveInstCombine if (NumPhis++ > MaxNumPhis) return false; if (&PtrPHI == &PN || PtrPHI.getType() != IntToPtr->getType()) continue; if (any_of(zip(PN.blocks(), AvailablePtrVals), [&](const auto &BlockAndValue) { BasicBlock *BB = std::get<0>(BlockAndValue); Value *V = std::get<1>(BlockAndValue); return PtrPHI.getIncomingValueForBlock(BB) != V; })) continue; MatchingPtrPHI = &PtrPHI; break; } if (MatchingPtrPHI) { assert(MatchingPtrPHI->getType() == IntToPtr->getType() && "Phi's Type does not match with IntToPtr"); // Explicitly replace the inttoptr (rather than inserting a ptrtoint) here, // to make sure another transform can't undo it in the meantime. replaceInstUsesWith(*IntToPtr, MatchingPtrPHI); eraseInstFromFunction(*IntToPtr); eraseInstFromFunction(PN); return true; } // If it requires a conversion for every PHI operand, do not do it. if (all_of(AvailablePtrVals, [&](Value *V) { return (V->getType() != IntToPtr->getType()) || isa(V); })) return false; // If any of the operand that requires casting is a terminator // instruction, do not do it. Similarly, do not do the transform if the value // is PHI in a block with no insertion point, for example, a catchswitch // block, since we will not be able to insert a cast after the PHI. 
if (any_of(AvailablePtrVals, [&](Value *V) { if (V->getType() == IntToPtr->getType()) return false; auto *Inst = dyn_cast(V); if (!Inst) return false; if (Inst->isTerminator()) return true; auto *BB = Inst->getParent(); if (isa(Inst) && BB->getFirstInsertionPt() == BB->end()) return true; return false; })) return false; PHINode *NewPtrPHI = PHINode::Create( IntToPtr->getType(), PN.getNumIncomingValues(), PN.getName() + ".ptr"); InsertNewInstBefore(NewPtrPHI, PN.getIterator()); SmallDenseMap Casts; for (auto Incoming : zip(PN.blocks(), AvailablePtrVals)) { auto *IncomingBB = std::get<0>(Incoming); auto *IncomingVal = std::get<1>(Incoming); if (IncomingVal->getType() == IntToPtr->getType()) { NewPtrPHI->addIncoming(IncomingVal, IncomingBB); continue; } #ifndef NDEBUG LoadInst *LoadI = dyn_cast(IncomingVal); assert((isa(IncomingVal) || IncomingVal->getType()->isPointerTy() || (LoadI && LoadI->hasOneUse())) && "Can not replace LoadInst with multiple uses"); #endif // Need to insert a BitCast. // For an integer Load instruction with a single use, the load + IntToPtr // cast will be simplified into a pointer load: // %v = load i64, i64* %a.ip, align 8 // %v.cast = inttoptr i64 %v to float ** // ==> // %v.ptrp = bitcast i64 * %a.ip to float ** // %v.cast = load float *, float ** %v.ptrp, align 8 Instruction *&CI = Casts[IncomingVal]; if (!CI) { CI = CastInst::CreateBitOrPointerCast(IncomingVal, IntToPtr->getType(), IncomingVal->getName() + ".ptr"); if (auto *IncomingI = dyn_cast(IncomingVal)) { BasicBlock::iterator InsertPos(IncomingI); InsertPos++; BasicBlock *BB = IncomingI->getParent(); if (isa(IncomingI)) InsertPos = BB->getFirstInsertionPt(); assert(InsertPos != BB->end() && "should have checked above"); InsertNewInstBefore(CI, InsertPos); } else { auto *InsertBB = &IncomingBB->getParent()->getEntryBlock(); InsertNewInstBefore(CI, InsertBB->getFirstInsertionPt()); } } NewPtrPHI->addIncoming(CI, IncomingBB); } // Explicitly replace the inttoptr (rather than inserting a ptrtoint) here, // to make sure another transform can't undo it in the meantime. replaceInstUsesWith(*IntToPtr, NewPtrPHI); eraseInstFromFunction(*IntToPtr); eraseInstFromFunction(PN); return true; } // Remove RoundTrip IntToPtr/PtrToInt Cast on PHI-Operand and // fold Phi-operand to bitcast. Instruction *InstCombinerImpl::foldPHIArgIntToPtrToPHI(PHINode &PN) { // convert ptr2int ( phi[ int2ptr(ptr2int(x))] ) --> ptr2int ( phi [ x ] ) // Make sure all uses of phi are ptr2int. if (!all_of(PN.users(), [](User *U) { return isa(U); })) return nullptr; // Iterating over all operands to check presence of target pointers for // optimization. bool OperandWithRoundTripCast = false; for (unsigned OpNum = 0; OpNum != PN.getNumIncomingValues(); ++OpNum) { if (auto *NewOp = simplifyIntToPtrRoundTripCast(PN.getIncomingValue(OpNum))) { replaceOperand(PN, OpNum, NewOp); OperandWithRoundTripCast = true; } } if (!OperandWithRoundTripCast) return nullptr; return &PN; } /// If we have something like phi [insertvalue(a,b,0), insertvalue(c,d,0)], /// turn this into a phi[a,c] and phi[b,d] and a single insertvalue. Instruction * InstCombinerImpl::foldPHIArgInsertValueInstructionIntoPHI(PHINode &PN) { auto *FirstIVI = cast(PN.getIncomingValue(0)); // Scan to see if all operands are `insertvalue`'s with the same indices, // and all have a single use. 
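// For example (illustrative, not from this patch), a
//   phi [ insertvalue(%a, %b, 0), %bb1 ], [ insertvalue(%c, %d, 0), %bb2 ]
// is rewritten below into two PHIs, phi [%a, %bb1], [%c, %bb2] and
// phi [%b, %bb1], [%d, %bb2], feeding a single insertvalue at index 0.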
for (Value *V : drop_begin(PN.incoming_values())) { auto *I = dyn_cast(V); if (!I || !I->hasOneUser() || I->getIndices() != FirstIVI->getIndices()) return nullptr; } // For each operand of an `insertvalue` std::array NewOperands; for (int OpIdx : {0, 1}) { auto *&NewOperand = NewOperands[OpIdx]; // Create a new PHI node to receive the values the operand has in each // incoming basic block. NewOperand = PHINode::Create( FirstIVI->getOperand(OpIdx)->getType(), PN.getNumIncomingValues(), FirstIVI->getOperand(OpIdx)->getName() + ".pn"); // And populate each operand's PHI with said values. for (auto Incoming : zip(PN.blocks(), PN.incoming_values())) NewOperand->addIncoming( cast(std::get<1>(Incoming))->getOperand(OpIdx), std::get<0>(Incoming)); InsertNewInstBefore(NewOperand, PN.getIterator()); } // And finally, create `insertvalue` over the newly-formed PHI nodes. auto *NewIVI = InsertValueInst::Create(NewOperands[0], NewOperands[1], FirstIVI->getIndices(), PN.getName()); PHIArgMergedDebugLoc(NewIVI, PN); ++NumPHIsOfInsertValues; return NewIVI; } /// If we have something like phi [extractvalue(a,0), extractvalue(b,0)], /// turn this into a phi[a,b] and a single extractvalue. Instruction * InstCombinerImpl::foldPHIArgExtractValueInstructionIntoPHI(PHINode &PN) { auto *FirstEVI = cast(PN.getIncomingValue(0)); // Scan to see if all operands are `extractvalue`'s with the same indices, // and all have a single use. for (Value *V : drop_begin(PN.incoming_values())) { auto *I = dyn_cast(V); if (!I || !I->hasOneUser() || I->getIndices() != FirstEVI->getIndices() || I->getAggregateOperand()->getType() != FirstEVI->getAggregateOperand()->getType()) return nullptr; } // Create a new PHI node to receive the values the aggregate operand has // in each incoming basic block. auto *NewAggregateOperand = PHINode::Create( FirstEVI->getAggregateOperand()->getType(), PN.getNumIncomingValues(), FirstEVI->getAggregateOperand()->getName() + ".pn"); // And populate the PHI with said values. for (auto Incoming : zip(PN.blocks(), PN.incoming_values())) NewAggregateOperand->addIncoming( cast(std::get<1>(Incoming))->getAggregateOperand(), std::get<0>(Incoming)); InsertNewInstBefore(NewAggregateOperand, PN.getIterator()); // And finally, create `extractvalue` over the newly-formed PHI nodes. auto *NewEVI = ExtractValueInst::Create(NewAggregateOperand, FirstEVI->getIndices(), PN.getName()); PHIArgMergedDebugLoc(NewEVI, PN); ++NumPHIsOfExtractValues; return NewEVI; } /// If we have something like phi [add (a,b), add(a,c)] and if a/b/c and the /// adds all have a single user, turn this into a phi and a single binop. Instruction *InstCombinerImpl::foldPHIArgBinOpIntoPHI(PHINode &PN) { Instruction *FirstInst = cast(PN.getIncomingValue(0)); assert(isa(FirstInst) || isa(FirstInst)); unsigned Opc = FirstInst->getOpcode(); Value *LHSVal = FirstInst->getOperand(0); Value *RHSVal = FirstInst->getOperand(1); Type *LHSType = LHSVal->getType(); Type *RHSType = RHSVal->getType(); // Scan to see if all operands are the same opcode, and all have one user. for (Value *V : drop_begin(PN.incoming_values())) { Instruction *I = dyn_cast(V); if (!I || I->getOpcode() != Opc || !I->hasOneUser() || // Verify type of the LHS matches so we don't fold cmp's of different // types. 
I->getOperand(0)->getType() != LHSType || I->getOperand(1)->getType() != RHSType) return nullptr; // If they are CmpInst instructions, check their predicates if (CmpInst *CI = dyn_cast(I)) if (CI->getPredicate() != cast(FirstInst)->getPredicate()) return nullptr; // Keep track of which operand needs a phi node. if (I->getOperand(0) != LHSVal) LHSVal = nullptr; if (I->getOperand(1) != RHSVal) RHSVal = nullptr; } // If both LHS and RHS would need a PHI, don't do this transformation, // because it would increase the number of PHIs entering the block, // which leads to higher register pressure. This is especially // bad when the PHIs are in the header of a loop. if (!LHSVal && !RHSVal) return nullptr; // Otherwise, this is safe to transform! Value *InLHS = FirstInst->getOperand(0); Value *InRHS = FirstInst->getOperand(1); PHINode *NewLHS = nullptr, *NewRHS = nullptr; if (!LHSVal) { NewLHS = PHINode::Create(LHSType, PN.getNumIncomingValues(), FirstInst->getOperand(0)->getName() + ".pn"); NewLHS->addIncoming(InLHS, PN.getIncomingBlock(0)); InsertNewInstBefore(NewLHS, PN.getIterator()); LHSVal = NewLHS; } if (!RHSVal) { NewRHS = PHINode::Create(RHSType, PN.getNumIncomingValues(), FirstInst->getOperand(1)->getName() + ".pn"); NewRHS->addIncoming(InRHS, PN.getIncomingBlock(0)); InsertNewInstBefore(NewRHS, PN.getIterator()); RHSVal = NewRHS; } // Add all operands to the new PHIs. if (NewLHS || NewRHS) { for (auto Incoming : drop_begin(zip(PN.blocks(), PN.incoming_values()))) { BasicBlock *InBB = std::get<0>(Incoming); Value *InVal = std::get<1>(Incoming); Instruction *InInst = cast(InVal); if (NewLHS) { Value *NewInLHS = InInst->getOperand(0); NewLHS->addIncoming(NewInLHS, InBB); } if (NewRHS) { Value *NewInRHS = InInst->getOperand(1); NewRHS->addIncoming(NewInRHS, InBB); } } } if (CmpInst *CIOp = dyn_cast(FirstInst)) { CmpInst *NewCI = CmpInst::Create(CIOp->getOpcode(), CIOp->getPredicate(), LHSVal, RHSVal); PHIArgMergedDebugLoc(NewCI, PN); return NewCI; } BinaryOperator *BinOp = cast(FirstInst); BinaryOperator *NewBinOp = BinaryOperator::Create(BinOp->getOpcode(), LHSVal, RHSVal); NewBinOp->copyIRFlags(PN.getIncomingValue(0)); for (Value *V : drop_begin(PN.incoming_values())) NewBinOp->andIRFlags(V); PHIArgMergedDebugLoc(NewBinOp, PN); return NewBinOp; } Instruction *InstCombinerImpl::foldPHIArgGEPIntoPHI(PHINode &PN) { GetElementPtrInst *FirstInst =cast(PN.getIncomingValue(0)); SmallVector FixedOperands(FirstInst->op_begin(), FirstInst->op_end()); // This is true if all GEP bases are allocas and if all indices into them are // constants. bool AllBasePointersAreAllocas = true; // We don't want to replace this phi if the replacement would require // more than one phi, which leads to higher register pressure. This is // especially bad when the PHIs are in the header of a loop. bool NeededPhi = false; - GEPNoWrapFlags NW = GEPNoWrapFlags::all(); + // Remember flags of the first phi-operand getelementptr. + GEPNoWrapFlags NW = FirstInst->getNoWrapFlags(); // Scan to see if all operands are the same opcode, and all have one user. for (Value *V : drop_begin(PN.incoming_values())) { GetElementPtrInst *GEP = dyn_cast(V); if (!GEP || !GEP->hasOneUser() || GEP->getSourceElementType() != FirstInst->getSourceElementType() || GEP->getNumOperands() != FirstInst->getNumOperands()) return nullptr; NW &= GEP->getNoWrapFlags(); // Keep track of whether or not all GEPs are of alloca pointers. 
if (AllBasePointersAreAllocas && (!isa(GEP->getOperand(0)) || !GEP->hasAllConstantIndices())) AllBasePointersAreAllocas = false; // Compare the operand lists. for (unsigned Op = 0, E = FirstInst->getNumOperands(); Op != E; ++Op) { if (FirstInst->getOperand(Op) == GEP->getOperand(Op)) continue; // Don't merge two GEPs when two operands differ (introducing phi nodes) // if one of the PHIs has a constant for the index. The index may be // substantially cheaper to compute for the constants, so making it a // variable index could pessimize the path. This also handles the case // for struct indices, which must always be constant. if (isa(FirstInst->getOperand(Op)) || isa(GEP->getOperand(Op))) return nullptr; if (FirstInst->getOperand(Op)->getType() != GEP->getOperand(Op)->getType()) return nullptr; // If we already needed a PHI for an earlier operand, and another operand // also requires a PHI, we'd be introducing more PHIs than we're // eliminating, which increases register pressure on entry to the PHI's // block. if (NeededPhi) return nullptr; FixedOperands[Op] = nullptr; // Needs a PHI. NeededPhi = true; } } // If all of the base pointers of the PHI'd GEPs are from allocas, don't // bother doing this transformation. At best, this will just save a bit of // offset calculation, but all the predecessors will have to materialize the // stack address into a register anyway. We'd actually rather *clone* the // load up into the predecessors so that we have a load of a gep of an alloca, // which can usually all be folded into the load. if (AllBasePointersAreAllocas) return nullptr; // Otherwise, this is safe to transform. Insert PHI nodes for each operand // that is variable. SmallVector OperandPhis(FixedOperands.size()); bool HasAnyPHIs = false; for (unsigned I = 0, E = FixedOperands.size(); I != E; ++I) { if (FixedOperands[I]) continue; // operand doesn't need a phi. Value *FirstOp = FirstInst->getOperand(I); PHINode *NewPN = PHINode::Create(FirstOp->getType(), E, FirstOp->getName() + ".pn"); InsertNewInstBefore(NewPN, PN.getIterator()); NewPN->addIncoming(FirstOp, PN.getIncomingBlock(0)); OperandPhis[I] = NewPN; FixedOperands[I] = NewPN; HasAnyPHIs = true; } // Add all operands to the new PHIs. if (HasAnyPHIs) { for (auto Incoming : drop_begin(zip(PN.blocks(), PN.incoming_values()))) { BasicBlock *InBB = std::get<0>(Incoming); Value *InVal = std::get<1>(Incoming); GetElementPtrInst *InGEP = cast(InVal); for (unsigned Op = 0, E = OperandPhis.size(); Op != E; ++Op) if (PHINode *OpPhi = OperandPhis[Op]) OpPhi->addIncoming(InGEP->getOperand(Op), InBB); } } Value *Base = FixedOperands[0]; GetElementPtrInst *NewGEP = GetElementPtrInst::Create(FirstInst->getSourceElementType(), Base, ArrayRef(FixedOperands).slice(1), NW); PHIArgMergedDebugLoc(NewGEP, PN); return NewGEP; } /// Return true if we know that it is safe to sink the load out of the block /// that defines it. This means that it must be obvious the value of the load is /// not changed from the point of the load to the end of the block it is in. /// /// Finally, it is safe, but not profitable, to sink a load targeting a /// non-address-taken alloca. Doing so will cause us to not promote the alloca /// to a register. static bool isSafeAndProfitableToSinkLoad(LoadInst *L) { BasicBlock::iterator BBI = L->getIterator(), E = L->getParent()->end(); for (++BBI; BBI != E; ++BBI) if (BBI->mayWriteToMemory()) { // Calls that only access inaccessible memory do not block sinking the // load. 
if (auto *CB = dyn_cast(BBI)) if (CB->onlyAccessesInaccessibleMemory()) continue; return false; } // Check for non-address taken alloca. If not address-taken already, it isn't // profitable to do this xform. if (AllocaInst *AI = dyn_cast(L->getOperand(0))) { bool IsAddressTaken = false; for (User *U : AI->users()) { if (isa(U)) continue; if (StoreInst *SI = dyn_cast(U)) { // If storing TO the alloca, then the address isn't taken. if (SI->getOperand(1) == AI) continue; } IsAddressTaken = true; break; } if (!IsAddressTaken && AI->isStaticAlloca()) return false; } // If this load is a load from a GEP with a constant offset from an alloca, // then we don't want to sink it. In its present form, it will be // load [constant stack offset]. Sinking it will cause us to have to // materialize the stack addresses in each predecessor in a register only to // do a shared load from register in the successor. if (GetElementPtrInst *GEP = dyn_cast(L->getOperand(0))) if (AllocaInst *AI = dyn_cast(GEP->getOperand(0))) if (AI->isStaticAlloca() && GEP->hasAllConstantIndices()) return false; return true; } Instruction *InstCombinerImpl::foldPHIArgLoadIntoPHI(PHINode &PN) { LoadInst *FirstLI = cast(PN.getIncomingValue(0)); // Can't forward swifterror through a phi. if (FirstLI->getOperand(0)->isSwiftError()) return nullptr; // FIXME: This is overconservative; this transform is allowed in some cases // for atomic operations. if (FirstLI->isAtomic()) return nullptr; // When processing loads, we need to propagate two bits of information to the // sunk load: whether it is volatile, and what its alignment is. bool IsVolatile = FirstLI->isVolatile(); Align LoadAlignment = FirstLI->getAlign(); const unsigned LoadAddrSpace = FirstLI->getPointerAddressSpace(); // We can't sink the load if the loaded value could be modified between the // load and the PHI. if (FirstLI->getParent() != PN.getIncomingBlock(0) || !isSafeAndProfitableToSinkLoad(FirstLI)) return nullptr; // If the PHI is of volatile loads and the load block has multiple // successors, sinking it would remove a load of the volatile value from // the path through the other successor. if (IsVolatile && FirstLI->getParent()->getTerminator()->getNumSuccessors() != 1) return nullptr; for (auto Incoming : drop_begin(zip(PN.blocks(), PN.incoming_values()))) { BasicBlock *InBB = std::get<0>(Incoming); Value *InVal = std::get<1>(Incoming); LoadInst *LI = dyn_cast(InVal); if (!LI || !LI->hasOneUser() || LI->isAtomic()) return nullptr; // Make sure all arguments are the same type of operation. if (LI->isVolatile() != IsVolatile || LI->getPointerAddressSpace() != LoadAddrSpace) return nullptr; // Can't forward swifterror through a phi. if (LI->getOperand(0)->isSwiftError()) return nullptr; // We can't sink the load if the loaded value could be modified between // the load and the PHI. if (LI->getParent() != InBB || !isSafeAndProfitableToSinkLoad(LI)) return nullptr; LoadAlignment = std::min(LoadAlignment, LI->getAlign()); // If the PHI is of volatile loads and the load block has multiple // successors, sinking it would remove a load of the volatile value from // the path through the other successor. if (IsVolatile && LI->getParent()->getTerminator()->getNumSuccessors() != 1) return nullptr; } // Okay, they are all the same operation. Create a new PHI node of the // correct type, and PHI together all of the LHS's of the instructions. 
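// For example (illustrative, not from this patch), a
//   phi [ load(%p1), %bb1 ], [ load(%p2), %bb2 ]
// becomes a pointer PHI of %p1/%p2 feeding a single load, created below with
// the minimum alignment computed above.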
PHINode *NewPN = PHINode::Create(FirstLI->getOperand(0)->getType(), PN.getNumIncomingValues(), PN.getName()+".in"); Value *InVal = FirstLI->getOperand(0); NewPN->addIncoming(InVal, PN.getIncomingBlock(0)); LoadInst *NewLI = new LoadInst(FirstLI->getType(), NewPN, "", IsVolatile, LoadAlignment); unsigned KnownIDs[] = { LLVMContext::MD_tbaa, LLVMContext::MD_range, LLVMContext::MD_invariant_load, LLVMContext::MD_alias_scope, LLVMContext::MD_noalias, LLVMContext::MD_nonnull, LLVMContext::MD_align, LLVMContext::MD_dereferenceable, LLVMContext::MD_dereferenceable_or_null, LLVMContext::MD_access_group, LLVMContext::MD_noundef, }; for (unsigned ID : KnownIDs) NewLI->setMetadata(ID, FirstLI->getMetadata(ID)); // Add all operands to the new PHI and combine TBAA metadata. for (auto Incoming : drop_begin(zip(PN.blocks(), PN.incoming_values()))) { BasicBlock *BB = std::get<0>(Incoming); Value *V = std::get<1>(Incoming); LoadInst *LI = cast(V); combineMetadata(NewLI, LI, KnownIDs, true); Value *NewInVal = LI->getOperand(0); if (NewInVal != InVal) InVal = nullptr; NewPN->addIncoming(NewInVal, BB); } if (InVal) { // The new PHI unions all of the same values together. This is really // common, so we handle it intelligently here for compile-time speed. NewLI->setOperand(0, InVal); delete NewPN; } else { InsertNewInstBefore(NewPN, PN.getIterator()); } // If this was a volatile load that we are merging, make sure to loop through // and mark all the input loads as non-volatile. If we don't do this, we will // insert a new volatile load and the old ones will not be deletable. if (IsVolatile) for (Value *IncValue : PN.incoming_values()) cast(IncValue)->setVolatile(false); PHIArgMergedDebugLoc(NewLI, PN); return NewLI; } /// TODO: This function could handle other cast types, but then it might /// require special-casing a cast from the 'i1' type. See the comment in /// FoldPHIArgOpIntoPHI() about pessimizing illegal integer types. Instruction *InstCombinerImpl::foldPHIArgZextsIntoPHI(PHINode &Phi) { // We cannot create a new instruction after the PHI if the terminator is an // EHPad because there is no valid insertion point. if (Instruction *TI = Phi.getParent()->getTerminator()) if (TI->isEHPad()) return nullptr; // Early exit for the common case of a phi with two operands. These are // handled elsewhere. See the comment below where we check the count of zexts // and constants for more details. unsigned NumIncomingValues = Phi.getNumIncomingValues(); if (NumIncomingValues < 3) return nullptr; // Find the narrower type specified by the first zext. Type *NarrowType = nullptr; for (Value *V : Phi.incoming_values()) { if (auto *Zext = dyn_cast(V)) { NarrowType = Zext->getSrcTy(); break; } } if (!NarrowType) return nullptr; // Walk the phi operands checking that we only have zexts or constants that // we can shrink for free. Store the new operands for the new phi. SmallVector NewIncoming; unsigned NumZexts = 0; unsigned NumConsts = 0; for (Value *V : Phi.incoming_values()) { if (auto *Zext = dyn_cast(V)) { // All zexts must be identical and have one user. if (Zext->getSrcTy() != NarrowType || !Zext->hasOneUser()) return nullptr; NewIncoming.push_back(Zext->getOperand(0)); NumZexts++; } else if (auto *C = dyn_cast(V)) { // Make sure that constants can fit in the new type. Constant *Trunc = getLosslessUnsignedTrunc(C, NarrowType); if (!Trunc) return nullptr; NewIncoming.push_back(Trunc); NumConsts++; } else { // If it's not a cast or a constant, bail out. 
return nullptr; } } // The more common cases of a phi with no constant operands or just one // variable operand are handled by FoldPHIArgOpIntoPHI() and foldOpIntoPhi() // respectively. foldOpIntoPhi() wants to do the opposite transform that is // performed here. It tries to replicate a cast in the phi operand's basic // block to expose other folding opportunities. Thus, InstCombine will // infinite loop without this check. if (NumConsts == 0 || NumZexts < 2) return nullptr; // All incoming values are zexts or constants that are safe to truncate. // Create a new phi node of the narrow type, phi together all of the new // operands, and zext the result back to the original type. PHINode *NewPhi = PHINode::Create(NarrowType, NumIncomingValues, Phi.getName() + ".shrunk"); for (unsigned I = 0; I != NumIncomingValues; ++I) NewPhi->addIncoming(NewIncoming[I], Phi.getIncomingBlock(I)); InsertNewInstBefore(NewPhi, Phi.getIterator()); return CastInst::CreateZExtOrBitCast(NewPhi, Phi.getType()); } /// If all operands to a PHI node are the same "unary" operator and they all are /// only used by the PHI, PHI together their inputs, and do the operation once, /// to the result of the PHI. Instruction *InstCombinerImpl::foldPHIArgOpIntoPHI(PHINode &PN) { // We cannot create a new instruction after the PHI if the terminator is an // EHPad because there is no valid insertion point. if (Instruction *TI = PN.getParent()->getTerminator()) if (TI->isEHPad()) return nullptr; Instruction *FirstInst = cast(PN.getIncomingValue(0)); if (isa(FirstInst)) return foldPHIArgGEPIntoPHI(PN); if (isa(FirstInst)) return foldPHIArgLoadIntoPHI(PN); if (isa(FirstInst)) return foldPHIArgInsertValueInstructionIntoPHI(PN); if (isa(FirstInst)) return foldPHIArgExtractValueInstructionIntoPHI(PN); // Scan the instruction, looking for input operations that can be folded away. // If all input operands to the phi are the same instruction (e.g. a cast from // the same type or "+42") we can pull the operation through the PHI, reducing // code size and simplifying code. Constant *ConstantOp = nullptr; Type *CastSrcTy = nullptr; if (isa(FirstInst)) { CastSrcTy = FirstInst->getOperand(0)->getType(); // Be careful about transforming integer PHIs. We don't want to pessimize // the code by turning an i32 into an i1293. if (PN.getType()->isIntegerTy() && CastSrcTy->isIntegerTy()) { if (!shouldChangeType(PN.getType(), CastSrcTy)) return nullptr; } } else if (isa(FirstInst) || isa(FirstInst)) { // Can fold binop, compare or shift here if the RHS is a constant, // otherwise call FoldPHIArgBinOpIntoPHI. ConstantOp = dyn_cast(FirstInst->getOperand(1)); if (!ConstantOp) return foldPHIArgBinOpIntoPHI(PN); } else { return nullptr; // Cannot fold this operation. } // Check to see if all arguments are the same operation. for (Value *V : drop_begin(PN.incoming_values())) { Instruction *I = dyn_cast(V); if (!I || !I->hasOneUser() || !I->isSameOperationAs(FirstInst)) return nullptr; if (CastSrcTy) { if (I->getOperand(0)->getType() != CastSrcTy) return nullptr; // Cast operation must match. } else if (I->getOperand(1) != ConstantOp) { return nullptr; } } // Okay, they are all the same operation. Create a new PHI node of the // correct type, and PHI together all of the LHS's of the instructions. PHINode *NewPN = PHINode::Create(FirstInst->getOperand(0)->getType(), PN.getNumIncomingValues(), PN.getName()+".in"); Value *InVal = FirstInst->getOperand(0); NewPN->addIncoming(InVal, PN.getIncomingBlock(0)); // Add all operands to the new PHI. 
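  // For example (illustrative): for a phi of two zexts, phi(zext i8 %a,
  // zext i8 %b), this loop feeds %a and %b into the narrow PHI created above;
  // the single widening cast is then re-created after the PHI further down.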
  for (auto Incoming : drop_begin(zip(PN.blocks(), PN.incoming_values()))) {
    BasicBlock *BB = std::get<0>(Incoming);
    Value *V = std::get<1>(Incoming);
    Value *NewInVal = cast<Instruction>(V)->getOperand(0);
    if (NewInVal != InVal)
      InVal = nullptr;
    NewPN->addIncoming(NewInVal, BB);
  }

  Value *PhiVal;
  if (InVal) {
    // The new PHI unions all of the same values together. This is really
    // common, so we handle it intelligently here for compile-time speed.
    PhiVal = InVal;
    delete NewPN;
  } else {
    InsertNewInstBefore(NewPN, PN.getIterator());
    PhiVal = NewPN;
  }

  // Insert and return the new operation.
  if (CastInst *FirstCI = dyn_cast<CastInst>(FirstInst)) {
    CastInst *NewCI =
        CastInst::Create(FirstCI->getOpcode(), PhiVal, PN.getType());
    PHIArgMergedDebugLoc(NewCI, PN);
    return NewCI;
  }

  if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(FirstInst)) {
    BinOp = BinaryOperator::Create(BinOp->getOpcode(), PhiVal, ConstantOp);
    BinOp->copyIRFlags(PN.getIncomingValue(0));

    for (Value *V : drop_begin(PN.incoming_values()))
      BinOp->andIRFlags(V);

    PHIArgMergedDebugLoc(BinOp, PN);
    return BinOp;
  }

  CmpInst *CIOp = cast<CmpInst>(FirstInst);
  CmpInst *NewCI = CmpInst::Create(CIOp->getOpcode(), CIOp->getPredicate(),
                                   PhiVal, ConstantOp);
  PHIArgMergedDebugLoc(NewCI, PN);
  return NewCI;
}

/// Return true if this PHI node is only used by a PHI node cycle that is dead.
static bool isDeadPHICycle(PHINode *PN,
                           SmallPtrSetImpl<PHINode *> &PotentiallyDeadPHIs) {
  if (PN->use_empty())
    return true;
  if (!PN->hasOneUse())
    return false;

  // Remember this node, and if we find the cycle, return.
  if (!PotentiallyDeadPHIs.insert(PN).second)
    return true;

  // Don't scan crazily complex things.
  if (PotentiallyDeadPHIs.size() == 16)
    return false;

  if (PHINode *PU = dyn_cast<PHINode>(PN->user_back()))
    return isDeadPHICycle(PU, PotentiallyDeadPHIs);

  return false;
}

/// Return true if this phi node is always equal to NonPhiInVal.
/// This happens with mutually cyclic phi nodes like:
///   z = some value; x = phi (y, z); y = phi (x, z)
static bool PHIsEqualValue(PHINode *PN, Value *&NonPhiInVal,
                           SmallPtrSetImpl<PHINode *> &ValueEqualPHIs) {
  // See if we already saw this PHI node.
  if (!ValueEqualPHIs.insert(PN).second)
    return true;

  // Don't scan crazily complex things.
  if (ValueEqualPHIs.size() == 16)
    return false;

  // Scan the operands to see if they are either phi nodes or are equal to
  // the value.
  for (Value *Op : PN->incoming_values()) {
    if (PHINode *OpPN = dyn_cast<PHINode>(Op)) {
      if (!PHIsEqualValue(OpPN, NonPhiInVal, ValueEqualPHIs)) {
        if (NonPhiInVal)
          return false;
        NonPhiInVal = OpPN;
      }
    } else if (Op != NonPhiInVal)
      return false;
  }

  return true;
}

/// Return an existing non-zero constant if this phi node has one, otherwise
/// return constant 1.
static ConstantInt *getAnyNonZeroConstInt(PHINode &PN) {
  assert(isa<IntegerType>(PN.getType()) && "Expect only integer type phi");
  for (Value *V : PN.operands())
    if (auto *ConstVA = dyn_cast<ConstantInt>(V))
      if (!ConstVA->isZero())
        return ConstVA;
  return ConstantInt::get(cast<IntegerType>(PN.getType()), 1);
}

namespace {
struct PHIUsageRecord {
  unsigned PHIId;    // The ID # of the PHI (something deterministic to sort on)
  unsigned Shift;    // The amount shifted.
  Instruction *Inst; // The trunc instruction.
  PHIUsageRecord(unsigned Pn, unsigned Sh, Instruction *User)
      : PHIId(Pn), Shift(Sh), Inst(User) {}

  bool operator<(const PHIUsageRecord &RHS) const {
    if (PHIId < RHS.PHIId)
      return true;
    if (PHIId > RHS.PHIId)
      return false;
    if (Shift < RHS.Shift)
      return true;
    if (Shift > RHS.Shift)
      return false;
    return Inst->getType()->getPrimitiveSizeInBits() <
           RHS.Inst->getType()->getPrimitiveSizeInBits();
  }
};

struct LoweredPHIRecord {
  PHINode *PN;    // The PHI that was lowered.
  unsigned Shift; // The amount shifted.
  unsigned Width; // The width extracted.

  LoweredPHIRecord(PHINode *Phi, unsigned Sh, Type *Ty)
      : PN(Phi), Shift(Sh), Width(Ty->getPrimitiveSizeInBits()) {}

  // Ctor form used by DenseMap.
  LoweredPHIRecord(PHINode *Phi, unsigned Sh) : PN(Phi), Shift(Sh), Width(0) {}
};
} // namespace

namespace llvm {
template <> struct DenseMapInfo<LoweredPHIRecord> {
  static inline LoweredPHIRecord getEmptyKey() {
    return LoweredPHIRecord(nullptr, 0);
  }
  static inline LoweredPHIRecord getTombstoneKey() {
    return LoweredPHIRecord(nullptr, 1);
  }
  static unsigned getHashValue(const LoweredPHIRecord &Val) {
    return DenseMapInfo<PHINode *>::getHashValue(Val.PN) ^ (Val.Shift >> 3) ^
           (Val.Width >> 3);
  }
  static bool isEqual(const LoweredPHIRecord &LHS,
                      const LoweredPHIRecord &RHS) {
    return LHS.PN == RHS.PN && LHS.Shift == RHS.Shift &&
           LHS.Width == RHS.Width;
  }
};
} // namespace llvm

/// This is an integer PHI and we know that it has an illegal type: see if it is
/// only used by trunc or trunc(lshr) operations. If so, we split the PHI into
/// the various pieces being extracted. This sort of thing is introduced when
/// SROA promotes an aggregate to large integer values.
///
/// TODO: The user of the trunc may be a bitcast to float/double/vector or an
/// inttoptr. We should produce new PHIs in the right type.
///
Instruction *InstCombinerImpl::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
  // PHIUsers - Keep track of all of the truncated values extracted from a set
  // of PHIs, along with their offset. These are the things we want to rewrite.
  SmallVector<PHIUsageRecord, 16> PHIUsers;

  // PHIs are often mutually cyclic, so we keep track of a whole set of PHI
  // nodes which are extracted from. PHIsInspected is a set we use to avoid
  // revisiting PHIs; PHIsToSlice is an ordered list of PHIs that we need to
  // check the uses of (to ensure they are all extracts).
  SmallVector<PHINode *, 8> PHIsToSlice;
  SmallPtrSet<PHINode *, 8> PHIsInspected;

  PHIsToSlice.push_back(&FirstPhi);
  PHIsInspected.insert(&FirstPhi);

  for (unsigned PHIId = 0; PHIId != PHIsToSlice.size(); ++PHIId) {
    PHINode *PN = PHIsToSlice[PHIId];

    // Scan the input list of the PHI. If any input is an invoke, and if the
    // input is defined in the predecessor, then we won't be able to split the
    // critical edge which is required to insert a truncate. Because of this,
    // we have to bail out.
    for (auto Incoming : zip(PN->blocks(), PN->incoming_values())) {
      BasicBlock *BB = std::get<0>(Incoming);
      Value *V = std::get<1>(Incoming);
      InvokeInst *II = dyn_cast<InvokeInst>(V);
      if (!II)
        continue;
      if (II->getParent() != BB)
        continue;

      // If we have an invoke, and if it's directly in the predecessor, then we
      // have a critical edge where we need to put the truncate. Since we can't
      // split the edge in instcombine, we have to bail out.
      return nullptr;
    }

    // If the incoming value is a PHI node before a catchswitch, we cannot
    // extract the value within that BB because we cannot insert any non-PHI
    // instructions in the BB.
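    // (A block whose getFirstInsertionPt() is end(), e.g. one whose first
    // non-PHI instruction is a catchswitch, has no legal spot to materialize
    // the extract, so we bail out for such predecessors.)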
    for (auto *Pred : PN->blocks())
      if (Pred->getFirstInsertionPt() == Pred->end())
        return nullptr;

    for (User *U : PN->users()) {
      Instruction *UserI = cast<Instruction>(U);

      // If the user is a PHI, inspect its uses recursively.
      if (PHINode *UserPN = dyn_cast<PHINode>(UserI)) {
        if (PHIsInspected.insert(UserPN).second)
          PHIsToSlice.push_back(UserPN);
        continue;
      }

      // Truncates are always ok.
      if (isa<TruncInst>(UserI)) {
        PHIUsers.push_back(PHIUsageRecord(PHIId, 0, UserI));
        continue;
      }

      // Otherwise it must be a lshr which can only be used by one trunc.
      if (UserI->getOpcode() != Instruction::LShr || !UserI->hasOneUse() ||
          !isa<TruncInst>(UserI->user_back()) ||
          !isa<ConstantInt>(UserI->getOperand(1)))
        return nullptr;

      // Bail on out of range shifts.
      unsigned SizeInBits = UserI->getType()->getScalarSizeInBits();
      if (cast<ConstantInt>(UserI->getOperand(1))->getValue().uge(SizeInBits))
        return nullptr;

      unsigned Shift = cast<ConstantInt>(UserI->getOperand(1))->getZExtValue();
      PHIUsers.push_back(PHIUsageRecord(PHIId, Shift, UserI->user_back()));
    }
  }

  // If we have no users, they must be all self uses, just nuke the PHI.
  if (PHIUsers.empty())
    return replaceInstUsesWith(FirstPhi, PoisonValue::get(FirstPhi.getType()));

  // If this phi node is transformable, create new PHIs for all the pieces
  // extracted out of it. First, sort the users by their offset and size.
  array_pod_sort(PHIUsers.begin(), PHIUsers.end());

  LLVM_DEBUG(dbgs() << "SLICING UP PHI: " << FirstPhi << '\n';
             for (unsigned I = 1; I != PHIsToSlice.size(); ++I) dbgs()
             << "AND USER PHI #" << I << ": " << *PHIsToSlice[I] << '\n');

  // PredValues - This is a temporary used when rewriting PHI nodes. It is
  // hoisted out here to avoid construction/destruction thrashing.
  DenseMap<BasicBlock *, Value *> PredValues;

  // ExtractedVals - Each new PHI we introduce is saved here so we don't
  // introduce redundant PHIs.
  DenseMap<LoweredPHIRecord, PHINode *> ExtractedVals;

  for (unsigned UserI = 0, UserE = PHIUsers.size(); UserI != UserE; ++UserI) {
    unsigned PHIId = PHIUsers[UserI].PHIId;
    PHINode *PN = PHIsToSlice[PHIId];
    unsigned Offset = PHIUsers[UserI].Shift;
    Type *Ty = PHIUsers[UserI].Inst->getType();

    PHINode *EltPHI;

    // If we've already lowered a user like this, reuse the previously lowered
    // value.
    if ((EltPHI = ExtractedVals[LoweredPHIRecord(PN, Offset, Ty)]) == nullptr) {

      // Otherwise, create the new PHI node for this user.
      EltPHI = PHINode::Create(Ty, PN->getNumIncomingValues(),
                               PN->getName() + ".off" + Twine(Offset),
                               PN->getIterator());
      assert(EltPHI->getType() != PN->getType() &&
             "Truncate didn't shrink phi?");

      for (auto Incoming : zip(PN->blocks(), PN->incoming_values())) {
        BasicBlock *Pred = std::get<0>(Incoming);
        Value *InVal = std::get<1>(Incoming);
        Value *&PredVal = PredValues[Pred];

        // If we already have a value for this predecessor, reuse it.
        if (PredVal) {
          EltPHI->addIncoming(PredVal, Pred);
          continue;
        }

        // Handle the PHI self-reuse case.
        if (InVal == PN) {
          PredVal = EltPHI;
          EltPHI->addIncoming(PredVal, Pred);
          continue;
        }

        if (PHINode *InPHI = dyn_cast<PHINode>(PN)) {
          // If the incoming value was a PHI, and if it was one of the PHIs we
          // already rewrote, just use the lowered value.
          if (Value *Res = ExtractedVals[LoweredPHIRecord(InPHI, Offset, Ty)]) {
            PredVal = Res;
            EltPHI->addIncoming(PredVal, Pred);
            continue;
          }
        }

        // Otherwise, do an extract in the predecessor.
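        // The extract is an (optional) lshr of the incoming value followed by
        // a trunc, emitted right before the predecessor's terminator, e.g.
        // (illustrative):
        //   %extract   = lshr i64 %inval, 32
        //   %extract.t = trunc i64 %extract to i32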
Builder.SetInsertPoint(Pred->getTerminator()); Value *Res = InVal; if (Offset) Res = Builder.CreateLShr( Res, ConstantInt::get(InVal->getType(), Offset), "extract"); Res = Builder.CreateTrunc(Res, Ty, "extract.t"); PredVal = Res; EltPHI->addIncoming(Res, Pred); // If the incoming value was a PHI, and if it was one of the PHIs we are // rewriting, we will ultimately delete the code we inserted. This // means we need to revisit that PHI to make sure we extract out the // needed piece. if (PHINode *OldInVal = dyn_cast(InVal)) if (PHIsInspected.count(OldInVal)) { unsigned RefPHIId = find(PHIsToSlice, OldInVal) - PHIsToSlice.begin(); PHIUsers.push_back( PHIUsageRecord(RefPHIId, Offset, cast(Res))); ++UserE; } } PredValues.clear(); LLVM_DEBUG(dbgs() << " Made element PHI for offset " << Offset << ": " << *EltPHI << '\n'); ExtractedVals[LoweredPHIRecord(PN, Offset, Ty)] = EltPHI; } // Replace the use of this piece with the PHI node. replaceInstUsesWith(*PHIUsers[UserI].Inst, EltPHI); } // Replace all the remaining uses of the PHI nodes (self uses and the lshrs) // with poison. Value *Poison = PoisonValue::get(FirstPhi.getType()); for (PHINode *PHI : drop_begin(PHIsToSlice)) replaceInstUsesWith(*PHI, Poison); return replaceInstUsesWith(FirstPhi, Poison); } static Value *simplifyUsingControlFlow(InstCombiner &Self, PHINode &PN, const DominatorTree &DT) { // Simplify the following patterns: // if (cond) // / \ // ... ... // \ / // phi [true] [false] // and // switch (cond) // case v1: / \ case v2: // ... ... // \ / // phi [v1] [v2] // Make sure all inputs are constants. if (!all_of(PN.operands(), [](Value *V) { return isa(V); })) return nullptr; BasicBlock *BB = PN.getParent(); // Do not bother with unreachable instructions. if (!DT.isReachableFromEntry(BB)) return nullptr; // Determine which value the condition of the idom has for which successor. LLVMContext &Context = PN.getContext(); auto *IDom = DT.getNode(BB)->getIDom()->getBlock(); Value *Cond; SmallDenseMap SuccForValue; SmallDenseMap SuccCount; auto AddSucc = [&](ConstantInt *C, BasicBlock *Succ) { SuccForValue[C] = Succ; ++SuccCount[Succ]; }; if (auto *BI = dyn_cast(IDom->getTerminator())) { if (BI->isUnconditional()) return nullptr; Cond = BI->getCondition(); AddSucc(ConstantInt::getTrue(Context), BI->getSuccessor(0)); AddSucc(ConstantInt::getFalse(Context), BI->getSuccessor(1)); } else if (auto *SI = dyn_cast(IDom->getTerminator())) { Cond = SI->getCondition(); ++SuccCount[SI->getDefaultDest()]; for (auto Case : SI->cases()) AddSucc(Case.getCaseValue(), Case.getCaseSuccessor()); } else { return nullptr; } if (Cond->getType() != PN.getType()) return nullptr; // Check that edges outgoing from the idom's terminators dominate respective // inputs of the Phi. std::optional Invert; for (auto Pair : zip(PN.incoming_values(), PN.blocks())) { auto *Input = cast(std::get<0>(Pair)); BasicBlock *Pred = std::get<1>(Pair); auto IsCorrectInput = [&](ConstantInt *Input) { // The input needs to be dominated by the corresponding edge of the idom. // This edge cannot be a multi-edge, as that would imply that multiple // different condition values follow the same edge. auto It = SuccForValue.find(Input); return It != SuccForValue.end() && SuccCount[It->second] == 1 && DT.dominates(BasicBlockEdge(IDom, It->second), BasicBlockEdge(Pred, BB)); }; // Depending on the constant, the condition may need to be inverted. 
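    // For example (illustrative): a phi [ false, %then ], [ true, %else ] fed
    // by `br i1 %cond` computes the negation of %cond, so NeedsInvert is set
    // and the result is materialized below as a `not` of the condition.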
bool NeedsInvert; if (IsCorrectInput(Input)) NeedsInvert = false; else if (IsCorrectInput(cast(ConstantExpr::getNot(Input)))) NeedsInvert = true; else return nullptr; // Make sure the inversion requirement is always the same. if (Invert && *Invert != NeedsInvert) return nullptr; Invert = NeedsInvert; } if (!*Invert) return Cond; // This Phi is actually opposite to branching condition of IDom. We invert // the condition that will potentially open up some opportunities for // sinking. auto InsertPt = BB->getFirstInsertionPt(); if (InsertPt != BB->end()) { Self.Builder.SetInsertPoint(&*BB, InsertPt); return Self.Builder.CreateNot(Cond); } return nullptr; } // Fold iv = phi(start, iv.next = iv2.next op start) // where iv2 = phi(iv2.start, iv2.next = iv2 + iv2.step) // and iv2.start op start = start // to iv = iv2 op start static Value *foldDependentIVs(PHINode &PN, IRBuilderBase &Builder) { BasicBlock *BB = PN.getParent(); if (PN.getNumIncomingValues() != 2) return nullptr; Value *Start; Instruction *IvNext; BinaryOperator *Iv2Next; auto MatchOuterIV = [&](Value *V1, Value *V2) { if (match(V2, m_c_BinOp(m_Specific(V1), m_BinOp(Iv2Next))) || match(V2, m_GEP(m_Specific(V1), m_BinOp(Iv2Next)))) { Start = V1; IvNext = cast(V2); return true; } return false; }; if (!MatchOuterIV(PN.getIncomingValue(0), PN.getIncomingValue(1)) && !MatchOuterIV(PN.getIncomingValue(1), PN.getIncomingValue(0))) return nullptr; PHINode *Iv2; Value *Iv2Start, *Iv2Step; if (!matchSimpleRecurrence(Iv2Next, Iv2, Iv2Start, Iv2Step) || Iv2->getParent() != BB) return nullptr; auto *BO = dyn_cast(IvNext); Constant *Identity = BO ? ConstantExpr::getBinOpIdentity(BO->getOpcode(), Iv2Start->getType()) : Constant::getNullValue(Iv2Start->getType()); if (Iv2Start != Identity) return nullptr; Builder.SetInsertPoint(&*BB, BB->getFirstInsertionPt()); if (!BO) { auto *GEP = cast(IvNext); return Builder.CreateGEP(GEP->getSourceElementType(), Start, Iv2, "", cast(IvNext)->getNoWrapFlags()); } assert(BO->isCommutative() && "Must be commutative"); Value *Res = Builder.CreateBinOp(BO->getOpcode(), Iv2, Start); cast(Res)->copyIRFlags(BO); return Res; } // PHINode simplification // Instruction *InstCombinerImpl::visitPHINode(PHINode &PN) { if (Value *V = simplifyInstruction(&PN, SQ.getWithInstruction(&PN))) return replaceInstUsesWith(PN, V); if (Instruction *Result = foldPHIArgZextsIntoPHI(PN)) return Result; if (Instruction *Result = foldPHIArgIntToPtrToPHI(PN)) return Result; // If all PHI operands are the same operation, pull them through the PHI, // reducing code size. auto *Inst0 = dyn_cast(PN.getIncomingValue(0)); auto *Inst1 = dyn_cast(PN.getIncomingValue(1)); if (Inst0 && Inst1 && Inst0->getOpcode() == Inst1->getOpcode() && Inst0->hasOneUser()) if (Instruction *Result = foldPHIArgOpIntoPHI(PN)) return Result; // If the incoming values are pointer casts of the same original value, // replace the phi with a single cast iff we can insert a non-PHI instruction. if (PN.getType()->isPointerTy() && PN.getParent()->getFirstInsertionPt() != PN.getParent()->end()) { Value *IV0 = PN.getIncomingValue(0); Value *IV0Stripped = IV0->stripPointerCasts(); // Set to keep track of values known to be equal to IV0Stripped after // stripping pointer casts. 
SmallPtrSet CheckedIVs; CheckedIVs.insert(IV0); if (IV0 != IV0Stripped && all_of(PN.incoming_values(), [&CheckedIVs, IV0Stripped](Value *IV) { return !CheckedIVs.insert(IV).second || IV0Stripped == IV->stripPointerCasts(); })) { return CastInst::CreatePointerCast(IV0Stripped, PN.getType()); } } // If this is a trivial cycle in the PHI node graph, remove it. Basically, if // this PHI only has a single use (a PHI), and if that PHI only has one use (a // PHI)... break the cycle. if (PN.hasOneUse()) { if (foldIntegerTypedPHI(PN)) return nullptr; Instruction *PHIUser = cast(PN.user_back()); if (PHINode *PU = dyn_cast(PHIUser)) { SmallPtrSet PotentiallyDeadPHIs; PotentiallyDeadPHIs.insert(&PN); if (isDeadPHICycle(PU, PotentiallyDeadPHIs)) return replaceInstUsesWith(PN, PoisonValue::get(PN.getType())); } // If this phi has a single use, and if that use just computes a value for // the next iteration of a loop, delete the phi. This occurs with unused // induction variables, e.g. "for (int j = 0; ; ++j);". Detecting this // common case here is good because the only other things that catch this // are induction variable analysis (sometimes) and ADCE, which is only run // late. if (PHIUser->hasOneUse() && (isa(PHIUser) || isa(PHIUser) || isa(PHIUser)) && PHIUser->user_back() == &PN) { return replaceInstUsesWith(PN, PoisonValue::get(PN.getType())); } } // When a PHI is used only to be compared with zero, it is safe to replace // an incoming value proved as known nonzero with any non-zero constant. // For example, in the code below, the incoming value %v can be replaced // with any non-zero constant based on the fact that the PHI is only used to // be compared with zero and %v is a known non-zero value: // %v = select %cond, 1, 2 // %p = phi [%v, BB] ... // icmp eq, %p, 0 // FIXME: To be simple, handle only integer type for now. // This handles a small number of uses to keep the complexity down, and an // icmp(or(phi)) can equally be replaced with any non-zero constant as the // "or" will only add bits. if (!PN.hasNUsesOrMore(3)) { SmallVector DropPoisonFlags; bool AllUsesOfPhiEndsInCmp = all_of(PN.users(), [&](User *U) { auto *CmpInst = dyn_cast(U); if (!CmpInst) { // This is always correct as OR only add bits and we are checking // against 0. if (U->hasOneUse() && match(U, m_c_Or(m_Specific(&PN), m_Value()))) { DropPoisonFlags.push_back(cast(U)); CmpInst = dyn_cast(U->user_back()); } } if (!CmpInst || !isa(PN.getType()) || !CmpInst->isEquality() || !match(CmpInst->getOperand(1), m_Zero())) { return false; } return true; }); // All uses of PHI results in a compare with zero. if (AllUsesOfPhiEndsInCmp) { ConstantInt *NonZeroConst = nullptr; bool MadeChange = false; for (unsigned I = 0, E = PN.getNumIncomingValues(); I != E; ++I) { Instruction *CtxI = PN.getIncomingBlock(I)->getTerminator(); Value *VA = PN.getIncomingValue(I); if (isKnownNonZero(VA, getSimplifyQuery().getWithInstruction(CtxI))) { if (!NonZeroConst) NonZeroConst = getAnyNonZeroConstInt(PN); if (NonZeroConst != VA) { replaceOperand(PN, I, NonZeroConst); // The "disjoint" flag may no longer hold after the transform. for (Instruction *I : DropPoisonFlags) I->dropPoisonGeneratingFlags(); MadeChange = true; } } } if (MadeChange) return &PN; } } // We sometimes end up with phi cycles that non-obviously end up being the // same value, for example: // z = some value; x = phi (y, z); y = phi (x, z) // where the phi nodes don't necessarily need to be in the same block. 
  // Do a quick check to see if the PHI node only contains a single non-phi
  // value; if so, scan to see if the phi cycle is actually equal to that value.
  // If the phi has no non-phi values then allow the "NonPhiInVal" to be set
  // later if one of the phis itself does not have a single input.
  {
    unsigned InValNo = 0, NumIncomingVals = PN.getNumIncomingValues();
    // Scan for the first non-phi operand.
    while (InValNo != NumIncomingVals &&
           isa<PHINode>(PN.getIncomingValue(InValNo)))
      ++InValNo;

    Value *NonPhiInVal =
        InValNo != NumIncomingVals ? PN.getIncomingValue(InValNo) : nullptr;

    // Scan the rest of the operands to see if there are any conflicts, if so
    // there is no need to recursively scan other phis.
    if (NonPhiInVal)
      for (++InValNo; InValNo != NumIncomingVals; ++InValNo) {
        Value *OpVal = PN.getIncomingValue(InValNo);
        if (OpVal != NonPhiInVal && !isa<PHINode>(OpVal))
          break;
      }

    // If we scanned over all operands, then we have one unique value plus
    // phi values. Scan PHI nodes to see if they all merge in each other or
    // the value.
    if (InValNo == NumIncomingVals) {
      SmallPtrSet<PHINode *, 8> ValueEqualPHIs;
      if (PHIsEqualValue(&PN, NonPhiInVal, ValueEqualPHIs))
        return replaceInstUsesWith(PN, NonPhiInVal);
    }
  }

  // If there are multiple PHIs, sort their operands so that they all list
  // the blocks in the same order. This will help identical PHIs be eliminated
  // by other passes. Other passes shouldn't depend on this for correctness
  // however.
  auto Res = PredOrder.try_emplace(PN.getParent());
  if (!Res.second) {
    const auto &Preds = Res.first->second;
    for (unsigned I = 0, E = PN.getNumIncomingValues(); I != E; ++I) {
      BasicBlock *BBA = PN.getIncomingBlock(I);
      BasicBlock *BBB = Preds[I];
      if (BBA != BBB) {
        Value *VA = PN.getIncomingValue(I);
        unsigned J = PN.getBasicBlockIndex(BBB);
        Value *VB = PN.getIncomingValue(J);
        PN.setIncomingBlock(I, BBB);
        PN.setIncomingValue(I, VB);
        PN.setIncomingBlock(J, BBA);
        PN.setIncomingValue(J, VA);
        // NOTE: Instcombine normally would want us to "return &PN" if we
        // modified any of the operands of an instruction. However, since we
        // aren't adding or removing uses (just rearranging them) we don't do
        // this in this case.
      }
    }
  } else {
    // Remember the block order of the first encountered phi node.
    append_range(Res.first->second, PN.blocks());
  }

  // Is there an identical PHI node in this basic block?
  for (PHINode &IdenticalPN : PN.getParent()->phis()) {
    // Ignore the PHI node itself.
    if (&IdenticalPN == &PN)
      continue;
    // Note that even though we've just canonicalized this PHI, due to the
    // worklist visitation order, there are no guarantees that *every* PHI
    // has been canonicalized, so we can't just compare operand ranges.
    if (!PN.isIdenticalToWhenDefined(&IdenticalPN))
      continue;
    // Just use that PHI instead then.
    ++NumPHICSEs;
    return replaceInstUsesWith(PN, &IdenticalPN);
  }

  // If this is an integer PHI and we know that it has an illegal type, see if
  // it is only used by trunc or trunc(lshr) operations. If so, we split the
  // PHI into the various pieces being extracted. This sort of thing is
  // introduced when SROA promotes an aggregate to a single large integer type.
  if (PN.getType()->isIntegerTy() &&
      !DL.isLegalInteger(PN.getType()->getPrimitiveSizeInBits()))
    if (Instruction *Res = SliceUpIllegalIntegerPHI(PN))
      return Res;

  // Ultimately, try to replace this Phi with a dominating condition.
  if (auto *V = simplifyUsingControlFlow(*this, PN, DT))
    return replaceInstUsesWith(PN, V);

  if (Value *Res = foldDependentIVs(PN, Builder))
    return replaceInstUsesWith(PN, Res);

  return nullptr;
}