Arthur She | d8cd8db | 2024-03-11 09:54:24 -0700 | [diff] [blame] | 1 | #!/usr/bin/env python3 |
| 2 | # |
| 3 | # Copyright (c) 2022 Google LLC. All rights reserved. |
| 4 | # |
| 5 | # SPDX-License-Identifier: BSD-3-Clause |
| 6 | |
| 7 | # quick hacky script to check patches if they are candidates for lts. it checks |
| 8 | # only the non-merge commits. |
| 9 | |
| 10 | import os |
| 11 | import git |
| 12 | import re |
| 13 | import sys |
| 14 | import csv |
| 15 | import argparse |
| 16 | import json |
| 17 | import subprocess |
| 18 | from io import StringIO |
| 19 | from unidiff import PatchSet |
| 20 | from config import MESSAGE_TOKENS, CPU_PATH_TOKEN, CPU_ERRATA_TOKEN, DOC_PATH_TOKEN, DOC_ERRATA_TOKEN |
| 21 | |
# Module-wide debug switch; main() sets it from the --debug command line flag.
global_debug = False


def debug_print(*args, **kwargs):
    """Print like the built-in print(), but only when debugging is enabled.

    All positional and keyword arguments are forwarded to print() unchanged.
    Nothing is printed while the module-level ``global_debug`` flag is falsy.
    """
    # Reading a module-level name needs no ``global`` declaration; the flag
    # is looked up at call time so later changes to it take effect.
    if global_debug:
        print(*args, **kwargs)
| 27 | |
def contains_re(pf, tok):
    """Tell whether any non-context line of a patched file matches a pattern.

    pf  -- iterable of hunks, each hunk being an iterable of line objects
           that expose ``is_context`` and ``value``
    tok -- compiled regular expression searched in each line

    Context lines are ignored, so only added or removed lines are examined.
    Each line's text is stripped of surrounding whitespace before searching.
    Returns True on the first match, False when nothing matches.
    """
    changed_text = (
        line.value.strip()
        for hunk in pf
        for line in hunk
        if not line.is_context
    )
    return any(tok.search(text) is not None for text in changed_text)
| 39 | |
def process_ps(ps):
    """Score a patch set by the errata-related changes it contains.

    ps -- iterable of patched-file objects (main() passes a unidiff PatchSet)

    Binary files and files not flagged as modified are skipped. For every
    remaining file the score is incremented by one when:
      * its path matches CPU_PATH_TOKEN and one of its added/removed lines
        matches CPU_ERRATA_TOKEN, and/or
      * its path matches DOC_PATH_TOKEN and one of its added/removed lines
        matches DOC_ERRATA_TOKEN.

    Returns the accumulated integer score.
    """
    score = 0

    # Path patterns select the files of interest; errata patterns are matched
    # against the changed lines. They are kept in separate variables so that
    # a match on one file does not replace the path pattern used to select
    # the following files.
    cpu_path_re = re.compile(CPU_PATH_TOKEN)
    cpu_errata_re = re.compile(CPU_ERRATA_TOKEN)
    doc_path_re = re.compile(DOC_PATH_TOKEN)
    doc_errata_re = re.compile(DOC_ERRATA_TOKEN)

    for pf in ps:
        if pf.is_binary_file or not pf.is_modified_file:
            continue

        if cpu_path_re.search(pf.path) is not None:
            debug_print("* change found in cpu path:", pf.path)
            if contains_re(pf, cpu_errata_re):
                score += 1
                debug_print(" found", CPU_ERRATA_TOKEN)

        if doc_path_re.search(pf.path) is not None:
            debug_print("* change found in macros doc path:", pf.path)
            if contains_re(pf, doc_errata_re):
                score += 1
                debug_print(" found", DOC_ERRATA_TOKEN)

    return score
| 64 | |
def query_gerrit(gerrit_user, ssh_key_path, change_id):
    """Look up the review URLs of a change on review.trustedfirmware.org.

    gerrit_user  -- account name used for the SSH login
    ssh_key_path -- private key file handed to ssh via -i
    change_id    -- Gerrit Change-Id to search for

    Runs ``gerrit query`` over SSH (port 29418) and parses every non-blank
    output line as JSON. Returns a dict mapping branch name to change URL,
    built from the records that carry both a "branch" and a "url" key.
    If the ssh command exits with a non-zero status, the error is printed
    and an empty dict is returned.
    """
    # Host key verification is switched off and ssh-rsa keys are accepted.
    connection_options = [
        "-o", "UserKnownHostsFile=/dev/null",
        "-o", "StrictHostKeyChecking=no",
        "-o", "PubkeyAcceptedKeyTypes=+ssh-rsa",
        "-p", "29418",
        "-i", ssh_key_path,
    ]
    remote_command = [
        f"gerrit query --format=JSON change:'{change_id}'",
        "repo:'TF-A/trusted-firmware-a'",
    ]
    ssh_command = (
        ["ssh"]
        + connection_options
        + [f"{gerrit_user}@review.trustedfirmware.org"]
        + remote_command
    )

    try:
        completed = subprocess.run(ssh_command, capture_output=True, text=True, check=True)
    except subprocess.CalledProcessError as e:
        print("Error executing SSH command:", e)
        return {}

    # One JSON document per output line; blank lines are dropped first.
    raw_lines = completed.stdout.strip().split("\n")
    records = list(map(json.loads, filter(str.strip, raw_lines)))

    # Create a dictionary with branch as key and URL as value
    branches_urls = {}
    for record in records:
        if "branch" in record and "url" in record:
            branches_urls[record["branch"]] = record["url"]
    return branches_urls
| 89 | |
# REBASE_DEPTH bounds how far back from the tip of the LTS branch we look to
# find the commit that the current patch set is based on. main() stops
# walking the LTS branch once it has collected this many distinct Gerrit
# Change-Ids.
REBASE_DEPTH = 20
| 93 | |
| 94 | |
## TODO: for cases like 921081049ec3, where a refactor is needed before the
# security patch can be applied, we should:
# 1. find the security patch
# 2. from that patch find the CVE number, if any
# 3. look for all patches that contain that CVE number in the commit message
| 100 | |
| 101 | ## TODO: similar to errata macros and rst file additions, we have CVE macros and rst file |
| 102 | # additions. so we can use similar logic for that. |
| 103 | |
## TODO: for security we should look for a CVE number regex match and flag the patch if found
# Matches the Gerrit Change-Id trailer of a commit message.
_CHANGE_ID_RE = re.compile(r'Change-Id:\s*(\w+)')


def _extract_change_id(message):
    """Return the Gerrit Change-Id found in a commit message, or None."""
    match = _CHANGE_ID_RE.search(message)
    return match.group(1) if match else None


def main():
    """Triage the 'integration' branch for LTS candidates and write a CSV.

    Command line arguments: --repo, --csv_path, --lts, --gerrit_user,
    --ssh_keyfile (all required) and the optional --debug flag.

    Steps:
      1. Collect Gerrit Change-Ids from the LTS branch, starting at its tip,
         until REBASE_DEPTH distinct ids are known.
      2. Walk the 'integration' branch from its tip and stop at the first
         commit whose Change-Id is one of those collected in step 1.
      3. Score each single-parent commit seen on the way: one point when the
         commit message matches MESSAGE_TOKENS (case-insensitive) plus the
         points returned by process_ps() for its diff. Print
         "<summary>: <score>" for every scored commit.
      4. For commits with a positive score, query Gerrit for the patch links
         and record a CSV row.
      5. Write the CSV file if at least one row was recorded.
    """
    global global_debug

    parser = argparse.ArgumentParser(prog="lts-triage.py", description="check patches for LTS candidacy")
    parser.add_argument("--repo", required=True, help="path to tf-a git repo")
    parser.add_argument("--csv_path", required=True, help="path including the filename for CSV file")
    parser.add_argument("--lts", required=True, help="LTS branch, ex. lts-v2.8")
    parser.add_argument("--gerrit_user", required=True, help="The user id to perform the Gerrit query")
    parser.add_argument("--ssh_keyfile", required=True, help="The SSH keyfile")
    parser.add_argument("--debug", help="print debug logs", action="store_true")

    args = parser.parse_args()
    lts_branch = args.lts
    gerrit_user = args.gerrit_user
    ssh_keyfile = args.ssh_keyfile
    global_debug = args.debug

    csv_columns = ["index", "commit id in the integration branch", "commit summary",
                   "score", "Gerrit Change-Id", "patch link for the LTS branch",
                   "patch link for the integration branch"]
    csv_data = []

    repo = git.Repo(args.repo)

    # Collect the Gerrit Change-Ids of the most recent LTS commits in a set.
    lts_change_ids = set()
    for cmt in repo.iter_commits(lts_branch):
        lts_id = _extract_change_id(cmt.message)
        if lts_id is not None:
            lts_change_ids.add(lts_id)

        if len(lts_change_ids) >= REBASE_DEPTH:
            break

    # Loop-invariant, so compile it once instead of once per commit.
    message_re = re.compile(MESSAGE_TOKENS, re.IGNORECASE)

    for cmt in repo.iter_commits('integration'):
        # Resolve the Change-Id afresh for every commit so that a commit
        # without the trailer never reuses the id of a previous commit.
        change_id = _extract_change_id(cmt.message)

        # if we find a same Change-Id among the ones we collected from the LTS branch
        # then we have seen all the new patches in the integration branch, so we should exit.
        if change_id is not None and change_id in lts_change_ids:
            print("## stopping because found common Gerrit Change-Id between the two branches: ", change_id)
            break

        # Don't process merge commits. A root commit has no parent either,
        # so "<sha>~1" below could not be resolved for it; skip it as well.
        if len(cmt.parents) != 1:
            continue

        score = 0
        if message_re.search(cmt.message) is not None:
            debug_print("## commit message match")
            score += 1

        diff_text = repo.git.diff(cmt.hexsha + "~1", cmt.hexsha, ignore_blank_lines=True, ignore_space_at_eol=True)
        ps = PatchSet(StringIO(diff_text))
        debug_print("# score before process_ps:", score)
        score += process_ps(ps)
        debug_print("# score after process_ps:", score)

        print(f"{cmt.summary}: {score}")

        if score <= 0:
            continue

        # Without a Change-Id there is nothing to look up on Gerrit.
        if change_id is not None:
            gerrit_links = query_gerrit(gerrit_user, ssh_keyfile, change_id)
        else:
            gerrit_links = {}

        # Append data to CSV; the index is the 1-based row number.
        csv_data.append({
            "index": len(csv_data) + 1,
            "commit id in the integration branch": cmt.hexsha,
            "commit summary": cmt.summary,
            "score": score,
            "Gerrit Change-Id": change_id if change_id is not None else "N/A",
            "patch link for the LTS branch": gerrit_links.get(lts_branch, "N/A"),
            "patch link for the integration branch": gerrit_links.get("integration", "N/A"),
        })

    # The CSV file is only created when there is at least one candidate.
    if csv_data:
        try:
            with open(args.csv_path, "w", newline='') as csvfile:
                writer = csv.DictWriter(csvfile, fieldnames=csv_columns)
                writer.writeheader()
                writer.writerows(csv_data)
        except OSError as err:
            # Only file system failures are reported here; programming
            # errors are left to propagate instead of being swallowed.
            print("\n\nERROR: Couldn't open CSV file due to error: ", err)


if __name__ == '__main__':
    main()