patman: Support checking for review tags in patchwork

Before sending out a new version of a series for review, it is important
to add any review tags (e.g. Reviewed-by, Acked-by) collected by
patchwork. Otherwise people waste time reviewing the same patch
repeatedly, become frustrated and stop reviewing your patches.

To help with this, add a new 'status' subcommand that checks patchwork
for review tags, showing those which are not present in the local branch.

This allows users to see what new review tags have been received and then
add them.

Sample output:
   $ patman status
     1 Subject 1
       Reviewed-by: Joe Bloggs <joe@napierwallies.co.nz>
     2 Subject 2
       Tested-by: Lord Edmund Blackaddër <weasel@blackadder.org>
       Reviewed-by: Fred Bloggs <f.bloggs@napier.net>
     + Reviewed-by: Mary Bloggs <mary@napierwallies.co.nz>
   1 new response available in patchwork

The '+' indicates a new tag. Colours are used to make it easier to read.

Signed-off-by: Simon Glass <sjg@chromium.org>
diff --git a/tools/patman/status.py b/tools/patman/status.py
new file mode 100644
index 0000000..f41b2d4
--- /dev/null
+++ b/tools/patman/status.py
@@ -0,0 +1,356 @@
+# SPDX-License-Identifier: GPL-2.0+
+#
+# Copyright 2020 Google LLC
+#
+"""Talks to the patchwork service to figure out what patches have been reviewed
+and commented on.
+"""
+
+import collections
+import concurrent.futures
+from itertools import repeat
+import re
+import requests
+
+from patman.patchstream import PatchStream
+from patman import terminal
+from patman import tout
+
+# Patches which are part of a multi-patch series are shown with a prefix of
+# the form [prefix,version,sequence], for example '[RFC,v2,3/5]'. All parts but
+# the last are optional. This decodes the string into groups. For single
+# patches the [] part may be absent altogether:
+# Groups: (ignore, ignore, ignore, prefix, version, sequence, subject)
+RE_PATCH = re.compile(r'(\[(((.*),)?(.*),)?(.*)\]\s)?(.*)$')
+
+# This decodes the sequence string, e.g. '3/5', into a patch number and count
+RE_SEQ = re.compile(r'(\d+)/(\d+)')
+
+def to_int(vals):
+    """Convert a list of strings into integers, using 0 if not an integer
+
+    Args:
+        vals (list): List of strings
+
+    Returns:
+        list: One int per input; 0 unless the string is all decimal digits
+    """
+    # isdigit() also accepts characters like '²', which int() cannot parse
+    return [int(val) if val.isdecimal() else 0 for val in vals]
+
+
+class Patch(dict):
+    """Models a patch in patchwork
+
+    This class records information obtained from patchwork
+
+    Some of this information comes from the 'Patch' column:
+
+        [RFC,v2,1/3] dm: Driver and uclass changes for tiny-dm
+
+    This shows the prefix, version, seq, count and subject.
+
+    The other properties come from other columns in the display.
+
+    Properties:
+        id (str): ID of the patch (typically an integer)
+        seq (int): Sequence number within series (1=first) parsed from sequence
+            string
+        count (int): Number of patches in series, parsed from sequence string
+        raw_subject (str): Entire subject line, e.g.
+            "[1/2,v2] efi_loader: Sort header file ordering"
+        prefix (str): Prefix string or None (e.g. 'RFC')
+        version (str): Version string or None (e.g. 'v2')
+        subject (str): Patch subject with [..] part removed (same as commit
+            subject)
+    """
+    def __init__(self, pid):
+        super().__init__()
+        self.id = pid  # Use 'id' to match what the Rest API provides
+        self.seq = None
+        self.count = None
+        self.prefix = None
+        self.version = None
+        self.raw_subject = None
+        self.subject = None
+
+    # These make us more like a dictionary
+    def __setattr__(self, name, value):
+        self[name] = value
+
+    def __getattr__(self, name):
+        # Must be AttributeError (not KeyError) so hasattr()/getattr() work
+        try:
+            return self[name]
+        except KeyError:
+            raise AttributeError(name) from None
+
+    def __hash__(self):
+        return hash(frozenset(self.items()))
+
+    def __str__(self):
+        return self.raw_subject
+
+    def parse_subject(self, raw_subject):
+        """Parse the subject of a patch into its component parts
+
+        See RE_PATCH for details. The parsed info is placed into seq, count,
+        prefix, version, subject
+
+        Args:
+            raw_subject (str): Subject string to parse
+
+        Raises:
+            ValueError: the subject cannot be parsed
+        """
+        self.raw_subject = raw_subject.strip()
+        mat = RE_PATCH.search(self.raw_subject)
+        if not mat:
+            raise ValueError("Cannot parse subject '%s'" % raw_subject)
+        # Tolerate spaces after the commas, e.g. '[RFC, v2, 3/5]'
+        parts = [part and part.strip() for part in mat.groups()[3:6]]
+        self.prefix, self.version, seq_info = parts
+        self.subject = mat.group(7)
+        mat_seq = RE_SEQ.match(seq_info) if seq_info else None
+        if seq_info and not mat_seq:
+            # The last part is not a sequence, so treat it as the version
+            self.version = seq_info
+        if self.version and not self.version.startswith('v'):
+            self.prefix, self.version = self.version, None
+        self.seq = int(mat_seq.group(1)) if mat_seq else 1
+        self.count = int(mat_seq.group(2)) if mat_seq else 1
+
+def compare_with_series(series, patches):
+    """Match up the commits in a series with the patches in patchwork
+
+    Commits and patches are paired up by comparing their subject lines
+
+    Args:
+        series (Series): Series to compare against
+        patches (:type: list of Patch): list of Patch objects to compare with
+
+    Returns:
+        tuple:
+            dict:
+                key: Commit number (0...n-1)
+                value: Patch object for that commit
+            dict:
+                key: Patch number  (0...n-1)
+                value: Commit object for that patch
+            list of str: Warnings about commits/patches not paired up
+    """
+    problems = []
+    # Find the patch for each commit; a patch can only be claimed once
+    patch_for_commit = {}
+    unclaimed = set(patches)
+    for cnum, commit in enumerate(series.commits):
+        found = [pat for pat in unclaimed if pat.subject == commit.subject]
+        if not found:
+            problems.append("Cannot find patch for commit %d ('%s')" %
+                            (cnum + 1, commit.subject))
+        elif len(found) > 1:
+            problems.append("Multiple patches match commit %d ('%s'):\n   %s" %
+                            (cnum + 1, commit.subject,
+                             '\n   '.join([pat.subject for pat in found])))
+        else:
+            patch_for_commit[cnum] = found[0]
+            unclaimed.remove(found[0])
+
+    # Now the other way around: find the commit for each patch
+    commit_for_patch = {}
+    unclaimed = set(series.commits)
+    for pnum, patch in enumerate(patches):
+        found = [cmt for cmt in unclaimed if cmt.subject == patch.subject]
+        if not found:
+            problems.append("Cannot find commit for patch %d ('%s')" %
+                            (pnum + 1, patch.subject))
+        elif len(found) > 1:
+            problems.append("Multiple commits match patch %d ('%s'):\n   %s" %
+                            (pnum + 1, patch.subject,
+                             '\n   '.join([cmt.subject for cmt in found])))
+        else:
+            commit_for_patch[pnum] = found[0]
+            unclaimed.remove(found[0])
+
+    return patch_for_commit, commit_for_patch, problems
+
+def call_rest_api(subpath):
+    """Call the patchwork API and return the result as JSON
+
+    Args:
+        subpath (str): URL subpath to use, e.g. 'series/1234/'
+
+    Returns:
+        dict or list: Decoded JSON body of the response
+
+    Raises:
+        ValueError: the server did not answer with HTTP status 200
+    """
+    url = 'https://patchwork.ozlabs.org/api/1.2/%s' % subpath
+    response = requests.get(url, timeout=60)  # don't hang on a dead server
+    if response.status_code != 200:
+        raise ValueError("Could not read URL '%s'" % url)
+    return response.json()
+
+def collect_patches(series, series_id, rest_api=call_rest_api):
+    """Collect patch information about a series from patchwork
+
+    Uses the Patchwork REST API to collect information provided by patchwork
+    about the status of each patch.
+
+    A warning is shown if the number of patches reported by patchwork differs
+    from the number of commits in the local series.
+
+    Args:
+        series (Series): Series object corresponding to the local branch
+            containing the series
+        series_id (str): Patch series ID number
+        rest_api (function): API function to call to access Patchwork, for
+            testing
+
+    Returns:
+        list: List of patches sorted by sequence number, each a Patch object
+
+    Raises:
+        ValueError: if the URL could not be read or a patch subject cannot be
+            parsed
+    """
+    data = rest_api('series/%s/' % series_id)
+
+    # Each entry describes one patch in the series; the fields used here are
+    # its patchwork 'id' and its 'name' (the full subject line)
+    pw_patches = data['patches']
+    count = len(pw_patches)
+    num_commits = len(series.commits)
+    if count != num_commits:
+        tout.Warning('Warning: Patchwork reports %d patches, series has %d' %
+                     (count, num_commits))
+
+    # Build a Patch for each entry, decoding the subject as we go
+    patches = []
+    for pw_patch in pw_patches:
+        patch = Patch(pw_patch['id'])
+        patch.parse_subject(pw_patch['name'])
+        patches.append(patch)
+
+    # Sort patches by patch number
+    patches.sort(key=lambda patch: patch.seq)
+    return patches
+
+def find_new_responses(new_rtag_list, seq, cmt, patch, rest_api=call_rest_api):
+    """Work out which review tags in patchwork are missing from a commit
+
+    This is designed to be run in parallel, once for each commit/patch. Tags
+    are gathered from the patch email and from every comment on it, then any
+    which are already present in the commit are dropped.
+
+    Args:
+        new_rtag_list (list): Result list; new_rtag_list[seq] is set to a
+            dict of the new tags (left untouched if there is no patch):
+                key: Response tag (e.g. 'Reviewed-by')
+                value: Set of people who gave that response, each a name/email
+                    string
+        seq (int): Position in new_rtag_list to update
+        cmt (Commit): Commit object for this commit
+        patch (Patch): Corresponding Patch object for this patch, or None
+        rest_api (function): API function to call to access Patchwork, for
+            testing
+    """
+    if not patch:
+        return
+
+    found = collections.defaultdict(set)
+
+    def gather(text):
+        """Add the review tags found in a piece of text to 'found'"""
+        stream = PatchStream.process_text(text, True)
+        for tag, who_set in stream.commit.rtags.items():
+            found[tag].update(who_set)
+
+    # Get the content for the patch email itself, then all of its comments
+    patch_data = rest_api('patches/%s/' % patch.id)
+    gather(patch_data['content'])
+    for comment in rest_api('patches/%s/comments/' % patch.id):
+        gather(comment['content'])
+
+    # Keep only the tag/person pairs which the commit does not already have
+    known = cmt.rtags
+    fresh = collections.defaultdict(set)
+    for tag, who_set in found.items():
+        for who in who_set:
+            if tag in known and who in known[tag]:
+                continue
+            fresh[tag].add(who)
+    new_rtag_list[seq] = fresh
+
+def show_responses(rtags, indent, is_new):
+    """Print each review tag along with the person who gave it
+
+    Args:
+        rtags (dict): review tags to show
+            key: Response tag (e.g. 'Reviewed-by')
+            value: Set of people who gave that response, each a name/email string
+        indent (str): Indentation string to write before each line
+        is_new (bool): True to mark each line with '+' and show it in bright
+
+    Returns:
+        int: Number of review tags displayed
+    """
+    palette = terminal.Color()
+    shown = 0
+    for tag, people in rtags.items():
+        label = indent + '%s %s: ' % ('+' if is_new else ' ', tag)
+        for shown, who in enumerate(people, shown + 1):
+            terminal.Print(label, newline=False, colour=palette.GREEN,
+                           bright=is_new)
+            terminal.Print(who, colour=palette.WHITE, bright=is_new)
+    return shown
+
+def check_patchwork_status(series, series_id, rest_api=call_rest_api):
+    """Check the status of a series on Patchwork
+
+    This finds review tags for a series in Patchwork, displaying them to show
+    what is new compared to the local series. Any mismatch between the local
+    commits and the patches in Patchwork is reported as a warning.
+
+    Args:
+        series (Series): Series object for the existing branch
+        series_id (str): Patch series ID number
+        rest_api (function): API function to call to access Patchwork, for
+            testing
+    """
+    patches = collect_patches(series, series_id, rest_api)
+    col = terminal.Color()
+    count = len(series.commits)
+    new_rtag_list = [None] * count
+
+    patch_for_commit, _, warnings = compare_with_series(series, patches)
+    for warn in warnings:
+        tout.Warning(warn)
+
+    patch_list = [patch_for_commit.get(seq) for seq in range(count)]
+
+    # Query patchwork for all patches in parallel, since each needs its own
+    # REST requests. Each call fills in its own entry of new_rtag_list
+    with concurrent.futures.ThreadPoolExecutor(max_workers=16) as executor:
+        results = executor.map(
+            find_new_responses, repeat(new_rtag_list), range(count),
+            series.commits, patch_list, repeat(rest_api))
+    # map() yields return values (always None here), not futures. Stepping
+    # through them re-raises any exception which occurred in a worker
+    for _ in results:
+        pass
+
+    num_to_add = 0
+    indent = ' ' * 2
+    for seq, cmt in enumerate(series.commits):
+        patch = patch_list[seq]
+        if not patch:
+            continue
+        terminal.Print('%3d %s' % (patch.seq, patch.subject[:50]),
+                       colour=col.BLUE)
+        show_responses(cmt.rtags, indent, False)
+        num_to_add += show_responses(new_rtag_list[seq], indent, True)
+
+    terminal.Print("%d new response%s available in patchwork" %
+                   (num_to_add, 's' if num_to_add != 1 else ''))