patman: Support parsing of review snippets

Add support for parsing the contents of a patchwork 'patch' web page
containing comments received from reviewers. This allows patman to show
these comments in a simple 'snippets' format.

A snippet is some quoted code plus some unquoted comments below it. Each
review is from a unique person/email and can produce multiple snippets,
one for each part of the code that attracts a comment.

Show the file and line-number info at the top of each snippet if
available.

Signed-off-by: Simon Glass <sjg@chromium.org>
diff --git a/tools/patman/func_test.py b/tools/patman/func_test.py
index 2e15295..bbee4b7 100644
--- a/tools/patman/func_test.py
+++ b/tools/patman/func_test.py
@@ -1043,3 +1043,86 @@
         self.assertEqual('Reviewed-by: %s' % self.fred, next(lines))
         self.assertEqual('Reviewed-by: %s' % self.mary, next(lines))
         self.assertEqual('Tested-by: %s' % self.leb, next(lines))
+
+    def testParseSnippets(self):
+        """Test parsing of review snippets"""
+        text = '''Hi Fred,
+
+This is a comment from someone.
+
+Something else
+
+On some recent date, Fred wrote:
+> This is why I wrote the patch
+> so here it is
+
+Now a comment about the commit message
+A little more to say
+
+Even more
+
+> diff --git a/file.c b/file.c
+> Some more code
+> Code line 2
+> Code line 3
+> Code line 4
+> Code line 5
+> Code line 6
+> Code line 7
+> Code line 8
+> Code line 9
+
+And another comment
+
+> @@ -153,8 +143,13 @@ def CheckPatch(fname, show_types=False):
+>  further down on the file
+>  and more code
+> +Addition here
+> +Another addition here
+>  codey
+>  more codey
+
+and another thing in same file
+
+> @@ -253,8 +243,13 @@
+>  with no function context
+
+one more thing
+
+> diff --git a/tools/patman/main.py b/tools/patman/main.py
+> +line of code
+now a very long comment in a different file
+line2
+line3
+line4
+line5
+line6
+line7
+line8
+'''
+        pstrm = PatchStream.process_text(text, True)
+        self.assertEqual([], pstrm.commit.warn)
+
+        # We expect to the filename and up to 5 lines of code context before
+        # each comment. The 'On xxx wrote:' bit should be removed.
+        self.assertEqual(
+            [['Hi Fred,',
+              'This is a comment from someone.',
+              'Something else'],
+             ['> This is why I wrote the patch',
+              '> so here it is',
+              'Now a comment about the commit message',
+              'A little more to say', 'Even more'],
+             ['> File: file.c', '> Code line 5', '> Code line 6',
+              '> Code line 7', '> Code line 8', '> Code line 9',
+              'And another comment'],
+             ['> File: file.c',
+              '> Line: 153 / 143: def CheckPatch(fname, show_types=False):',
+              '>  and more code', '> +Addition here', '> +Another addition here',
+              '>  codey', '>  more codey', 'and another thing in same file'],
+             ['> File: file.c', '> Line: 253 / 243',
+              '>  with no function context', 'one more thing'],
+             ['> File: tools/patman/main.py', '> +line of code',
+              'now a very long comment in a different file',
+              'line2', 'line3', 'line4', 'line5', 'line6', 'line7', 'line8']],
+            pstrm.snippets)
diff --git a/tools/patman/patchstream.py b/tools/patman/patchstream.py
index a4bf08b..c165bc1 100644
--- a/tools/patman/patchstream.py
+++ b/tools/patman/patchstream.py
@@ -4,11 +4,13 @@
 
 """Handles parsing a stream of commits/emails from 'git log' or other source"""
 
+import collections
 import datetime
 import io
 import math
 import os
 import re
+import queue
 import shutil
 import tempfile
 
@@ -51,6 +53,12 @@
 # Match indented lines for changes
 RE_LEADING_WHITESPACE = re.compile(r'^\s')
 
+# Detect a 'diff' line
+RE_DIFF = re.compile(r'^>.*diff --git a/(.*) b/(.*)$')
+
+# Detect a context line, like '> @@ -153,8 +153,13 @@ CheckPatch
+RE_LINE = re.compile(r'>.*@@ \-(\d+),\d+ \+(\d+),\d+ @@ *(.*)')
+
 # States we can be in - can we use range() and still have comments?
 STATE_MSG_HEADER = 0        # Still in the message header
 STATE_PATCH_SUBJECT = 1     # In patch subject (first line of log for a commit)
@@ -81,6 +89,14 @@
         self.blank_count = 0             # Number of blank lines stored up
         self.state = STATE_MSG_HEADER    # What state are we in?
         self.commit = None               # Current commit
+        self.snippets = []               # List of unquoted test blocks
+        self.cur_diff = None             # Last 'diff' line seen (str)
+        self.cur_line = None             # Last context (@@) line seen (str)
+        self.recent_diff= None           # 'diff' line for current snippet (str)
+        self.recent_line= None           # '@@' line for current snippet (str)
+        self.recent_quoted = collections.deque([], 5)
+        self.recent_unquoted = queue.Queue()
+        self.was_quoted = None
 
     @staticmethod
     def process_text(text, is_comment=False):
@@ -176,6 +192,10 @@
             self.skip_blank = True
             self.section = []
 
+        self.cur_diff = None
+        self.recent_diff = None
+        self.recent_line = None
+
     def _parse_version(self, value, line):
         """Parse a version from a *-changes tag
 
@@ -209,6 +229,47 @@
             self.commit.AddChange(self.change_version, change)
         self.change_lines = []
 
+    def _finalise_snippet(self):
+        """Finish off a snippet and add it to the list
+
+        This is called when we get to the end of a snippet, i.e. the we enter
+        the next block of quoted text:
+
+            This is a comment from someone.
+
+            Something else
+
+            > Now we have some code          <----- end of snippet
+            > more code
+
+            Now a comment about the above code
+
+        This adds the snippet to our list
+        """
+        quoted_lines = []
+        while self.recent_quoted:
+            quoted_lines.append(self.recent_quoted.popleft())
+        unquoted_lines = []
+        valid = False
+        while not self.recent_unquoted.empty():
+            text = self.recent_unquoted.get()
+            if not (text.startswith('On ') and text.endswith('wrote:')):
+                unquoted_lines.append(text)
+            if text:
+                valid = True
+        if valid:
+            lines = []
+            if self.recent_diff:
+                lines.append('> File: %s' % self.recent_diff)
+            if self.recent_line:
+                out = '> Line: %s / %s' % self.recent_line[:2]
+                if self.recent_line[2]:
+                    out += ': %s' % self.recent_line[2]
+                lines.append(out)
+            lines += quoted_lines + unquoted_lines
+            if lines:
+                self.snippets.append(lines)
+
     def process_line(self, line):
         """Process a single line of a patch file or commit log
 
@@ -254,6 +315,8 @@
         cover_match = RE_COVER.match(line)
         signoff_match = RE_SIGNOFF.match(line)
         leading_whitespace_match = RE_LEADING_WHITESPACE.match(line)
+        diff_match = RE_DIFF.match(line)
+        line_match = RE_LINE.match(line)
         tag_match = None
         if self.state == STATE_PATCH_HEADER:
             tag_match = RE_TAG.match(line)
@@ -443,6 +506,27 @@
             out = [line]
             self.linenum += 1
             self.skip_blank = False
+
+            if diff_match:
+                self.cur_diff = diff_match.group(1)
+
+            # If this is quoted, keep recent lines
+            if not diff_match and self.linenum > 1 and line:
+                if line.startswith('>'):
+                    if not self.was_quoted:
+                        self._finalise_snippet()
+                        self.recent_line = None
+                    if not line_match:
+                        self.recent_quoted.append(line)
+                    self.was_quoted = True
+                    self.recent_diff = self.cur_diff
+                else:
+                    self.recent_unquoted.put(line)
+                    self.was_quoted = False
+
+            if line_match:
+                self.recent_line = line_match.groups()
+
             if self.state == STATE_DIFFS:
                 pass
 
@@ -466,6 +550,7 @@
 
     def finalise(self):
         """Close out processing of this patch stream"""
+        self._finalise_snippet()
         self._finalise_change()
         self._close_commit()
         if self.lines_after_test: