Masahiro Yamada | 94b13bb | 2018-01-21 18:34:57 +0900 | [diff] [blame] | 1 | #!/usr/bin/env python2 |
Heinrich Schuchardt | 6bfa036 | 2018-06-03 18:59:13 +0200 | [diff] [blame] | 2 | # SPDX-License-Identifier: GPL-2.0+ |
Masahiro Yamada | 45765ee | 2014-07-16 17:49:45 +0900 | [diff] [blame] | 3 | # |
| 4 | # Copyright (C) 2014, Masahiro Yamada <yamada.m@jp.panasonic.com> |
Masahiro Yamada | 45765ee | 2014-07-16 17:49:45 +0900 | [diff] [blame] | 5 | |
| 6 | ''' |
| 7 | A tool to create/update the mailmap file |
| 8 | |
| 9 | The command 'git shortlog' summarizes git log output in a format suitable |
| 10 | for inclusion in release announcements. Each commit will be grouped by |
| 11 | author and title. |
| 12 | |
| 13 | One problem is that the authors' name and/or email address is sometimes |
| 14 | spelled differently. The .mailmap feature can be used to coalesce together |
| 15 | commits by the same person. |
| 16 | (See 'man git-shortlog' for further information on this feature.) |
| 17 | |
| 18 | This tool helps to create/update the mailmap file. |
| 19 | |
| 20 | It runs 'git shortlog' internally and searches differently spelled author |
| 21 | names which share the same email address. The author name with the most |
| 22 | commits is assumed to be a canonical real name. If the number of commits |
| 23 | from the canonical name is equal to or greater than 'MIN_COMMITS', |
| 24 | the entry for the canonical name will be output. ('MIN_COMMITS' is used |
| 25 | here because we do not want to create a fat mailmap by adding every author |
| 26 | with only a few commits.) |
| 27 | |
| 28 | If there exists a mailmap file specified by the mailmap.file configuration |
| 29 | option or '.mailmap' at the toplevel of the repository, it is used as |
| 30 | a base file. (The mailmap.file configuration takes precedence over the |
| 31 | '.mailmap' file if both exist.) |
| 32 | |
| 33 | The base file and the newly added entries are merged together and sorted |
| 34 | alphabetically (but the comment block is kept untouched), and then printed |
| 35 | to standard output. |
| 36 | |
| 37 | Usage |
| 38 | ----- |
| 39 | |
| 40 | scripts/mailmapper |
| 41 | |
| 42 | prints the mailmapping to standard output. |
| 43 | |
| 44 | scripts/mailmapper > tmp; mv tmp .mailmap |
| 45 | |
| 46 | will be useful for updating '.mailmap' file. |
| 47 | ''' |
| 48 | |
| 49 | import sys |
| 50 | import os |
| 51 | import subprocess |
| 52 | |
# Commit-count threshold used to decide whether a canonical name gets a
# mailmap entry.  This limit keeps the generated mailmap file from growing
# too big.
MIN_COMMITS = 50
| 56 | |
# Ask git for the top-level directory of the working tree.  This doubles as
# the check that we are running inside a git repository.
try:
    toplevel = subprocess.check_output(['git', 'rev-parse', '--show-toplevel'])
except subprocess.CalledProcessError:
    sys.exit('Please run in a git repository.')
except OSError:
    # The git executable itself could not be started (e.g. not installed).
    sys.exit('Failed to run git. Please make sure it is installed.')

# Strip only the trailing newline.  A bare rstrip() would also remove
# whitespace that legitimately ends the directory name.
toplevel = toplevel.rstrip(b'\n')

# Change the current working directory to the toplevel of the repository
# so that the relative '.mailmap' path opened later resolves against it.
os.chdir(toplevel)
| 68 | |
# First, create an 'author name' vs 'number of commits' database.
# We assume the name with the most commits is the canonical real name.
#
# 'HEAD' is passed explicitly: without a revision argument, git shortlog
# reads the log from standard input whenever stdin is not a terminal.
# universal_newlines=True makes check_output() return text rather than
# bytes, on Python 2 and Python 3 alike.
shortlog = subprocess.check_output(['git', 'shortlog', '-s', '-n', 'HEAD'],
                                   universal_newlines=True)

commits_per_name = {}

for line in shortlog.splitlines():
    # A summary line is the commit count followed by the author name.
    try:
        commits, name = line.split(None, 1)
    except ValueError:
        # No author name on this line: skip it instead of falling through
        # and re-using 'commits'/'name' left over from a previous line
        # (or hitting a NameError if this is the very first line).
        continue
    commits_per_name[name] = int(commits)
| 82 | |
# Next, coalesce the author names that share the same email address.
#
# 'HEAD' is passed explicitly: without a revision argument, git shortlog
# reads the log from standard input whenever stdin is not a terminal.
# universal_newlines=True makes check_output() return text rather than
# bytes, on Python 2 and Python 3 alike.
shortlog = subprocess.check_output(
    ['git', 'shortlog', '-s', '-n', '-e', 'HEAD'],
    universal_newlines=True)

# email address -> name with the most commits seen so far for that address
mail_vs_name = {}
# email address -> canonical name; only for addresses that will be printed
output = {}

for line in shortlog.splitlines():
    # Cut the line at the first '<', which opens the email address.
    # line.rsplit(None, 1) is not safe because weird email addresses
    # might include whitespace, and a bare line.split('<') raises
    # ValueError unless the line contains exactly one '<'.
    tmp, bracket, mail = line.partition('<')
    if not bracket:
        # No email address on this line; nothing to coalesce.
        continue
    mail = '<' + mail.rstrip()
    try:
        _, name = tmp.rstrip().split(None, 1)
    except ValueError:
        # author name is empty
        name = ''
    if mail not in mail_vs_name:
        # First name seen for this email address.
        mail_vs_name[mail] = name
        continue
    # Another name for the same email address: take the name with more
    # commits.  An empty or unknown name counts as zero commits.  The new
    # name is listed first so that it wins when both counts are equal.
    prev_name = mail_vs_name[mail]
    major_name = max([name, prev_name],
                     key=lambda x: commits_per_name.get(x, 0))
    mail_vs_name[mail] = major_name
    # Names with MIN_COMMITS *or more* commits get an entry, as documented.
    if commits_per_name.get(major_name, 0) >= MIN_COMMITS:
        output[mail] = major_name
| 110 | |
# Choose the base mailmap file:
# [1] If there exists a mailmap file at the location pointed to
#     by the mailmap.file configuration option, update it.
# [2] If the file .mailmap exists at the toplevel of the repository, update it.
# [3] Otherwise, create a new mailmap file.
mailmap_files = []

try:
    config_mailmap = subprocess.check_output(['git', 'config', 'mailmap.file'])
except subprocess.CalledProcessError:
    # git config failed (e.g. mailmap.file is not set): no configured file.
    config_mailmap = ''

# strip '\n'
config_mailmap = config_mailmap.rstrip()
if config_mailmap:
    mailmap_files.append(config_mailmap)

# The configured file, if any, is tried before the default '.mailmap'.
mailmap_files.append('.mailmap')

# Stays None when no candidate could be opened, i.e. case [3].
infile = None

for map_file in mailmap_files:
    try:
        infile = open(map_file)
    except IOError:
        # Failed to open (missing, unreadable, ...).  Try next.
        # Only I/O errors are ignored here, so that KeyboardInterrupt and
        # programming errors are no longer silently swallowed.
        continue
    break
| 137 | |
# Split the base file (if one was opened) into two parts:
#   comment_block - the leading run of comment/blank lines, kept verbatim
#   output_lines  - every line from the first real entry onwards
comment_block = []
output_lines = []

if infile:
    in_header = True
    for line in infile:
        # The header ends at the first line that is neither a comment
        # nor a blank line; everything after that belongs to the body.
        if in_header and line[0] not in ('#', '\n'):
            in_header = False
        if in_header:
            comment_block.append(line)
        else:
            output_lines.append(line)
    infile.close()
| 151 | |
# Append one 'name <mail>' line per collected mapping, sort all the entry
# lines alphabetically, and print them after the untouched comment block.
output_lines += [name + ' ' + mail + '\n' for mail, name in output.items()]
output_lines.sort()

sys.stdout.writelines(comment_block + output_lines)