(original) (raw)
blamecount-1.1/blamecount0000775000175000017500000002330114552633410013733 0ustar esresr#! /usr/bin/env python3 # SPDX-FileCopyrightText: (C) Eric S. Raymond esr@thyrsus.com # SPDX-License-Identifier: BSD-2-Clause """blamecount [-t] [-e] [-a] [-?] Options: -t report contributed total lines (default) -e report xcontributed spans -a report commit coonts per author -c report commit coonts per committer -? dump this summary Report contribution statistics on a Git repository. Ignores files that git thinks are binary. Ignores contributor spans consisting entirely of blank lines. """ from __future__ import division # pylint: disable=multiple-imports,invalid-name,unnecessary-lambda-assignment.consider-using-f-string,too-many-branches import io, sys, os, subprocess, getopt # Any encoding that passes through 0x80-0xFF unmodified will do. master_encoding = "latin-1" if str is bytes: # Python 2 polystr = str # pragma: no cover (false negative) polybytes = bytes # pragma: no cover (false negative) polystream = lambda x: x else: # Python 3 def polystr(obj): "Polymorphic string factory function" # This is something of a hack: on Python 2, bytes is an alias # for str, so this ends up just giving a str back for all # inputs; but on Python 3, if fed a byte string, it decodes it # to Unicode using the specified master encoding, which should # be either 'ascii' if you're sure all data being handled will # be ASCII data, or 'latin-1' otherwise; this ensures that the # original bytes can be recovered by re-encoding. if isinstance(obj, str): return obj if not isinstance(obj, bytes): return str(obj) # pragma: no cover (interactive only) return str(obj, encoding=master_encoding) def polybytes(s): "Polymorphic string encoding function" # This is the reverse of the above hack; on Python 2 it returns # all strings unchanged, but on Python 3 it encodes Unicode # strings back to bytes using the specified master encoding. 
if isinstance(s, bytes): return s # if not isinstance(s, str): # return bytes(s) return bytes(s, encoding=master_encoding) def polystream(stream): "Standard input/output wrapper factory function" # This ensures that the encoding of standard output and standard # error on Python 3 matches the master encoding we use to turn # bytes to Unicode in polystr above. return io.TextIOWrapper(stream.buffer, encoding=master_encoding, newline="\n") sys.stdin = polystream(sys.stdin) sys.stdout = polystream(sys.stdout) sys.stderr = polystream(sys.stderr) junklines = ("", "/*", "*/", "(*", "*)", "*", "//", "#", "") def capture(command): "Run a specified command, capturing the output." try: content = subprocess.check_output(command, shell=True, stderr=sys.stderr) except (subprocess.CalledProcessError, OSError) as oe: sys.stderr.write("execution of '{0}' failed: {1}\b".format(command, oe)) raise SystemExit(1) from oe return content def no_blame_attaches(fn): "Say if blame can't sensibly be run on this file" return "text" not in polystr(capture("file -b --mime-type " + fn)) class ContributorBand: "Represent a band of contributions by an author." def __init__(self, sourcefile, startline, legend, ws): self.sourcefile = sourcefile self.startline = startline self.endline = startline self.legend = legend self.ignorecount = 1 if ws else 0 def addline(self, endline, legend, ws): "Add a line to a span." self.endline = endline self.legend = legend self.ignorecount += 1 if ws else 0 def ignoreme(self): "Should this span be ignored?" return self.ignorecount == self.endline - self.startline + 1 class Contributions: "Collect blame lines sassociated with a person." def __init__(self): self.locs = [] def append(self, source, ln, legend, ws): "Record a blame line." 
if ( len(self.locs) == 0 or source != self.locs[-1].sourcefile or self.locs[-1].endline != ln - 1 ): self.locs.append(ContributorBand(source, ln, legend, ws)) else: self.locs[-1].addline(ln, legend, ws) def linecount(self): "Report the linecount of the person's contributions." w = 0 for span in self.locs: w += span.endline - span.startline + 1 return w def emacs(self): "Generate a report of line spans sutable for an Emacs compilation buffer" def inner_report(s, ln, legend): "Report a boulding line for a span." return "{0}:{1}:{2}\n".format(s, ln, legend) return "".join( [ inner_report(x.sourcefile, x.startline, x.legend) + inner_report(x.sourcefile, x.endline, x.legend) for x in self.locs ] ) def __str__(self): return str(self.locs) def is_hash(s): "Does this string look like a Git hash?" return len(s) == 40 def is_ws(c): "Is a character whitespace?" return c in (" ", "\t") def ignorable(line): "Should a line be ignored for attribution purposes?" for junk in junklines: if line.strip() == junk: return True return False def collect_blame(): "Collect blame statistics on the current repository." 
c_totalcount = 0 c_attrib = {} sources = capture("git ls-files --eol").decode(master_encoding) for sourceline in sources.split("\n"): if not sourceline: continue fields = sourceline.split() sourcefile = fields[-1] if "-text" in fields[0] or "-text" in fields[1] or "-text" in fields[2]: continue with polystream( os.popen("git blame --line-porcelain " + sourcefile, "r") ) as rfp: c_author = None c_mail = None c_ln = None while True: line = rfp.readline() if line == "": break if line.startswith("author "): c_author = line[7:].strip() elif line.startswith("author-mail "): c_mail = line[12:].strip() elif is_ws(line[0]): if c_author is None or c_mail is None or c_ln is None: sys.stderr.write( "blamecount: missing metadata - possible git blame bug.\n" ) # pylint: disable=raise-missing-from raise SystemExit(1) c_totalcount += 1 key = c_author + " " + c_mail if key not in c_attrib: c_attrib[key] = Contributions() c_attrib[key].append(sourcefile, c_ln, key, ignorable(line)) c_author = None c_mail = None c_ln = None else: fields = line.split() if is_hash(fields[0]): c_ln = int(fields[2]) # pylint: disable=redefined-outer-name,unnecessary-dict-index-lookup for k, v in c_attrib.items(): c_attrib[k].locs = [span for span in v.locs if not span.ignoreme()] return c_totalcount, c_attrib def collect_attributions(query): "Count attributions by author." a_totalcount = 0 a_attrib = {} attribs = capture('git log --format="format:%s"' % query).decode(master_encoding) for attribline in attribs.split("\n"): a_totalcount += 1 if attribline not in a_attrib: a_attrib[attribline] = 0 a_attrib[attribline] += 1 return a_totalcount, a_attrib def sort_report(iattrib, totalc, legend, extract): "Sort tuples and report them." 
report = [] for k, v in iattrib.items(): value = extract(v) if value > 0: report.append((value, k)) report.sort(key=lambda x: x[0], reverse=True) print("100%\t{0}\t{1}".format(totalc, legend)) for (lines, lauthor) in report: print("{0:.1f}%\t{1}\t{2}".format((lines / totalc) * 100, lines, lauthor)) if __name__ == "__main__": try: (opts, arguments) = getopt.getopt(sys.argv[1:], "acet?") except getopt.GetoptError: sys.stderr.write(__doc__) # pylint: disable=raise-missing-from raise SystemExit(1) mode = "totals" author = None verbose = 0 for (opt, arg) in opts: if opt == "-a": mode = "attributions" fmt = "%an <%ae>" banner = "author" elif opt == "-c": mode = "attributions" fmt = "%cn <%ce>" banner = "committer" elif opt == "-e": mode = "emacs" elif opt == "-t": mode = "totals" elif opt == "-?": sys.stderr.write(__doc__) sys.stderr.write( "\nIgnored if alone on a line: {0}\n".format( " ".join([repr(x) for x in junklines]) ) ) raise SystemExit(0) if mode == "attributions": totalcount, attrib = collect_attributions(fmt) sort_report(attrib, totalcount, banner, lambda x: x) else: totalcount, attrib = collect_blame() if mode == "totals": sort_report(attrib, totalcount, "blame", lambda x: x.linecount()) elif mode == "emacs": authors = list(attrib.keys()) authors.sort(key=lambda c: (-attrib[c].linecount(), c)) for a in authors: sys.stdout.write(attrib[a].emacs()) # end blamecount-1.1/blamecount.10000664000175000017500000000516214560163177014103 0ustar esresr'\" t .\" Title: blamecount .\" Author: /// SPDX-FileCopyrightText: (C) Eric S. Raymond esr@thyrsus.com .\" Generator: Asciidoctor 2.0.16 .\" Date: 2024-01-19 .\" Manual: \ \& .\" Source: \ \& .\" Language: English .\" .TH "BLAMECOUNT" "1" "2024-01-19" "\ \&" "\ \&" .ie \n(.g .ds Aq \(aq .el .ds Aq ' .ss \n[.ss] 0 .nh .ad l .de URL \fI\\$2\fP <\\$1>\\$3 .. .als MTO URL .if \n[.g] \{\ . mso www.tmac . am URL . ad l . . . am MTO . ad l . . . 
LINKSTYLE blue R < > .\} .SH "NAME" blamecount \- report attribution statistics from a git repository .SH "SYNOPSIS" .sp \fBblamecount\fP [\-t] [\-e] [\-a] [\-c] [\-?] .SH "OPTIONS" .sp \-t .RS 4 Return a list of contributor statistics, both as lines touched and as percentage of lines in the codebase. This is the default mode. .RE .sp \-e .RS 4 Report contributor change bands. The output can fill an Emacs compilation buffer that allows you to step through bands of lines touched by individual authors. .RE .sp \-a .RS 4 Report commit counts by author. .RE .sp \-c .RS 4 Report commit counts by committer. .RE .sp \-? .RS 4 Dump a usage message. .RE .SH "DESCRIPTION" .sp This program collects attribution statistics on a codebase. Run it within a Git repository directory. .sp The specific report is controlled by the option you choose. In its \-t and \-e modes, it uses git blame to gather and report statistics on which author last touched each line in the tip revision. In \-a mode it simply reports commit counts by author. .sp To reduce noise in \-t and \-e reports, author spans consisting entirely of junk lines are ignored. Thus, percentages may not add up to 100%. Junk lines are empty lines and those which, when stripped, consist of the comment leaders (and trailers) "/*", "*/", "(*", "*)", "*", "//", "", and "#" alone. Such lines are \fInot\fP ignored when they share a span with non\-junk lines. .sp Files that Git detects to be binary (such as images) are ignored in \-t and \-e modes. The test used is whether "git ls\-files \-\-eol" finds a non\-textual eol type for the file. If need be this can be configured on a per\-project basis using the gitattributes facility. .sp The error message "missing metadata \- possible git blame bug." indicates that git blame\(cqs reporting format may have changed in a way that confuses blamecount. .SH "REPORTING BUGS" .sp Report bugs to Eric S. Raymond \c .MTO "esr\(atthyrsus.com" "" "." 
The project page is at \c .URL "http://catb.org/\~esr/blamecount" "" "" .SH "SEE ALSO" .sp git\-blame(1), git\-log(1). .SH "AUTHOR" .sp /// SPDX-FileCopyrightText: (C) Eric S. Raymond esr@thyrsus.comblamecount-1.1/blamecount.adoc0000664000175000017500000000406214552633666014655 0ustar esresr= blamecount(1) = /// SPDX-FileCopyrightText: (C) Eric S. Raymond esr@thyrsus.com /// SPDX-License-Identifier: BSD-2-Clause :doctype: manpage == NAME == blamecount - report attribution statistics from a git repository == SYNOPSIS == *blamecount* [-t] [-e] [-a] [-c] [-?] == OPTIONS == -t:: Return a list of contributor statistics, both as lines touched and as percentage of lines in the codebase. This is the default mode. -e:: Report contributor change bands. The output can fill an Emacs compilation buffer that allows you to step through bands of lines touched by individual authors. -a:: Report commit counts by author. -c:: Report commit counts by committer. -?:: Dump a usage message. == DESCRIPTION == This program collects attribution statistics on a codebase. Run it within a Git repository directory. The specific report is controlled by the option you choose. In its -t and -e modes, it uses git blame to gather and report statistics on which author last touched each line in the tip revision. In -a mode it simply reports commit counts by author. To reduce noise in -t and -e reports, author spans consisting entirely of junk lines are ignored. Thus, percentages may not add up to 100%. Junk lines are empty lines and those which, when stripped, consist of the comment leaders (and trailers) "/\*", "*/", "(\*", "*)", "*", "//", "", and "#" alone. Such lines are _not_ ignored when they share a span with non-junk lines. Files that Git detects to be binary (such as images) are ignored in -t and -e modes. The test used is whether "git ls-files --eol" finds a non-textual eol type for the file. If need be this can be configured on a per-project basis using the gitattributes facility. 
The error message "missing metadata - possible git blame bug." indicates that git blame's reporting format may have changed in a way that confuses blamecount. == REPORTING BUGS == Report bugs to Eric S. Raymond esr@thyrsus.com. The project page is at http://catb.org/\~esr/blamecount == SEE ALSO == git-blame(1), git-log(1). blamecount-1.1/control0000664000175000017500000000134214547056320013263 0ustar esresr# This is not a real Debian control file, though the syntax is compatible # It's project metadata for the shipper tool # SPDX-FileCopyrightText: (C) Eric S. Raymond esr@thyrsus.com # SPDX-License-Identifier: BSD-2-Clause Package: blamecount Description: Audit contributions via git blame. A wrapper around git blame that shows lines-last-touched statistics for every contributor. Optionally, generate a report that can be used in an Emacs compilation buffer to step through contribution bands for all users or for a specified user. Homepage: http://www.catb.org/\~esr/blamecount XBS-HTML-Target: index.html XBS-Repository-URL: https://gitlab.com/esr/blamecount XBS-VC-Tag-Template: %(version)s XBS-Logo: blamecount-logo.png blamecount-1.1/COPYING0000664000175000017500000000234614546313651012722 0ustar esresr BSD LICENSE Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. blamecount-1.1/Makefile0000664000175000017500000000400214560162254013313 0ustar esresr# Project makefile for blamecount # # Requires Python 3 and asciidoctor. Tests also want pylint and shellcheck. VERSION=$(shell sed -n MANIFEST @(cd ..; ln -s blamecount blamecount-$(VERSION)) (cd ..; tar -czf blamecount/blamecount-$(VERSION).tar.gz `cat blamecount/MANIFEST`) @ls -l blamecount-$(VERSION).tar.gz @(cd ..; rm blamecount-$(VERSION)) dist: blamecount-$(VERSION).tar.gz release: blamecount-$(VERSION).tar.gz blamecount.html shipper version=$(VERSION) | sh -e -x refresh: blamecount.html shipper -N -w version=$(VERSION) | sh -e -x blamecount-1.1/NEWS.adoc0000664000175000017500000000023714560660221013262 0ustar esresr= blamecount project news. 1.1: 2024-02-07:: Added -a and -c options to report commit authorship statistics. 1.0: 2024-01-08:: Initial release. blamecount-1.1/README.adoc0000664000175000017500000000103614552455234013450 0ustar esresr= blamecount - contribution auditing via git blame = Run in a repository to see per-contributor statistics about who last touched how many lines. With an option, generate a report that can be visited in an Emacs compilation buffer to step through contribution bands. With a different option, report commit counts by author. A simple Python 3 script. Just download it and go. "make" in the project directory builds the documentation. "make install" as root installs the code and documentation. "make check" runs the test suite. 
// end/esr@thyrsus.com/esr@thyrsus.com/esr@thyrsus.com/esr@thyrsus.com