cutadapt "succinct" patch (original) (raw)
diff -rupN '--exclude=.git' cutadapt/cutadapt/report.py cutadapt_patched/cutadapt/report.py
--- cutadapt/cutadapt/report.py 2015-03-26 13:00:26.163894349 -0500
+++ cutadapt_patched/cutadapt/report.py 2015-03-26 12:58:56.944023106 -0500
@@ -252,3 +252,55 @@ def print_statistics(adapters_pair, pair
print(' Please see the detailed output above.')
sys.stdout = old_stdout
def print_succinct(adapters_pair, paired, stats,
                   modifiers, modifiers2, writers, file=None):
    """Print a single-line, tab-delimited summary of a run.

    The fields are, in order: status ("OK" or "WARN"), reads/pairs
    processed, total bp processed, reads/pairs too short, reads/pairs too
    long, reads/pairs with too many N, reads/pairs written, and then for
    the first read file: reads with adapters, bases quality-trimmed and
    bases written. When *paired* is true the last three fields are
    repeated for the second read file.

    The status is "WARN" when, for at least one adapter, a single base
    (A, C, G or T) makes up more than 80% of at least 20 recorded
    adjacent bases.

    adapters_pair -- two-element sequence; element 0 and 1 are the
        adapter collections for the first and second read. Each adapter
        must provide the mappings ``lengths_front``, ``lengths_back`` and
        ``adjacent_bases``.
    paired -- truthy if the second-read fields should be appended.
    stats -- object providing ``n`` and an iterable ``total_bp``.
    modifiers, modifiers2 -- passed to ``qtrimmed()`` to obtain the
        number of quality-trimmed bases for each read.
    writers -- iterable of filter/writer objects from which the
        per-category counters are collected.
    file -- stream to write the summary line to; ``sys.stdout`` is used
        when it is None.
    """
    # Write to the target stream directly instead of temporarily rebinding
    # sys.stdout: an exception raised while gathering the numbers can then
    # never leave the process with a redirected stdout.
    out = sys.stdout if file is None else file

    too_short = 0
    too_long = 0
    too_many_n = 0
    written = 0
    # Indexed per read file below, so the fallback has to be a pair and not
    # a bare 0 (which would fail on written_bp[0] if no writer matched).
    written_bp = [0, 0]
    for w in writers:
        if isinstance(w, TooShortReadFilter):
            too_short = w.too_short
        elif isinstance(w, TooLongReadFilter):
            too_long = w.too_long
        elif isinstance(w, NContentTrimmer):
            too_many_n = w.too_many_n
        elif isinstance(w, (ProcessedReadWriter, Demultiplexer)):
            written = w.written
            written_bp = w.written_bp

    # Number of adapter matches (front plus back) for each read.
    with_adapters = [0, 0]
    for i in (0, 1):
        for adapter in adapters_pair[i]:
            with_adapters[i] += sum(adapter.lengths_front.values())
            with_adapters[i] += sum(adapter.lengths_back.values())

    # qtrimmed() may return a falsy value (e.g. None); report that as 0.
    quality_trimmed_bp = [int(qtrimmed(modifiers) or 0),
                          int(qtrimmed(modifiers2) or 0)]

    # Each adapter is judged on its own counts only.
    status = "OK"
    for which_in_pair in (0, 1):
        for adapter in adapters_pair[which_in_pair]:
            if _overrepresented_adjacent_base(adapter):
                status = "WARN"

    fields = [status, stats.n, sum(stats.total_bp), too_short, too_long,
              too_many_n, written,
              with_adapters[0], quality_trimmed_bp[0], written_bp[0]]
    if paired:
        fields.extend([with_adapters[1], quality_trimmed_bp[1],
                       written_bp[1]])

    out.write("\t".join(map(str, fields)) + "\n")


def _overrepresented_adjacent_base(adapter):
    """Return True if one base dominates the adapter's adjacent bases.

    A base dominates when it accounts for more than 80% of the counts in
    ``adapter.adjacent_bases`` and at least 20 bases were counted in total.
    """
    counts = adapter.adjacent_bases
    total = sum(counts.values())
    # Too few observations to judge. This also covers total == 0, which
    # would otherwise cause a division by zero below.
    if total < 20:
        return False
    # Only real bases can trigger the warning; the '' (none/other) bucket
    # contributes to the total but is never reported itself.
    return any(1.0 * counts[base] / total > 0.8
               for base in ('A', 'C', 'G', 'T'))
diff -rupN '--exclude=.git' cutadapt/cutadapt/scripts/cutadapt.py cutadapt_patched/cutadapt/scripts/cutadapt.py
--- cutadapt/cutadapt/scripts/cutadapt.py 2015-03-26 13:00:26.163894349 -0500
+++ cutadapt_patched/cutadapt/scripts/cutadapt.py 2015-03-26 12:16:52.509715125 -0500
@@ -79,7 +79,7 @@ from cutadapt.modifiers import (LengthTa
NEndTrimmer)
from cutadapt.writers import (TooShortReadFilter, TooLongReadFilter,
ProcessedReadWriter, Demultiplexer, NContentTrimmer)
-from cutadapt.report import Statistics, print_statistics
+from cutadapt.report import Statistics, print_statistics, print_succinct
from cutadapt.compat import next
logger = logging.getLogger(__name__)
@@ -425,6 +425,19 @@ def get_option_parser():
group = OptionGroup(parser, "Options that influence what gets output to where")
group.add_option("--quiet", default=False, action='store_true',
help="Do not print a report at the end.")
group.add_option("--succinct", default=False, action='store_true',
help="Print a single-line, tab-delimited summary. The fields are: "
"status, "
"reads/pairs processed, "
"total bp processed, "
"reads/pairs too short, "
"reads/pairs too long, "
"reads/pairs with too many N, "
"reads/pairs written, "
"AND for each individual read file: "
"reads w/ adapters, "
"bases quality-trimmed, "
"bases written.")
group.add_option("-o", "--output", metavar="FILE",
help="Write modified reads to FILE. FASTQ or FASTA format is chosen "
"depending on input. The summary report is sent to standard output. "
@@ -745,11 +758,12 @@ def main(cmdlineargs=None, default_outfi
if options.output:
logger.root.handlers = []
logging.basicConfig(level=logging.INFO, format='%(message)s', stream=sys.stdout)
- logger.info("This is cutadapt %s with Python %s", __version__, platform.python_version())
- logger.info("Command line parameters: %s", " ".join(cmdlineargs))
- logger.info("Trimming %s adapter(s) with at most %.1f%% errors in %s mode ...",
- len(adapters) + len(adapters2), options.error_rate * 100,
- { False: 'single-end', 'first': 'paired-end legacy', 'both': 'paired-end' }[paired])
if not options.succinct:
logger.info("This is cutadapt %s with Python %s", __version__, platform.python_version())
logger.info("Command line parameters: %s", " ".join(cmdlineargs))
logger.info("Trimming %s adapter(s) with at most %.1f%% errors in %s mode ...",
len(adapters) + len(adapters2), options.error_rate * 100,
{ False: 'single-end', 'first': 'paired-end legacy', 'both': 'paired-end' }[paired])
try:
reader = seqio.open(input_filename, file2=input_paired_filename,
@@ -851,7 +865,11 @@ def main(cmdlineargs=None, default_outfi
f.close()
elapsed_time = time.clock() - start_time
- if not options.quiet:
if options.succinct:
stat_file = sys.stderr if options.output is None else None
print_succinct((adapters, adapters2), paired, stats,
modifiers, modifiers2, writers, file=stat_file)
elif not options.quiet:
# send statistics to stderr if result was sent to stdout
stat_file = sys.stderr if options.output is None else None
print_statistics((adapters, adapters2), paired, elapsed_time, stats,