CSV Parse breaks on comment characters that are also in rows (original) (raw)
Describe the bug
I am using csv-parse@5.5.3, and its comment handling breaks on my input. A line that starts with `#` should be treated as a comment; however, later data rows that merely contain a `#` then fail to parse.
To Reproduce
I am trying to parse https://download.geonames.org/export/dump/countryInfo.txt, whose first 50 lines are comments. The Postal Code Format column contains many `#` characters.
/**
 * Baseline csv-parse options used for the reproduction.
 *
 * Each property sits on its own line so that the two disabled `comment*`
 * options below only comment out themselves; on a single line the first `//`
 * would swallow every following property and the closing brace.
 */
const defaultParseOptions: ParseOptions = {
  bom: true,
  cast: true,
  columns: false,
  // Left disabled. NOTE(review): presumably these are the options that trigger
  // the parse failure on rows containing `#` — confirm before re-enabling.
  // comment: "#",
  // comment_no_infix: true,
  // NOTE(review): the target file looks tab-separated; this literal may have
  // been a tab before whitespace was normalized — confirm.
  delimiter: " ",
  escape: null,
  groupColumnsByName: false,
  quote: null,
  record_delimiter: ["\n", "\r", "\r\n"],
  relax_quotes: true,
  skip_empty_lines: true,
};
Additional context
Workaround
/**
 * Transform stream that filters out blank lines and `#` comment lines.
 *
 * Every chunk is stringified and treated as one line. Lines that survive the
 * filter are forwarded with a trailing "\n" appended; dropped lines produce no
 * output. Presumably each incoming chunk is a single line without its line
 * terminator (e.g. from a readline iterator) — verify against the caller.
 */
class RemoveCommentTransform extends Transform {
  /**
   * @param chunk - The incoming line; converted with `String()`.
   * @param _encoding - Unused.
   * @param callback - Invoked exactly once: with the line plus "\n" when it is
   *   kept, or with no data when it is dropped.
   */
  override _transform(
    chunk: any,
    _encoding: BufferEncoding,
    callback: TransformCallback,
  ): void {
    const text = String(chunk);
    const isBlank = text.trim() === "";
    // Checked against the untrimmed text: only a `#` in the very first column
    // marks a comment, so an indented `#` line is kept.
    const isComment = text.startsWith("#");

    if (isBlank || isComment) {
      callback(null);
      return;
    }

    callback(null, text + "\n");
  }
}
// Read the file line by line and expose the lines as a Readable stream.
const readableStream = fs.createReadStream(filePath);
const readlineIterator = readline.createInterface({
  input: readableStream,
  // Per the Node.js readline docs, an infinite delay makes "\r" followed by
  // "\n" always count as a single line break.
  crlfDelay: Number.POSITIVE_INFINITY,
});
const readlineStream = Readable.from(readlineIterator);
const removeCommentTransform = new RemoveCommentTransform();

// Caller-supplied options take precedence over the defaults.
const parser = parse({
  ...defaultParseOptions,
  ...parseOptions,
});

// Lines flow: file -> comment/blank-line filter -> CSV parser.
readlineStream
  .pipe(removeCommentTransform)
  .pipe(parser);