PERF: read_csv macro updates by WillAyd · Pull Request #52632 · pandas-dev/pandas (original) (raw)

If anyone cares about the details, here is the verbose annotated assembly output (generated with gcc's `-fverbose-asm` flag) from before the change:

.L246:

pandas/_libs/src/parser/tokenizer.c:929: } else if (IS_DELIMITER(c)) {

.loc 3 929 28 is_stmt 1
movq	-136(%rbp), %rax	# self, tmp683
movl	188(%rax), %eax	# self_452(D)->delim_whitespace, _180

pandas/_libs/src/parser/tokenizer.c:929: } else if (IS_DELIMITER(c)) {

.loc 3 929 27
testl	%eax, %eax	# _180
jne	.L247	#,

pandas/_libs/src/parser/tokenizer.c:929: } else if (IS_DELIMITER(c)) {

.loc 3 929 28 discriminator 1
movq	-136(%rbp), %rax	# self, tmp684
movzbl	184(%rax), %eax	# self_452(D)->delimiter, _181
cmpb	%al, -49(%rbp)	# _181, c
je	.L248	#,

.L247:

pandas/_libs/src/parser/tokenizer.c:929: } else if (IS_DELIMITER(c)) {

.loc 3 929 28 is_stmt 0 discriminator 3
movq	-136(%rbp), %rax	# self, tmp685
movl	188(%rax), %eax	# self_452(D)->delim_whitespace, _182
testl	%eax, %eax	# _182
je	.L249	#,

pandas/_libs/src/parser/tokenizer.c:929: } else if (IS_DELIMITER(c)) {

.loc 3 929 28 discriminator 4
call	__ctype_b_loc@PLT	#
movq	(%rax), %rdx	# *_183, _184
movsbq	-49(%rbp), %rax	# c, _185
addq	%rax, %rax	# _186
addq	%rdx, %rax	# _184, _187
movzwl	(%rax), %eax	# *_187, _188
movzwl	%ax, %eax	# _188, _189
andl	$1, %eax	#, _190
testl	%eax, %eax	# _190
je	.L249	#,

.L248:

pandas/_libs/src/parser/tokenizer.c:930: if (self->delim_whitespace) {

and here is the same section after the change:

.L243:

pandas/_libs/src/parser/tokenizer.c:930: } else if (IS_DELIMITER(c)) {

.loc 3 930 27 is_stmt 1
movzbl	-57(%rbp), %eax	# c, tmp657
cmpb	-41(%rbp), %al	# delimiter, tmp657
je	.L244	#,

pandas/_libs/src/parser/tokenizer.c:930: } else if (IS_DELIMITER(c)) {

.loc 3 930 28 discriminator 1
cmpl	$0, -40(%rbp)	#, delim_whitespace
je	.L245	#,

pandas/_libs/src/parser/tokenizer.c:930: } else if (IS_DELIMITER(c)) {

.loc 3 930 28 is_stmt 0 discriminator 2
call	__ctype_b_loc@PLT	#
movq	(%rax), %rdx	# *_171, _172
movsbq	-57(%rbp), %rax	# c, _173
addq	%rax, %rax	# _174
addq	%rdx, %rax	# _172, _175
movzwl	(%rax), %eax	# *_175, _176
movzwl	%ax, %eax	# _176, _177
andl	$1, %eax	#, _178
testl	%eax, %eax	# _178
je	.L245	#,

.L244:

pandas/_libs/src/parser/tokenizer.c:931: if (self->delim_whitespace) {

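This is very low level, but the check is potentially executed for every character in a file. In the "before" listing, `self` is reloaded from the stack three times, `self->delim_whitespace` is tested twice, and `self->delimiter` is read through the pointer; in the "after" listing, `c` is compared directly against a local `delimiter` and a local `delim_whitespace` is tested only once.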