PERF: read_csv macro updates by WillAyd · Pull Request #52632 · pandas-dev/pandas (original) (raw)
If anyone cares about the details, here is the verbose annotated assembly output (generated with
gcc's -fverbose-asm flag) from before the change:
.L246:
pandas/_libs/src/parser/tokenizer.c:929: } else if (IS_DELIMITER(c)) {
.loc 3 929 28 is_stmt 1
movq -136(%rbp), %rax # self, tmp683
movl 188(%rax), %eax # self_452(D)->delim_whitespace, _180
pandas/_libs/src/parser/tokenizer.c:929: } else if (IS_DELIMITER(c)) {
.loc 3 929 27
testl %eax, %eax # _180
jne .L247 #,
pandas/_libs/src/parser/tokenizer.c:929: } else if (IS_DELIMITER(c)) {
.loc 3 929 28 discriminator 1
movq -136(%rbp), %rax # self, tmp684
movzbl 184(%rax), %eax # self_452(D)->delimiter, _181
cmpb %al, -49(%rbp) # _181, c
je .L248 #,
.L247:
pandas/_libs/src/parser/tokenizer.c:929: } else if (IS_DELIMITER(c)) {
.loc 3 929 28 is_stmt 0 discriminator 3
movq -136(%rbp), %rax # self, tmp685
movl 188(%rax), %eax # self_452(D)->delim_whitespace, _182
testl %eax, %eax # _182
je .L249 #,
pandas/_libs/src/parser/tokenizer.c:929: } else if (IS_DELIMITER(c)) {
.loc 3 929 28 discriminator 4
call __ctype_b_loc@PLT #
movq (%rax), %rdx # *_183, _184
movsbq -49(%rbp), %rax # c, _185
addq %rax, %rax # _186
addq %rdx, %rax # _184, _187
movzwl (%rax), %eax # *_187, _188
movzwl %ax, %eax # _188, _189
andl $1, %eax #, _190
testl %eax, %eax # _190
je .L249 #,
.L248:
pandas/_libs/src/parser/tokenizer.c:930: if (self->delim_whitespace) {
and from after the change:
.L243:
pandas/_libs/src/parser/tokenizer.c:930: } else if (IS_DELIMITER(c)) {
.loc 3 930 27 is_stmt 1
movzbl -57(%rbp), %eax # c, tmp657
cmpb -41(%rbp), %al # delimiter, tmp657
je .L244 #,
pandas/_libs/src/parser/tokenizer.c:930: } else if (IS_DELIMITER(c)) {
.loc 3 930 28 discriminator 1
cmpl $0, -40(%rbp) #, delim_whitespace
je .L245 #,
pandas/_libs/src/parser/tokenizer.c:930: } else if (IS_DELIMITER(c)) {
.loc 3 930 28 is_stmt 0 discriminator 2
call __ctype_b_loc@PLT #
movq (%rax), %rdx # *_171, _172
movsbq -57(%rbp), %rax # c, _173
addq %rax, %rax # _174
addq %rdx, %rax # _172, _175
movzwl (%rax), %eax # *_175, _176
movzwl %ax, %eax # _176, _177
andl $1, %eax #, _178
testl %eax, %eax # _178
je .L245 #,
.L244:
pandas/_libs/src/parser/tokenizer.c:931: if (self->delim_whitespace) {
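This is very low level, but the check is executed for potentially every character in a file. In the "before" listing, self is reloaded and self->delim_whitespace is fetched and tested twice (plus one load of self->delimiter) on every pass; in the "after" listing, delimiter and delim_whitespace are read from local stack slots and delim_whitespace is tested only once.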