Skip to content

Commit

Permalink
arm64: csum: Optimise IPv6 header checksum
Browse files Browse the repository at this point in the history
Throwing our __uint128_t idioms at csum_ipv6_magic() makes it
about 1.3x-2x faster across a range of microarchitecture/compiler
combinations. Not much in absolute terms, but every little helps.

Signed-off-by: Robin Murphy <[email protected]>
Signed-off-by: Catalin Marinas <[email protected]>
  • Loading branch information
rmurphy-arm authored and ctmarinas committed Mar 9, 2020
1 parent 27afb23 commit e9c7ddb
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 1 deletion.
7 changes: 6 additions & 1 deletion arch/arm64/include/asm/checksum.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,12 @@
#ifndef __ASM_CHECKSUM_H
#define __ASM_CHECKSUM_H

#include <linux/types.h>
#include <linux/in6.h>

/*
 * Defined ahead of the prototype below; presumably tells generic checksum
 * code that this architecture supplies its own csum_ipv6_magic() -- TODO
 * confirm against the header that tests this macro.
 */
#define _HAVE_ARCH_IPV6_CSUM
/*
 * Out-of-line IPv6 checksum helper; the definition lives in
 * arch/arm64/lib/csum.c. It takes the source and destination addresses, a
 * length, a protocol number and an initial partial checksum, and returns a
 * 16-bit folded checksum (__sum16).
 * Note: the definition names the last parameter "csum" rather than "sum".
 */
__sum16 csum_ipv6_magic(const struct in6_addr *saddr,
const struct in6_addr *daddr,
__u32 len, __u8 proto, __wsum sum);

static inline __sum16 csum_fold(__wsum csum)
{
Expand Down
27 changes: 27 additions & 0 deletions arch/arm64/lib/csum.c
Original file line number Diff line number Diff line change
Expand Up @@ -124,3 +124,30 @@ unsigned int do_csum(const unsigned char *buff, int len)

return sum >> 16;
}

/*
 * csum_ipv6_magic - combine the IPv6 address pair, a length and a protocol
 * number with an initial partial checksum and fold the total to 16 bits.
 *
 * @saddr: source address; its s6_addr bytes are loaded as one __uint128_t
 * @daddr: destination address, loaded the same way
 * @len:   length value, added after conversion with htonl()
 * @proto: protocol number, added in the byte position htonl(proto) would
 *         place it
 * @csum:  initial partial checksum the other fields are accumulated onto
 *
 * Returns csum_fold() applied to the 32-bit folded total.
 *
 * NOTE(review): accumulate() is defined elsewhere in this file; it is
 * presumably a 64-bit add with end-around carry -- confirm.
 */
__sum16 csum_ipv6_magic(const struct in6_addr *saddr,
			const struct in6_addr *daddr,
			__u32 len, __u8 proto, __wsum csum)
{
	__uint128_t s128 = *(const __uint128_t *)saddr->s6_addr;
	__uint128_t d128 = *(const __uint128_t *)daddr->s6_addr;
	u64 acc = (__force u64)csum;

	/* Three values of at most 32 bits each cannot overflow the u64. */
	acc += (__force u32)htonl(len);
#ifdef __LITTLE_ENDIAN
	acc += (u32)proto << 24;
#else
	acc += proto;
#endif

	/*
	 * Adding a 128-bit value to itself rotated by 64 leaves, in the upper
	 * half, the sum of its two 64-bit halves with the carry wrapped in.
	 */
	s128 += (s128 << 64) | (s128 >> 64);
	d128 += (d128 << 64) | (d128 >> 64);

	acc = accumulate(accumulate(acc, s128 >> 64), d128 >> 64);

	/* Same rotate-and-add step, collapsing 64 bits into the top 32. */
	acc += (acc << 32) | (acc >> 32);

	return csum_fold((__force __wsum)(acc >> 32));
}
EXPORT_SYMBOL(csum_ipv6_magic);

0 comments on commit e9c7ddb

Please sign in to comment.