[crypto] Use inline assembly for bigint_grow() and bigint_shrink()

The bigint_grow() and bigint_shrink() functions are used on the fast
path for big integer calculations (e.g. within the X25519 Montgomery
ladder step).  Use inline assembly implementations of these functions
on all architectures.

Signed-off-by: Michael Brown <mcb30@ipxe.org>
This commit is contained in:
Michael Brown
2026-06-16 16:07:09 +01:00
parent 73d437859d
commit cd873a2b5d
6 changed files with 187 additions and 20 deletions
+43 -4
View File
@@ -170,10 +170,33 @@ bigint_shr_raw ( uint32_t *value0, unsigned int size ) {
static inline __attribute__ (( always_inline )) void
bigint_grow_raw ( const uint32_t *source0, unsigned int source_size,
		  uint32_t *dest0, unsigned int dest_size ) {
	bigint_t ( dest_size ) __attribute__ (( may_alias )) *dest =
		( ( void * ) dest0 );
	const uint32_t *source_end = ( source0 + source_size );
	uint32_t *dest_end = ( dest0 + dest_size );
	uint32_t *discard_source;
	uint32_t *discard_dest;
	uint32_t discard_source_i;

	/* Copy source_size elements from source to dest, then
	 * zero-fill the remainder of dest up to dest_size elements.
	 *
	 * The assembly performs the entire operation, so no separate
	 * memcpy()/memset() pass is needed.
	 *
	 * The copy loop (label 1) tests its exit condition only after
	 * transferring an element, so source_size must be nonzero.
	 * The zero-fill loop (label 2) is entered via its test (label
	 * 3), so dest_size may equal source_size.
	 *
	 * "teq" alters the condition flags, which are therefore
	 * declared as clobbered.
	 */
	__asm__ __volatile__ ( "\n1:\n\t"
			       "ldmia %0!, {%2}\n\t"
			       "stmia %1!, {%2}\n\t"
			       "teq %0, %4\n\t"
			       "bne 1b\n\t"
			       "eor %2, %2\n\t"
			       "b 3f\n\t"
			       "\n2:\n\t"
			       "stmia %1!, {%2}\n\t"
			       "\n3:\n\t"
			       "teq %1, %5\n\t"
			       "bne 2b\n\t"
			       : "=&l" ( discard_source ),
				 "=&l" ( discard_dest ),
				 "=&l" ( discard_source_i ),
				 "=m" ( *dest )
			       : "l" ( source_end ),
				 "l" ( dest_end ),
				 "0" ( source0 ), "1" ( dest0 )
			       : "cc" );
}
/**
@@ -187,8 +210,24 @@ bigint_grow_raw ( const uint32_t *source0, unsigned int source_size,
static inline __attribute__ (( always_inline )) void
bigint_shrink_raw ( const uint32_t *source0, unsigned int source_size __unused,
		    uint32_t *dest0, unsigned int dest_size ) {
	bigint_t ( dest_size ) __attribute__ (( may_alias )) *dest =
		( ( void * ) dest0 );
	uint32_t *dest_end = ( dest0 + dest_size );
	uint32_t *discard_source;
	uint32_t *discard_dest;
	uint32_t discard_source_i;

	/* Copy the first dest_size elements from source to dest.
	 *
	 * The assembly performs the entire copy, so no separate
	 * memcpy() pass is needed.
	 *
	 * The loop tests its exit condition only after transferring
	 * an element, so dest_size must be nonzero.
	 *
	 * "teq" alters the condition flags, which are therefore
	 * declared as clobbered.
	 */
	__asm__ __volatile__ ( "\n1:\n\t"
			       "ldmia %0!, {%2}\n\t"
			       "stmia %1!, {%2}\n\t"
			       "teq %1, %4\n\t"
			       "bne 1b\n\t"
			       : "=&l" ( discard_source ),
				 "=&l" ( discard_dest ),
				 "=&l" ( discard_source_i ),
				 "=m" ( *dest )
			       : "l" ( dest_end ),
				 "0" ( source0 ), "1" ( dest0 )
			       : "cc" );
}
/**
+44 -3
View File
@@ -171,10 +171,34 @@ bigint_shr_raw ( uint64_t *value0, unsigned int size ) {
static inline __attribute__ (( always_inline )) void
bigint_grow_raw ( const uint64_t *source0, unsigned int source_size,
		  uint64_t *dest0, unsigned int dest_size ) {
	bigint_t ( dest_size ) __attribute__ (( may_alias )) *dest =
		( ( void * ) dest0 );
	unsigned int pad_size = ( dest_size - source_size );
	uint64_t *discard_source;
	uint64_t *discard_dest;
	uint64_t discard_source_i;
	unsigned int discard_source_size;
	unsigned int discard_pad_size;

	/* Copy source_size elements from source to dest, then store
	 * pad_size zero elements after them.
	 *
	 * The assembly performs the entire operation, so no separate
	 * memcpy()/memset() pass is needed.
	 *
	 * The copy loop (label 1) decrements and tests its counter
	 * only after transferring an element, so source_size must be
	 * nonzero.  The zero-fill loop (label 2) is entered via its
	 * test (label 3), so pad_size may be zero.
	 */
	__asm__ __volatile__ ( "\n1:\n\t"
			       "ldr %2, [%0], #8\n\t"
			       "str %2, [%1], #8\n\t"
			       "sub %w3, %w3, #1\n\t"
			       "cbnz %w3, 1b\n\t"
			       "b 3f\n\t"
			       "\n2:\n\t"
			       "str xzr, [%1], #8\n\t"
			       "sub %w4, %w4, #1\n\t"
			       "\n3:\n\t"
			       "cbnz %w4, 2b\n\t"
			       : "=&r" ( discard_source ),
				 "=&r" ( discard_dest ),
				 "=&r" ( discard_source_i ),
				 "=&r" ( discard_source_size ),
				 "=&r" ( discard_pad_size ),
				 "=m" ( *dest )
			       : "0" ( source0 ), "1" ( dest0 ),
				 "3" ( source_size ), "4" ( pad_size ) );
}
/**
@@ -188,8 +212,25 @@ bigint_grow_raw ( const uint64_t *source0, unsigned int source_size,
static inline __attribute__ (( always_inline )) void
bigint_shrink_raw ( const uint64_t *source0, unsigned int source_size __unused,
		    uint64_t *dest0, unsigned int dest_size ) {
	bigint_t ( dest_size ) __attribute__ (( may_alias )) *dest =
		( ( void * ) dest0 );
	uint64_t *discard_source;
	uint64_t *discard_dest;
	uint64_t discard_source_i;
	unsigned int discard_dest_size;

	/* Copy the first dest_size elements from source to dest.
	 *
	 * The assembly performs the entire copy, so no separate
	 * memcpy() pass is needed.
	 *
	 * The loop decrements and tests its counter only after
	 * transferring an element, so dest_size must be nonzero.
	 */
	__asm__ __volatile__ ( "\n1:\n\t"
			       "ldr %2, [%0], #8\n\t"
			       "str %2, [%1], #8\n\t"
			       "sub %w3, %w3, #1\n\t"
			       "cbnz %w3, 1b\n\t"
			       : "=&r" ( discard_source ),
				 "=&r" ( discard_dest ),
				 "=&r" ( discard_source_i ),
				 "=&r" ( discard_dest_size ),
				 "=m" ( *dest )
			       : "0" ( source0 ), "1" ( dest0 ),
				 "3" ( dest_size ) );
}
/**
+47 -4
View File
@@ -214,10 +214,36 @@ bigint_shr_raw ( uint64_t *value0, unsigned int size ) {
static inline __attribute__ (( always_inline )) void
bigint_grow_raw ( const uint64_t *source0, unsigned int source_size,
		  uint64_t *dest0, unsigned int dest_size ) {
	const bigint_t ( source_size ) __attribute__ (( may_alias )) *source =
		( ( const void * ) source0 );
	bigint_t ( dest_size ) __attribute__ (( may_alias )) *dest =
		( ( void * ) dest0 );
	uint64_t discard_source_i;
	uint64_t discard_offset;

	/* Copy sizeof ( *source ) bytes from source to dest, then
	 * zero-fill dest up to sizeof ( *dest ) bytes.
	 *
	 * The assembly performs the entire operation, so no separate
	 * memcpy()/memset() pass is needed.
	 *
	 * A single byte offset (starting at zero) indexes both
	 * arrays and is shared by both loops.  The copy loop (label
	 * 1) tests its exit condition only after transferring an
	 * element, so source_size must be nonzero.  The zero-fill
	 * loop (label 2) is entered via its test (label 3), so
	 * dest_size may equal source_size.
	 */
	__asm__ __volatile__ ( "\n1:\n\t"
			       /* Copy dest[i] */
			       "ldx.d %0, %3, %1\n\t"
			       "stx.d %0, %4, %1\n\t"
			       /* Loop */
			       "addi.w %1, %1, 8\n\t"
			       "bne %1, %5, 1b\n\t"
			       "b 3f\n\t"
			       "\n2:\n\t"
			       /* Zero dest[i] */
			       "stx.d $zero, %4, %1\n\t"
			       /* Loop */
			       "addi.w %1, %1, 8\n\t"
			       "\n3:\n\t"
			       "bne %1, %6, 2b\n\t"
			       : "=&r" ( discard_source_i ),
				 "=&r" ( discard_offset ),
				 "=m" ( *dest )
			       : "r" ( source0 ),
				 "r" ( dest0 ),
				 "r" ( sizeof ( *source ) ),
				 "r" ( sizeof ( *dest ) ),
				 "1" ( 0 ) );
}
/**
@@ -231,8 +257,25 @@ bigint_grow_raw ( const uint64_t *source0, unsigned int source_size,
static inline __attribute__ (( always_inline )) void
bigint_shrink_raw ( const uint64_t *source0, unsigned int source_size __unused,
		    uint64_t *dest0, unsigned int dest_size ) {
	bigint_t ( dest_size ) __attribute__ (( may_alias )) *dest =
		( ( void * ) dest0 );
	uint64_t discard_source_i;
	uint64_t discard_offset;

	/* Copy the first sizeof ( *dest ) bytes from source to dest.
	 *
	 * The assembly performs the entire copy, so no separate
	 * memcpy() pass is needed.
	 *
	 * A single byte offset (starting at zero) indexes both
	 * arrays.  The loop tests its exit condition only after
	 * transferring an element, so dest_size must be nonzero.
	 */
	__asm__ __volatile__ ( "\n1:\n\t"
			       /* Copy dest[i] */
			       "ldx.d %0, %3, %1\n\t"
			       "stx.d %0, %4, %1\n\t"
			       /* Loop */
			       "addi.w %1, %1, 8\n\t"
			       "bne %1, %5, 1b\n\t"
			       : "=&r" ( discard_source_i ),
				 "=&r" ( discard_offset ),
				 "=m" ( *dest )
			       : "r" ( source0 ),
				 "r" ( dest0 ),
				 "r" ( sizeof ( *dest ) ),
				 "1" ( 0 ) );
}
/**
+52 -7
View File
@@ -9,9 +9,6 @@
FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
FILE_SECBOOT ( PERMITTED );
#include <stdint.h>
#include <string.h>
/** Element of a big integer */
typedef unsigned long bigint_element_t;
@@ -209,10 +206,38 @@ bigint_shr_raw ( unsigned long *value0, unsigned int size ) {
static inline __attribute__ (( always_inline )) void
bigint_grow_raw ( const unsigned long *source0, unsigned int source_size,
		  unsigned long *dest0, unsigned int dest_size ) {
	bigint_t ( dest_size ) __attribute__ (( may_alias )) *dest =
		( ( void * ) dest0 );
	const unsigned long *sourceN = ( source0 + source_size );
	unsigned long *destN = ( dest0 + dest_size );
	unsigned long *discard_source;
	unsigned long *discard_dest;
	unsigned long discard_source_i;

	/* Copy source_size elements from source to dest, then
	 * zero-fill the remainder of dest up to dest_size elements.
	 *
	 * The assembly performs the entire operation, so no separate
	 * memcpy()/memset() pass (and hence no <string.h>) is
	 * needed.
	 *
	 * Both pointers advance by sizeof ( unsigned long ), passed
	 * as an immediate.  The copy loop (label 1) tests its exit
	 * condition only after transferring an element, so
	 * source_size must be nonzero.  The zero-fill loop (label 2)
	 * is entered via its test (label 3), so dest_size may equal
	 * source_size.
	 */
	__asm__ __volatile__ ( "\n1:\n\t"
			       /* Copy dest[i] */
			       LOADN " %2, (%0)\n\t"
			       STOREN " %2, (%1)\n\t"
			       /* Loop */
			       "addi %0, %0, %6\n\t"
			       "addi %1, %1, %6\n\t"
			       "bne %0, %4, 1b\n\t"
			       "j 3f\n\t"
			       "\n2:\n\t"
			       /* Zero dest[i] */
			       STOREN " zero, (%1)\n\t"
			       /* Loop */
			       "addi %1, %1, %6\n\t"
			       "\n3:\n\t"
			       "bne %1, %5, 2b\n\t"
			       : "=&r" ( discard_source ),
				 "=&r" ( discard_dest ),
				 "=&r" ( discard_source_i ),
				 "=m" ( *dest )
			       : "r" ( sourceN ),
				 "r" ( destN ),
				 "i" ( sizeof ( unsigned long ) ),
				 "0" ( source0 ), "1" ( dest0 ) );
}
/**
@@ -227,8 +252,28 @@ static inline __attribute__ (( always_inline )) void
bigint_shrink_raw ( const unsigned long *source0,
		    unsigned int source_size __unused,
		    unsigned long *dest0, unsigned int dest_size ) {
	bigint_t ( dest_size ) __attribute__ (( may_alias )) *dest =
		( ( void * ) dest0 );
	unsigned long *destN = ( dest0 + dest_size );
	unsigned long *discard_source;
	unsigned long *discard_dest;
	unsigned long discard_source_i;

	/* Copy the first dest_size elements from source to dest.
	 *
	 * The assembly performs the entire copy, so no separate
	 * memcpy() pass (and hence no <string.h>) is needed.
	 *
	 * Both pointers advance by sizeof ( unsigned long ), passed
	 * as an immediate.  The loop tests its exit condition only
	 * after transferring an element, so dest_size must be
	 * nonzero.
	 */
	__asm__ __volatile__ ( "\n1:\n\t"
			       /* Copy dest[i] */
			       LOADN " %2, (%0)\n\t"
			       STOREN " %2, (%1)\n\t"
			       /* Loop */
			       "addi %0, %0, %5\n\t"
			       "addi %1, %1, %5\n\t"
			       "bne %1, %4, 1b\n\t"
			       : "=&r" ( discard_source ),
				 "=&r" ( discard_dest ),
				 "=&r" ( discard_source_i ),
				 "=m" ( *dest )
			       : "r" ( destN ),
				 "i" ( sizeof ( unsigned long ) ),
				 "0" ( source0 ), "1" ( dest0 ) );
}
/**
-2
View File
@@ -8,8 +8,6 @@
FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
#include <stdint.h>
/** Element of a big integer */
typedef unsigned long bigint_element_t;
+1
View File
@@ -10,6 +10,7 @@ FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
FILE_SECBOOT ( PERMITTED );
#include <assert.h>
#include <stdint.h>
/**
* Define a big-integer type