diff options
author | Duncan Wilkie <antigravityd@gmail.com> | 2023-11-18 06:11:09 -0600 |
---|---|---|
committer | Duncan Wilkie <antigravityd@gmail.com> | 2023-11-18 06:11:09 -0600 |
commit | 11da511c784eca003deb90c23570f0873954e0de (patch) | |
tree | e14fdd3d5d6345956d67e79ae771d0633d28362b /gmp-6.3.0/mpn/x86_64/fat/fat.c |
Initial commit.
Diffstat (limited to 'gmp-6.3.0/mpn/x86_64/fat/fat.c')
-rw-r--r-- | gmp-6.3.0/mpn/x86_64/fat/fat.c | 473 |
1 files changed, 473 insertions, 0 deletions
diff --git a/gmp-6.3.0/mpn/x86_64/fat/fat.c b/gmp-6.3.0/mpn/x86_64/fat/fat.c new file mode 100644 index 0000000..cc35afa --- /dev/null +++ b/gmp-6.3.0/mpn/x86_64/fat/fat.c @@ -0,0 +1,473 @@ +/* x86_64 fat binary initializers. + + Contributed to the GNU project by Kevin Ryde (original x86_32 code) and + Torbjorn Granlund (port to x86_64) + + THE FUNCTIONS AND VARIABLES IN THIS FILE ARE FOR INTERNAL USE ONLY. + THEY'RE ALMOST CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR + COMPLETELY IN FUTURE GNU MP RELEASES. + +Copyright 2003, 2004, 2009, 2011-2015, 2017 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of either: + + * the GNU Lesser General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your + option) any later version. + +or + + * the GNU General Public License as published by the Free Software + Foundation; either version 2 of the License, or (at your option) any + later version. + +or both in parallel, as here. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received copies of the GNU General Public License and the +GNU Lesser General Public License along with the GNU MP Library. If not, +see https://www.gnu.org/licenses/. */ + +#include <stdio.h> /* for printf */ +#include <stdlib.h> /* for getenv */ +#include <string.h> + +#include "gmp-impl.h" + +/* Change this to "#define TRACE(x) x" for some traces. */ +#define TRACE(x) + + +/* fat_entry.asm */ +long __gmpn_cpuid (char [12], int); + + +#if WANT_FAKE_CPUID +/* The "name"s in the table are values for the GMP_CPU_TYPE environment + variable. Anything can be used, but for now it's the canonical cpu types + as per config.guess/config.sub. */ + +#define __gmpn_cpuid fake_cpuid + +#define MAKE_FMS(family, model) \ + ((((family) & 0xf) << 8) + (((family) & 0xff0) << 20) \ + + (((model) & 0xf) << 4) + (((model) & 0xf0) << 12)) + +static struct { + const char *name; + const char *vendor; + unsigned fms; +} fake_cpuid_table[] = { + { "core2", "GenuineIntel", MAKE_FMS (6, 0xf) }, + { "nehalem", "GenuineIntel", MAKE_FMS (6, 0x1a) }, + { "nhm", "GenuineIntel", MAKE_FMS (6, 0x1a) }, + { "atom", "GenuineIntel", MAKE_FMS (6, 0x1c) }, + { "westmere", "GenuineIntel", MAKE_FMS (6, 0x25) }, + { "wsm", "GenuineIntel", MAKE_FMS (6, 0x25) }, + { "sandybridge","GenuineIntel", MAKE_FMS (6, 0x2a) }, + { "sbr", "GenuineIntel", MAKE_FMS (6, 0x2a) }, + { "silvermont", "GenuineIntel", MAKE_FMS (6, 0x37) }, + { "slm", "GenuineIntel", MAKE_FMS (6, 0x37) }, + { "haswell", "GenuineIntel", MAKE_FMS (6, 0x3c) }, + { "hwl", "GenuineIntel", MAKE_FMS (6, 0x3c) }, + { "broadwell", "GenuineIntel", MAKE_FMS (6, 0x3d) }, + { "bwl", "GenuineIntel", MAKE_FMS (6, 0x3d) }, + { "skylake", "GenuineIntel", MAKE_FMS (6, 0x5e) }, + { "sky", "GenuineIntel", MAKE_FMS (6, 0x5e) }, + { "pentium4", "GenuineIntel", MAKE_FMS (15, 3) }, + + { "k8", "AuthenticAMD", MAKE_FMS (15, 0) }, + { "k10", "AuthenticAMD", MAKE_FMS (16, 0) }, + { "bobcat", "AuthenticAMD", MAKE_FMS (20, 1) }, + { "bulldozer", "AuthenticAMD", MAKE_FMS (21, 1) }, + { "piledriver", "AuthenticAMD", MAKE_FMS (21, 2) }, + { "steamroller","AuthenticAMD", MAKE_FMS (21, 0x30) }, + { "excavator", "AuthenticAMD", MAKE_FMS (21, 0x60) }, + { "jaguar", "AuthenticAMD", MAKE_FMS (22, 1) }, + { "zen", "AuthenticAMD", MAKE_FMS (23, 1) }, + + { "nano", "CentaurHauls", MAKE_FMS (6, 15) }, +}; + +static int +fake_cpuid_lookup (void) +{ + char *s; + int i; + + s = getenv ("GMP_CPU_TYPE"); + if (s == NULL) + { + printf ("Need GMP_CPU_TYPE environment variable for fake cpuid\n"); + abort (); + } + + for (i = 0; i < numberof (fake_cpuid_table); i++) + if (strcmp (s, fake_cpuid_table[i].name) == 0) + return i; + + printf ("GMP_CPU_TYPE=%s unknown\n", s); + abort (); +} + +static long +fake_cpuid (char dst[12], unsigned int id) +{ + int i = fake_cpuid_lookup(); + + switch (id) { + case 0: + memcpy (dst, fake_cpuid_table[i].vendor, 12); + return 0; + case 1: + return fake_cpuid_table[i].fms; + case 7: + dst[0] = 0xff; /* BMI1, AVX2, etc */ + dst[1] = 0xff; /* BMI2, etc */ + return 0; + case 0x80000001: + dst[4 + 29 / 8] = (1 << (29 % 8)); /* "long" mode */ + return 0; + default: + printf ("fake_cpuid(): oops, unknown id %d\n", id); + abort (); + } +} +#endif + + +typedef DECL_preinv_divrem_1 ((*preinv_divrem_1_t)); +typedef DECL_preinv_mod_1 ((*preinv_mod_1_t)); + +struct cpuvec_t __gmpn_cpuvec = { + __MPN(add_n_init), + __MPN(addlsh1_n_init), + __MPN(addlsh2_n_init), + __MPN(addmul_1_init), + __MPN(addmul_2_init), + __MPN(bdiv_dbm1c_init), + __MPN(cnd_add_n_init), + __MPN(cnd_sub_n_init), + __MPN(com_init), + __MPN(copyd_init), + __MPN(copyi_init), + __MPN(divexact_1_init), + __MPN(divrem_1_init), + __MPN(gcd_11_init), + __MPN(lshift_init), + __MPN(lshiftc_init), + __MPN(mod_1_init), + __MPN(mod_1_1p_init), + __MPN(mod_1_1p_cps_init), + __MPN(mod_1s_2p_init), + __MPN(mod_1s_2p_cps_init), + __MPN(mod_1s_4p_init), + __MPN(mod_1s_4p_cps_init), + __MPN(mod_34lsub1_init), + __MPN(modexact_1c_odd_init), + __MPN(mul_1_init), + __MPN(mul_basecase_init), + __MPN(mullo_basecase_init), + __MPN(preinv_divrem_1_init), + __MPN(preinv_mod_1_init), + __MPN(redc_1_init), + __MPN(redc_2_init), + __MPN(rshift_init), + __MPN(sqr_basecase_init), + __MPN(sub_n_init), + __MPN(sublsh1_n_init), + __MPN(submul_1_init), + 0 +}; + +int __gmpn_cpuvec_initialized = 0; + +/* The following setups start with generic x86, then overwrite with + specifics for a chip, and higher versions of that chip. + + The arrangement of the setups here will normally be the same as the $path + selections in configure.in for the respective chips. + + This code is reentrant and thread safe. We always calculate the same + decided_cpuvec, so if two copies of the code are running it doesn't + matter which completes first, both write the same to __gmpn_cpuvec. + + We need to go via decided_cpuvec because if one thread has completed + __gmpn_cpuvec then it may be making use of the threshold values in that + vector. If another thread is still running __gmpn_cpuvec_init then we + don't want it to write different values to those fields since some of the + asm routines only operate correctly up to their own defined threshold, + not an arbitrary value. */ + +static int +gmp_workaround_skylake_cpuid_bug () +{ + char feature_string[49]; + char processor_name_string[49]; + static const char *bad_cpus[] = {" G44", " G45", " G39" /* , "6600" */ }; + int i; + + /* Example strings: */ + /* "Intel(R) Pentium(R) CPU G4400 @ 3.30GHz" */ + /* "Intel(R) Core(TM) i5-6600K CPU @ 3.50GHz" */ + /* ^ ^ ^ */ + /* 0x80000002 0x80000003 0x80000004 */ + /* We match out just the 0x80000003 part here. */ + + /* In their infinitive wisdom, Intel decided to use one register order for + the vendor string, and another for the processor name string. We shuffle + things about here, rather than write a new variant of our assembly cpuid. + */ + + unsigned int eax, ebx, ecx, edx; + eax = __gmpn_cpuid (feature_string, 0x80000003); + ebx = ((unsigned int *)feature_string)[0]; + edx = ((unsigned int *)feature_string)[1]; + ecx = ((unsigned int *)feature_string)[2]; + + ((unsigned int *) (processor_name_string))[0] = eax; + ((unsigned int *) (processor_name_string))[1] = ebx; + ((unsigned int *) (processor_name_string))[2] = ecx; + ((unsigned int *) (processor_name_string))[3] = edx; + + processor_name_string[16] = 0; + + for (i = 0; i < sizeof (bad_cpus) / sizeof (char *); i++) + { + if (strstr (processor_name_string, bad_cpus[i]) != 0) + return 1; + } + return 0; +} + +enum {BMI2_BIT = 8}; + +void +__gmpn_cpuvec_init (void) +{ + struct cpuvec_t decided_cpuvec; + char vendor_string[13]; + char dummy_string[12]; + long fms; + int family, model; + + TRACE (printf ("__gmpn_cpuvec_init:\n")); + + memset (&decided_cpuvec, '\0', sizeof (decided_cpuvec)); + + CPUVEC_SETUP_x86_64; + CPUVEC_SETUP_fat; + + __gmpn_cpuid (vendor_string, 0); + vendor_string[12] = 0; + + fms = __gmpn_cpuid (dummy_string, 1); + family = ((fms >> 8) & 0xf) + ((fms >> 20) & 0xff); + model = ((fms >> 4) & 0xf) + ((fms >> 12) & 0xf0); + + /* Check extended feature flags */ + __gmpn_cpuid (dummy_string, 0x80000001); + if ((dummy_string[4 + 29 / 8] & (1 << (29 % 8))) == 0) + abort (); /* longmode-capable-bit turned off! */ + + /*********************************************************/ + /*** WARNING: keep this list in sync with config.guess ***/ + /*********************************************************/ + if (strcmp (vendor_string, "GenuineIntel") == 0) + { + switch (family) + { + case 6: + switch (model) + { + case 0x0f: /* Conroe Merom Kentsfield Allendale */ + case 0x10: + case 0x11: + case 0x12: + case 0x13: + case 0x14: + case 0x15: + case 0x16: + case 0x17: /* PNR Wolfdale Yorkfield */ + case 0x18: + case 0x19: + case 0x1d: /* PNR Dunnington */ + CPUVEC_SETUP_core2; + break; + + case 0x1c: /* Atom Silverthorne */ + case 0x26: /* Atom Lincroft */ + case 0x27: /* Atom Saltwell? */ + case 0x36: /* Atom Cedarview/Saltwell */ + CPUVEC_SETUP_atom; + break; + + case 0x1a: /* NHM Gainestown */ + case 0x1b: + case 0x1e: /* NHM Lynnfield/Jasper */ + case 0x1f: + case 0x20: + case 0x21: + case 0x22: + case 0x23: + case 0x24: + case 0x25: /* WSM Clarkdale/Arrandale */ + case 0x28: + case 0x29: + case 0x2b: + case 0x2c: /* WSM Gulftown */ + case 0x2e: /* NHM Beckton */ + case 0x2f: /* WSM Eagleton */ + CPUVEC_SETUP_core2; + CPUVEC_SETUP_coreinhm; + break; + + case 0x37: /* Silvermont */ + case 0x4a: /* Silvermont */ + case 0x4c: /* Airmont */ + case 0x4d: /* Silvermont/Avoton */ + case 0x5a: /* Silvermont */ + CPUVEC_SETUP_atom; + CPUVEC_SETUP_silvermont; + break; + + case 0x5c: /* Goldmont */ + case 0x5f: /* Goldmont */ + case 0x7a: /* Goldmont Plus */ + CPUVEC_SETUP_atom; + CPUVEC_SETUP_silvermont; + CPUVEC_SETUP_goldmont; + break; + + case 0x2a: /* SB */ + case 0x2d: /* SBC-EP */ + case 0x3a: /* IBR */ + case 0x3e: /* IBR Ivytown */ + CPUVEC_SETUP_core2; + CPUVEC_SETUP_coreinhm; + CPUVEC_SETUP_coreisbr; + break; + case 0x3c: /* Haswell client */ + case 0x3f: /* Haswell server */ + case 0x45: /* Haswell ULT */ + case 0x46: /* Crystal Well */ + CPUVEC_SETUP_core2; + CPUVEC_SETUP_coreinhm; + CPUVEC_SETUP_coreisbr; + /* Some Haswells lack BMI2. Let them appear as Sandybridges for + now. */ + __gmpn_cpuid (dummy_string, 7); + if ((dummy_string[0 + BMI2_BIT / 8] & (1 << (BMI2_BIT % 8))) == 0) + break; + CPUVEC_SETUP_coreihwl; + break; + case 0x3d: /* Broadwell */ + case 0x47: /* Broadwell */ + case 0x4f: /* Broadwell server */ + case 0x56: /* Broadwell microserver */ + CPUVEC_SETUP_core2; + CPUVEC_SETUP_coreinhm; + CPUVEC_SETUP_coreisbr; + if ((dummy_string[0 + BMI2_BIT / 8] & (1 << (BMI2_BIT % 8))) == 0) + break; + CPUVEC_SETUP_coreihwl; + CPUVEC_SETUP_coreibwl; + break; + case 0x4e: /* Skylake client */ + case 0x55: /* Skylake server */ + case 0x5e: /* Skylake */ + case 0x8e: /* Kabylake */ + case 0x9e: /* Kabylake */ + CPUVEC_SETUP_core2; + CPUVEC_SETUP_coreinhm; + CPUVEC_SETUP_coreisbr; + if ((dummy_string[0 + BMI2_BIT / 8] & (1 << (BMI2_BIT % 8))) == 0) + break; + if (gmp_workaround_skylake_cpuid_bug ()) + break; + CPUVEC_SETUP_coreihwl; + CPUVEC_SETUP_coreibwl; + CPUVEC_SETUP_skylake; + break; + } + break; + + case 15: + CPUVEC_SETUP_pentium4; + break; + } + } + else if (strcmp (vendor_string, "AuthenticAMD") == 0) + { + switch (family) + { + case 0x0f: /* k8 */ + case 0x11: /* "fam 11h", mix of k8 and k10 */ + case 0x13: + CPUVEC_SETUP_k8; + break; + + case 0x10: /* k10 */ + case 0x12: /* k10 (llano) */ + CPUVEC_SETUP_k8; + CPUVEC_SETUP_k10; + break; + + case 0x14: /* bobcat */ + CPUVEC_SETUP_k8; + CPUVEC_SETUP_k10; + CPUVEC_SETUP_bt1; + break; + + case 0x16: /* jaguar */ + CPUVEC_SETUP_k8; + CPUVEC_SETUP_k10; + CPUVEC_SETUP_bt1; + CPUVEC_SETUP_bt2; + break; + + case 0x15: /* bulldozer, piledriver, steamroller, excavator */ + CPUVEC_SETUP_k8; + CPUVEC_SETUP_k10; + CPUVEC_SETUP_bd1; + break; + + case 0x17: /* zen */ + case 0x19: /* zen3 */ + CPUVEC_SETUP_zen; + break; + } + } + else if (strcmp (vendor_string, "CentaurHauls") == 0) + { + switch (family) + { + case 6: + if (model >= 15) + CPUVEC_SETUP_nano; + break; + } + } + + /* There's no x86 generic mpn_preinv_divrem_1 or mpn_preinv_mod_1. + Instead default to the plain versions from whichever CPU we detected. + The function arguments are compatible, no need for any glue code. */ + if (decided_cpuvec.preinv_divrem_1 == NULL) + decided_cpuvec.preinv_divrem_1 =(preinv_divrem_1_t)decided_cpuvec.divrem_1; + if (decided_cpuvec.preinv_mod_1 == NULL) + decided_cpuvec.preinv_mod_1 =(preinv_mod_1_t) decided_cpuvec.mod_1; + + ASSERT_CPUVEC (decided_cpuvec); + CPUVEC_INSTALL (decided_cpuvec); + + /* Set this once the threshold fields are ready. + Use volatile to prevent it getting moved. */ + *((volatile int *) &__gmpn_cpuvec_initialized) = 1; +} |