diff --git a/.cvsignore b/.cvsignore index e52bea3..76cf729 100644 --- a/.cvsignore +++ b/.cvsignore @@ -1,2 +1,2 @@ -glibc-2.11-188-g0cbcca8-fedora.tar.bz2 -glibc-2.11-188-g0cbcca8.tar.bz2 +glibc-2.11-215-g199428c-fedora.tar.bz2 +glibc-2.11-215-g199428c.tar.bz2 diff --git a/glibc-fedora.patch b/glibc-fedora.patch index 401e40e..78b85bc 100644 --- a/glibc-fedora.patch +++ b/glibc-fedora.patch @@ -1,6 +1,20 @@ ---- glibc-2.11-188-g0cbcca8/ChangeLog -+++ glibc-2.11.90-12/ChangeLog -@@ -1,3 +1,16 @@ +--- glibc-2.11-215-g199428c/ChangeLog ++++ glibc-2.11.90-13/ChangeLog +@@ -179,6 +179,30 @@ + * sysdeps/x86_64/fpu/fegetenv.c: Likewise + * sysdeps/s390/fpu/fegetenv.c: Likewise. Remove unused headers. + ++2010-02-12 H.J. Lu ++ ++ * sysdeps/i386/i686/multiarch/memcpy-ssse3-rep.S: Use unsigned ++ conditional jumps. ++ (shl_0_gobble_cache_loop_tail): Removed. ++ * sysdeps/i386/i686/multiarch/memcpy-ssse3.S: Likewise. ++ ++ * sysdeps/i386/i686/multiarch/memset-sse2-rep.S: Use unsigned ++ conditional jumps. ++ * sysdeps/i386/i686/multiarch/memset-sse2.S: Likewise. ++ +2009-10-27 Aurelien Jarno + + [BZ #10855] @@ -17,76 +31,7 @@ 2010-02-05 H.J. Lu [BZ #11230] -@@ -274,68 +287,6 @@ - - * posix/sched.h: Define time_t and pid_t for XPG7. - --2010-01-12 H.J. Lu -- -- * sysdeps/i386/i686/bcopy.S: New file. -- -- * sysdeps/i386/i686/cacheinfo.c (__x86_64_data_cache_size): Define. -- -- * sysdeps/i386/i686/memcpy.S (__memcpy_chk): Use ENTRY_CHK -- and END_CHK. -- * sysdeps/i386/i686/memmove.S (__memmove_chk): Likewise. -- * sysdeps/i386/i686/mempcpy.S (__mempcpy_chk): Likewise. -- * sysdeps/i386/i686/memset.S (__memset_chk): Likewise. -- -- * sysdeps/i386/i686/memmove.S: Support USE_AS_BCOPY. 
-- -- * sysdeps/i386/i686/multiarch/Makefile (sysdep_routines): Add -- bzero-sse2 memset-sse2 memcpy-ssse3 mempcpy-ssse3 memmove-ssse3 -- memcpy-ssse3-rep mempcpy-ssse3-rep memmove-ssse3-rep -- bcopy-ssse3 bcopy-ssse3-rep memset-sse2-rep bzero-sse2-rep -- * sysdeps/i386/i686/multiarch/bcopy-ssse3-rep.S: New file. -- * sysdeps/i386/i686/multiarch/bcopy-ssse3.S: New file. -- * sysdeps/i386/i686/multiarch/bcopy.S: New file. -- * sysdeps/i386/i686/multiarch/bzero-sse2-rep.S: New file. -- * sysdeps/i386/i686/multiarch/bzero-sse2.S: New file. -- * sysdeps/i386/i686/multiarch/bzero.S: New file. -- * sysdeps/i386/i686/multiarch/memcpy-ssse3-rep.S: New file. -- * sysdeps/i386/i686/multiarch/memcpy-ssse3.S: New file. -- * sysdeps/i386/i686/multiarch/memcpy.S: New file. -- * sysdeps/i386/i686/multiarch/memcpy_chk.S: New file. -- * sysdeps/i386/i686/multiarch/memmove-ssse3-rep.S: New file. -- * sysdeps/i386/i686/multiarch/memmove-ssse3.S: New file. -- * sysdeps/i386/i686/multiarch/memmove.S: New file. -- * sysdeps/i386/i686/multiarch/memmove_chk.S: New file. -- * sysdeps/i386/i686/multiarch/mempcpy-ssse3-rep.S: New file. -- * sysdeps/i386/i686/multiarch/mempcpy-ssse3.S: New file. -- * sysdeps/i386/i686/multiarch/mempcpy.S: New file. -- * sysdeps/i386/i686/multiarch/mempcpy_chk.S: New file. -- * sysdeps/i386/i686/multiarch/memset-sse2-rep.S: New file. -- * sysdeps/i386/i686/multiarch/memset-sse2.S: New file. -- * sysdeps/i386/i686/multiarch/memset.S: New file. -- * sysdeps/i386/i686/multiarch/memset_chk.S: New file. -- -- * sysdeps/i386/sysdep.h (ENTRY_CHK): New. -- (END_CHK): Likewise. -- -- * sysdeps/i386/i686/multiarch/ifunc-defines.sym: Add -- FEATURE_OFFSET, FEATURE_SIZE and FEATURE_INDEX_1. -- * sysdeps/x86_64/multiarch/ifunc-defines.sym: Likewise. -- -- * sysdeps/x86_64/cacheinfo.c (intel_02_cache_info): Add entries -- for 0x0e and 0x80. -- (__x86_64_data_cache_size): New. -- (init_cacheinfo): Set __x86_64_data_cache_size. 
-- -- * sysdeps/x86_64/multiarch/init-arch.c (__init_cpu_features): Turn -- on bit_Fast_Rep_String for Intel Core i7. -- -- * sysdeps/x86_64/multiarch/init-arch.h (bit_Fast_Rep_String): New. -- (index_Fast_Rep_String): Likewise. -- (FEATURE_INDEX_1): Likewise. -- (FEATURE_INDEX_MAX): Likewise. -- (cpu_features): Add feature. -- - 2010-01-12 Ulrich Drepper - - * conform/data/sys/select.h-data: Fix up for XPG7. -@@ -1940,6 +1891,11 @@ d2009-10-30 Ulrich Drepper +@@ -2121,6 +2145,11 @@ d2009-10-30 Ulrich Drepper * sysdeps/generic/ldsodefs.h (struct rtld_global): The map element in the unique symbol hash table should not be const. @@ -98,7 +43,7 @@ 2009-07-21 Ulrich Drepper * sysdeps/x86_64/multiarch/strstr.c: Minor cleanups. Remove -@@ -2205,6 +2161,11 @@ d2009-10-30 Ulrich Drepper +@@ -2386,6 +2415,11 @@ d2009-10-30 Ulrich Drepper out common code into new function get_common_indeces. Determine extended family and model for Intel processors. @@ -110,7 +55,7 @@ 2009-06-26 Ulrich Drepper * resolv/resolv.h: Define RES_SNGLKUPREOP. -@@ -10898,6 +10859,10 @@ d2009-10-30 Ulrich Drepper +@@ -11079,6 +11113,10 @@ d2009-10-30 Ulrich Drepper [BZ #4368] * stdlib/stdlib.h: Remove obsolete part of comment for realpath. @@ -121,7 +66,7 @@ 2007-04-16 Ulrich Drepper [BZ #4364] -@@ -12155,6 +12120,15 @@ d2009-10-30 Ulrich Drepper +@@ -12336,6 +12374,15 @@ d2009-10-30 Ulrich Drepper separators also if no non-zero digits found. * stdlib/Makefile (tests): Add tst-strtod3. 
@@ -137,8 +82,8 @@ 2006-12-09 Ulrich Drepper [BZ #3632] ---- glibc-2.11-188-g0cbcca8/ChangeLog.15 -+++ glibc-2.11.90-12/ChangeLog.15 +--- glibc-2.11-215-g199428c/ChangeLog.15 ++++ glibc-2.11.90-13/ChangeLog.15 @@ -477,6 +477,14 @@ 2004-11-26 Jakub Jelinek @@ -204,8 +149,8 @@ 2004-08-30 Roland McGrath * scripts/extract-abilist.awk: If `lastversion' variable defined, omit ---- glibc-2.11-188-g0cbcca8/ChangeLog.16 -+++ glibc-2.11.90-12/ChangeLog.16 +--- glibc-2.11-215-g199428c/ChangeLog.16 ++++ glibc-2.11.90-13/ChangeLog.16 @@ -2042,6 +2042,9 @@ (__MATHDECL_2): Use __REDIRECT_NTH instead of __REDIRECT followed by __THROW. @@ -259,8 +204,8 @@ 2005-07-28 Thomas Schwinge [BZ #1137] ---- glibc-2.11-188-g0cbcca8/Makeconfig -+++ glibc-2.11.90-12/Makeconfig +--- glibc-2.11-215-g199428c/Makeconfig ++++ glibc-2.11.90-13/Makeconfig @@ -780,12 +780,12 @@ endif # The assembler can generate debug information too. ifndef ASFLAGS @@ -277,8 +222,8 @@ ifndef BUILD_CC BUILD_CC = $(CC) ---- glibc-2.11-188-g0cbcca8/csu/Makefile -+++ glibc-2.11.90-12/csu/Makefile +--- glibc-2.11-215-g199428c/csu/Makefile ++++ glibc-2.11.90-13/csu/Makefile @@ -93,7 +93,8 @@ omit-deps += $(crtstuff) $(crtstuff:%=$(objpfx)%.o): %.o: %.S $(objpfx)defs.h $(compile.S) -g0 $(ASFLAGS-.os) -o $@ @@ -289,8 +234,8 @@ vpath initfini.c $(sysdirs) ---- glibc-2.11-188-g0cbcca8/csu/elf-init.c -+++ glibc-2.11.90-12/csu/elf-init.c +--- glibc-2.11-215-g199428c/csu/elf-init.c ++++ glibc-2.11.90-13/csu/elf-init.c @@ -63,6 +63,23 @@ extern void (*__init_array_end []) (int, char **, char **) extern void (*__fini_array_start []) (void) attribute_hidden; extern void (*__fini_array_end []) (void) attribute_hidden; @@ -315,8 +260,8 @@ /* These function symbols are provided for the .init/.fini section entry points automagically by the linker. 
*/ ---- glibc-2.11-188-g0cbcca8/debug/tst-chk1.c -+++ glibc-2.11.90-12/debug/tst-chk1.c +--- glibc-2.11-215-g199428c/debug/tst-chk1.c ++++ glibc-2.11.90-13/debug/tst-chk1.c @@ -17,6 +17,9 @@ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. */ @@ -345,8 +290,8 @@ # define O 0 # else # define O 1 ---- glibc-2.11-188-g0cbcca8/elf/ldconfig.c -+++ glibc-2.11.90-12/elf/ldconfig.c +--- glibc-2.11-215-g199428c/elf/ldconfig.c ++++ glibc-2.11.90-13/elf/ldconfig.c @@ -1020,17 +1020,19 @@ search_dirs (void) @@ -428,8 +373,8 @@ } if (! opt_ignore_aux_cache) ---- glibc-2.11-188-g0cbcca8/elf/tst-stackguard1.c -+++ glibc-2.11.90-12/elf/tst-stackguard1.c +--- glibc-2.11-215-g199428c/elf/tst-stackguard1.c ++++ glibc-2.11.90-13/elf/tst-stackguard1.c @@ -160,17 +160,21 @@ do_test (void) the 16 runs, something is very wrong. */ int ndifferences = 0; @@ -454,16 +399,16 @@ { puts ("stack guard canaries are not randomized enough"); puts ("nor equal to the default canary value"); ---- glibc-2.11-188-g0cbcca8/include/bits/stdlib-ldbl.h -+++ glibc-2.11.90-12/include/bits/stdlib-ldbl.h +--- glibc-2.11-215-g199428c/include/bits/stdlib-ldbl.h ++++ glibc-2.11.90-13/include/bits/stdlib-ldbl.h @@ -0,0 +1 @@ +#include ---- glibc-2.11-188-g0cbcca8/include/bits/wchar-ldbl.h -+++ glibc-2.11.90-12/include/bits/wchar-ldbl.h +--- glibc-2.11-215-g199428c/include/bits/wchar-ldbl.h ++++ glibc-2.11.90-13/include/bits/wchar-ldbl.h @@ -0,0 +1 @@ +#include ---- glibc-2.11-188-g0cbcca8/include/features.h -+++ glibc-2.11.90-12/include/features.h +--- glibc-2.11-215-g199428c/include/features.h ++++ glibc-2.11.90-13/include/features.h @@ -308,8 +308,13 @@ #endif @@ -480,8 +425,8 @@ # define __USE_FORTIFY_LEVEL 2 # else # define __USE_FORTIFY_LEVEL 1 ---- glibc-2.11-188-g0cbcca8/intl/locale.alias -+++ glibc-2.11.90-12/intl/locale.alias +--- glibc-2.11-215-g199428c/intl/locale.alias ++++ glibc-2.11.90-13/intl/locale.alias @@ -57,8 +57,6 @@ korean ko_KR.eucKR korean.euc ko_KR.eucKR 
ko_KR ko_KR.eucKR @@ -491,8 +436,8 @@ norwegian nb_NO.ISO-8859-1 nynorsk nn_NO.ISO-8859-1 polish pl_PL.ISO-8859-2 ---- glibc-2.11-188-g0cbcca8/libio/stdio.h -+++ glibc-2.11.90-12/libio/stdio.h +--- glibc-2.11-215-g199428c/libio/stdio.h ++++ glibc-2.11.90-13/libio/stdio.h @@ -165,10 +165,12 @@ typedef _G_fpos64_t fpos64_t; extern struct _IO_FILE *stdin; /* Standard input stream. */ extern struct _IO_FILE *stdout; /* Standard output stream. */ @@ -506,8 +451,8 @@ __BEGIN_NAMESPACE_STD /* Remove file FILENAME. */ ---- glibc-2.11-188-g0cbcca8/locale/iso-4217.def -+++ glibc-2.11.90-12/locale/iso-4217.def +--- glibc-2.11-215-g199428c/locale/iso-4217.def ++++ glibc-2.11.90-13/locale/iso-4217.def @@ -8,6 +8,7 @@ * * !!! The list has to be sorted !!! @@ -599,8 +544,8 @@ DEFINE_INT_CURR("PYG") /* Paraguay Guarani */ DEFINE_INT_CURR("QAR") /* Qatar Rial */ DEFINE_INT_CURR("ROL") /* Romanian Leu */ ---- glibc-2.11-188-g0cbcca8/locale/programs/locarchive.c -+++ glibc-2.11.90-12/locale/programs/locarchive.c +--- glibc-2.11-215-g199428c/locale/programs/locarchive.c ++++ glibc-2.11.90-13/locale/programs/locarchive.c @@ -134,7 +134,7 @@ create_archive (const char *archivefname, struct locarhandle *ah) size_t reserved = RESERVE_MMAP_SIZE; int xflags = 0; @@ -659,8 +604,8 @@ add_alias (struct locarhandle *ah, const char *alias, bool replace, const char *oldname, uint32_t *locrec_offset_p) { ---- glibc-2.11-188-g0cbcca8/localedata/Makefile -+++ glibc-2.11.90-12/localedata/Makefile +--- glibc-2.11-215-g199428c/localedata/Makefile ++++ glibc-2.11.90-13/localedata/Makefile @@ -225,6 +225,7 @@ $(INSTALL-SUPPORTED-LOCALES): install-locales-dir echo -n '...'; \ input=`echo $$locale | sed 's/\([^.]*\)[^@]*\(.*\)/\1\2/'`; \ @@ -669,8 +614,8 @@ -i locales/$$input -c -f charmaps/$$charset \ $(addprefix --prefix=,$(install_root)) $$locale; \ echo ' done'; \ ---- glibc-2.11-188-g0cbcca8/localedata/SUPPORTED -+++ glibc-2.11.90-12/localedata/SUPPORTED +--- 
glibc-2.11-215-g199428c/localedata/SUPPORTED ++++ glibc-2.11.90-13/localedata/SUPPORTED @@ -84,6 +84,7 @@ cy_GB.UTF-8/UTF-8 \ cy_GB/ISO-8859-14 \ da_DK.UTF-8/UTF-8 \ @@ -712,8 +657,8 @@ ta_IN/UTF-8 \ te_IN/UTF-8 \ tg_TJ.UTF-8/UTF-8 \ ---- glibc-2.11-188-g0cbcca8/localedata/locales/cy_GB -+++ glibc-2.11.90-12/localedata/locales/cy_GB +--- glibc-2.11-215-g199428c/localedata/locales/cy_GB ++++ glibc-2.11.90-13/localedata/locales/cy_GB @@ -248,8 +248,11 @@ mon "";/ d_t_fmt "" d_fmt "" @@ -728,8 +673,8 @@ END LC_TIME LC_MESSAGES ---- glibc-2.11-188-g0cbcca8/localedata/locales/en_GB -+++ glibc-2.11.90-12/localedata/locales/en_GB +--- glibc-2.11-215-g199428c/localedata/locales/en_GB ++++ glibc-2.11.90-13/localedata/locales/en_GB @@ -116,8 +116,8 @@ mon "";/ d_t_fmt "" d_fmt "" @@ -741,8 +686,8 @@ date_fmt "/ / " ---- glibc-2.11-188-g0cbcca8/localedata/locales/no_NO -+++ glibc-2.11.90-12/localedata/locales/no_NO +--- glibc-2.11-215-g199428c/localedata/locales/no_NO ++++ glibc-2.11.90-13/localedata/locales/no_NO @@ -0,0 +1,69 @@ +escape_char / +comment_char % @@ -813,8 +758,8 @@ +LC_ADDRESS +copy "nb_NO" +END LC_ADDRESS ---- glibc-2.11-188-g0cbcca8/localedata/locales/zh_TW -+++ glibc-2.11.90-12/localedata/locales/zh_TW +--- glibc-2.11-215-g199428c/localedata/locales/zh_TW ++++ glibc-2.11.90-13/localedata/locales/zh_TW @@ -1,7 +1,7 @@ comment_char % escape_char / @@ -842,8 +787,8 @@ revision "0.2" date "2000-08-02" % ---- glibc-2.11-188-g0cbcca8/malloc/mcheck.c -+++ glibc-2.11.90-12/malloc/mcheck.c +--- glibc-2.11-215-g199428c/malloc/mcheck.c ++++ glibc-2.11.90-13/malloc/mcheck.c @@ -24,9 +24,25 @@ # include # include @@ -919,8 +864,8 @@ old_free_hook = __free_hook; __free_hook = freehook; ---- glibc-2.11-188-g0cbcca8/manual/libc.texinfo -+++ glibc-2.11.90-12/manual/libc.texinfo +--- glibc-2.11-215-g199428c/manual/libc.texinfo ++++ glibc-2.11.90-13/manual/libc.texinfo @@ -5,7 +5,7 @@ @c setchapternewpage odd @@ -930,8 +875,8 @@ @direntry * Libc: (libc). C library. 
@end direntry ---- glibc-2.11-188-g0cbcca8/misc/sys/cdefs.h -+++ glibc-2.11.90-12/misc/sys/cdefs.h +--- glibc-2.11-215-g199428c/misc/sys/cdefs.h ++++ glibc-2.11.90-13/misc/sys/cdefs.h @@ -132,7 +132,10 @@ #define __bos(ptr) __builtin_object_size (ptr, __USE_FORTIFY_LEVEL > 1) #define __bos0(ptr) __builtin_object_size (ptr, 0) @@ -975,16 +920,16 @@ # define __va_arg_pack() __builtin_va_arg_pack () # define __va_arg_pack_len() __builtin_va_arg_pack_len () #endif ---- glibc-2.11-188-g0cbcca8/nis/nss -+++ glibc-2.11.90-12/nis/nss +--- glibc-2.11-215-g199428c/nis/nss ++++ glibc-2.11.90-13/nis/nss @@ -25,4 +25,4 @@ # memory with every getXXent() call. Otherwise each getXXent() call # might result into a network communication with the server to get # the next entry. -#SETENT_BATCH_READ=TRUE +SETENT_BATCH_READ=TRUE ---- glibc-2.11-188-g0cbcca8/nptl/ChangeLog -+++ glibc-2.11.90-12/nptl/ChangeLog +--- glibc-2.11-215-g199428c/nptl/ChangeLog ++++ glibc-2.11.90-13/nptl/ChangeLog @@ -3834,6 +3834,15 @@ Use __sigfillset. Document that sigfillset does the right thing wrt to SIGSETXID. @@ -1025,8 +970,8 @@ 2003-07-25 Jakub Jelinek * tst-cancel17.c (do_test): Check if aio_cancel failed. ---- glibc-2.11-188-g0cbcca8/nptl/Makefile -+++ glibc-2.11.90-12/nptl/Makefile +--- glibc-2.11-215-g199428c/nptl/Makefile ++++ glibc-2.11.90-13/nptl/Makefile @@ -339,7 +339,8 @@ endif extra-objs += $(crti-objs) $(crtn-objs) omit-deps += crti crtn @@ -1059,8 +1004,8 @@ else $(addprefix $(objpfx),$(tests) $(test-srcs)): $(objpfx)libpthread.a endif ---- glibc-2.11-188-g0cbcca8/nptl/sysdeps/unix/sysv/linux/bits/posix_opt.h -+++ glibc-2.11.90-12/nptl/sysdeps/unix/sysv/linux/bits/posix_opt.h +--- glibc-2.11-215-g199428c/nptl/sysdeps/unix/sysv/linux/bits/posix_opt.h ++++ glibc-2.11.90-13/nptl/sysdeps/unix/sysv/linux/bits/posix_opt.h @@ -189,4 +189,7 @@ /* Typed memory objects are not available. 
*/ #define _POSIX_TYPED_MEMORY_OBJECTS -1 @@ -1069,8 +1014,8 @@ +#define _XOPEN_STREAMS -1 + #endif /* bits/posix_opt.h */ ---- glibc-2.11-188-g0cbcca8/nptl/sysdeps/unix/sysv/linux/kernel-features.h -+++ glibc-2.11.90-12/nptl/sysdeps/unix/sysv/linux/kernel-features.h +--- glibc-2.11-215-g199428c/nptl/sysdeps/unix/sysv/linux/kernel-features.h ++++ glibc-2.11.90-13/nptl/sysdeps/unix/sysv/linux/kernel-features.h @@ -0,0 +1,6 @@ +#include_next + @@ -1078,8 +1023,8 @@ +#ifndef __ASSUME_CLONE_THREAD_FLAGS +# define __ASSUME_CLONE_THREAD_FLAGS 1 +#endif ---- glibc-2.11-188-g0cbcca8/nptl/tst-stackguard1.c -+++ glibc-2.11.90-12/nptl/tst-stackguard1.c +--- glibc-2.11-215-g199428c/nptl/tst-stackguard1.c ++++ glibc-2.11.90-13/nptl/tst-stackguard1.c @@ -190,17 +190,21 @@ do_test (void) the 16 runs, something is very wrong. */ int ndifferences = 0; @@ -1104,8 +1049,8 @@ { puts ("stack guard canaries are not randomized enough"); puts ("nor equal to the default canary value"); ---- glibc-2.11-188-g0cbcca8/nscd/nscd.conf -+++ glibc-2.11.90-12/nscd/nscd.conf +--- glibc-2.11-215-g199428c/nscd/nscd.conf ++++ glibc-2.11.90-13/nscd/nscd.conf @@ -33,7 +33,7 @@ # logfile /var/log/nscd.log # threads 4 @@ -1115,8 +1060,8 @@ # stat-user somebody debug-level 0 # reload-count 5 ---- glibc-2.11-188-g0cbcca8/nscd/nscd.init -+++ glibc-2.11.90-12/nscd/nscd.init +--- glibc-2.11-215-g199428c/nscd/nscd.init ++++ glibc-2.11.90-13/nscd/nscd.init @@ -9,6 +9,7 @@ # slow naming services like NIS, NIS+, LDAP, or hesiod. 
# processname: /usr/sbin/nscd @@ -1188,8 +1133,8 @@ ;; *) echo $"Usage: $0 {start|stop|status|restart|reload|condrestart}" ---- glibc-2.11-188-g0cbcca8/posix/Makefile -+++ glibc-2.11.90-12/posix/Makefile +--- glibc-2.11-215-g199428c/posix/Makefile ++++ glibc-2.11.90-13/posix/Makefile @@ -303,15 +303,8 @@ $(inst_libexecdir)/getconf: $(inst_bindir)/getconf \ mv -f $@/$$spec.new $@/$$spec; \ done < $(objpfx)getconf.speclist @@ -1210,8 +1155,8 @@ + | sed -n -e '/START_OF_STRINGS/,$${/\(POSIX_V[67]\|_XBS5\)_/{s/^[^"]*"//;s/".*$$//;p}}' \ + > $@.new mv -f $@.new $@ ---- glibc-2.11-188-g0cbcca8/posix/getconf.speclist.h -+++ glibc-2.11.90-12/posix/getconf.speclist.h +--- glibc-2.11-215-g199428c/posix/getconf.speclist.h ++++ glibc-2.11.90-13/posix/getconf.speclist.h @@ -0,0 +1,39 @@ +#include +const char *START_OF_STRINGS = @@ -1252,8 +1197,8 @@ +"XBS5_LPBIG_OFFBIG" +#endif +""; ---- glibc-2.11-188-g0cbcca8/streams/Makefile -+++ glibc-2.11.90-12/streams/Makefile +--- glibc-2.11-215-g199428c/streams/Makefile ++++ glibc-2.11.90-13/streams/Makefile @@ -21,7 +21,7 @@ # subdir := streams @@ -1263,8 +1208,8 @@ routines = isastream getmsg getpmsg putmsg putpmsg fattach fdetach include ../Rules ---- glibc-2.11-188-g0cbcca8/sysdeps/generic/dl-cache.h -+++ glibc-2.11.90-12/sysdeps/generic/dl-cache.h +--- glibc-2.11-215-g199428c/sysdeps/generic/dl-cache.h ++++ glibc-2.11.90-13/sysdeps/generic/dl-cache.h @@ -36,6 +36,14 @@ # define add_system_dir(dir) add_dir (dir) #endif @@ -1280,8 +1225,8 @@ #define CACHEMAGIC "ld.so-1.7.0" /* libc5 and glibc 2.0/2.1 use the same format. For glibc 2.2 another ---- glibc-2.11-188-g0cbcca8/sysdeps/i386/Makefile -+++ glibc-2.11.90-12/sysdeps/i386/Makefile +--- glibc-2.11-215-g199428c/sysdeps/i386/Makefile ++++ glibc-2.11.90-13/sysdeps/i386/Makefile @@ -2,6 +2,8 @@ # Every i386 port in use uses gas syntax (I think). 
asm-CPPFLAGS += -DGAS_SYNTAX @@ -1306,6430 +1251,715 @@ endif ifeq ($(subdir),elf) ---- glibc-2.11-188-g0cbcca8/sysdeps/i386/i686/bcopy.S -+++ glibc-2.11.90-12//dev/null -@@ -1,3 +0,0 @@ --#define USE_AS_BCOPY --#define memmove bcopy --#include ---- glibc-2.11-188-g0cbcca8/sysdeps/i386/i686/cacheinfo.c -+++ glibc-2.11.90-12/sysdeps/i386/i686/cacheinfo.c -@@ -1,4 +1,3 @@ --#define __x86_64_data_cache_size __x86_data_cache_size - #define __x86_64_data_cache_size_half __x86_data_cache_size_half - #define __x86_64_shared_cache_size __x86_shared_cache_size - #define __x86_64_shared_cache_size_half __x86_shared_cache_size_half ---- glibc-2.11-188-g0cbcca8/sysdeps/i386/i686/memcpy.S -+++ glibc-2.11.90-12/sysdeps/i386/i686/memcpy.S -@@ -32,11 +32,11 @@ - - .text - #if defined PIC && !defined NOT_IN_libc --ENTRY_CHK (__memcpy_chk) -+ENTRY (__memcpy_chk) - movl 12(%esp), %eax - cmpl %eax, 16(%esp) - jb HIDDEN_JUMPTARGET (__chk_fail) --END_CHK (__memcpy_chk) -+END (__memcpy_chk) - #endif - ENTRY (BP_SYM (memcpy)) - ENTER ---- glibc-2.11-188-g0cbcca8/sysdeps/i386/i686/memmove.S -+++ glibc-2.11.90-12/sysdeps/i386/i686/memmove.S -@@ -26,27 +26,18 @@ - - #define PARMS LINKAGE+4 /* one spilled register */ - #define RTN PARMS -+#define DEST RTN+RTN_SIZE -+#define SRC DEST+PTR_SIZE -+#define LEN SRC+PTR_SIZE - - .text -- --#ifdef USE_AS_BCOPY --# define SRC RTN+RTN_SIZE --# define DEST SRC+PTR_SIZE --# define LEN DEST+PTR_SIZE --#else --# define DEST RTN+RTN_SIZE --# define SRC DEST+PTR_SIZE --# define LEN SRC+PTR_SIZE -- --# if defined PIC && !defined NOT_IN_libc --ENTRY_CHK (__memmove_chk) -+#if defined PIC && !defined NOT_IN_libc -+ENTRY (__memmove_chk) - movl 12(%esp), %eax - cmpl %eax, 16(%esp) - jb HIDDEN_JUMPTARGET (__chk_fail) --END_CHK (__memmove_chk) --# endif -+END (__memmove_chk) +--- glibc-2.11-215-g199428c/sysdeps/i386/i686/multiarch/memcpy-ssse3-rep.S ++++ glibc-2.11.90-13/sysdeps/i386/i686/multiarch/memcpy-ssse3-rep.S +@@ -128,7 +128,7 @@ ENTRY (MEMCPY) + jb 
L(copy_forward) + je L(fwd_write_0bytes) + cmp $32, %ecx +- jge L(memmove_bwd) ++ jae L(memmove_bwd) + jmp L(bk_write_less32bytes_2) + L(memmove_bwd): + add %ecx, %eax +@@ -139,12 +139,12 @@ L(memmove_bwd): + L(copy_forward): #endif -- - ENTRY (BP_SYM (memmove)) - ENTER - -@@ -78,10 +69,8 @@ ENTRY (BP_SYM (memmove)) - movsl - movl %edx, %esi - cfi_restore (esi) --#ifndef USE_AS_BCOPY - movl DEST(%esp), %eax - RETURN_BOUNDED_POINTER (DEST(%esp)) --#endif - - popl %edi - cfi_adjust_cfa_offset (-4) -@@ -112,10 +101,8 @@ ENTRY (BP_SYM (memmove)) - movsl - movl %edx, %esi - cfi_restore (esi) --#ifndef USE_AS_BCOPY - movl DEST(%esp), %eax - RETURN_BOUNDED_POINTER (DEST(%esp)) --#endif + cmp $48, %ecx +- jge L(48bytesormore) ++ jae L(48bytesormore) - cld - popl %edi -@@ -125,6 +112,4 @@ ENTRY (BP_SYM (memmove)) - LEAVE - RET_PTR - END (BP_SYM (memmove)) --#ifndef USE_AS_BCOPY - libc_hidden_builtin_def (memmove) --#endif ---- glibc-2.11-188-g0cbcca8/sysdeps/i386/i686/mempcpy.S -+++ glibc-2.11.90-12/sysdeps/i386/i686/mempcpy.S -@@ -32,11 +32,11 @@ - - .text - #if defined PIC && !defined NOT_IN_libc --ENTRY_CHK (__mempcpy_chk) -+ENTRY (__mempcpy_chk) - movl 12(%esp), %eax - cmpl %eax, 16(%esp) - jb HIDDEN_JUMPTARGET (__chk_fail) --END_CHK (__mempcpy_chk) -+END (__mempcpy_chk) + L(fwd_write_less32bytes): + #ifndef USE_AS_MEMMOVE + cmp %dl, %al +- jl L(bk_write) ++ jb L(bk_write) #endif - ENTRY (BP_SYM (__mempcpy)) - ENTER ---- glibc-2.11-188-g0cbcca8/sysdeps/i386/i686/memset.S -+++ glibc-2.11.90-12/sysdeps/i386/i686/memset.S -@@ -40,11 +40,11 @@ - - .text - #if defined PIC && !defined NOT_IN_libc && !BZERO_P --ENTRY_CHK (__memset_chk) -+ENTRY (__memset_chk) - movl 12(%esp), %eax - cmpl %eax, 16(%esp) - jb HIDDEN_JUMPTARGET (__chk_fail) --END_CHK (__memset_chk) -+END (__memset_chk) + add %ecx, %edx + add %ecx, %eax +@@ -181,7 +181,7 @@ L(48bytesormore): #endif - ENTRY (BP_SYM (memset)) - ENTER ---- glibc-2.11-188-g0cbcca8/sysdeps/i386/i686/multiarch/Makefile -+++ 
glibc-2.11.90-12/sysdeps/i386/i686/multiarch/Makefile -@@ -4,10 +4,6 @@ gen-as-const-headers += ifunc-defines.sym - endif - ifeq ($(subdir),string) --sysdep_routines += bzero-sse2 memset-sse2 memcpy-ssse3 mempcpy-ssse3 \ -- memmove-ssse3 memcpy-ssse3-rep mempcpy-ssse3-rep \ -- memmove-ssse3-rep bcopy-ssse3 bcopy-ssse3-rep \ -- memset-sse2-rep bzero-sse2-rep - ifeq (yes,$(config-cflags-sse4)) - sysdep_routines += strcspn-c strpbrk-c strspn-c strstr-c strcasestr-c - CFLAGS-strcspn-c.c += -msse4 ---- glibc-2.11-188-g0cbcca8/sysdeps/i386/i686/multiarch/bcopy-ssse3-rep.S -+++ glibc-2.11.90-12//dev/null -@@ -1,4 +0,0 @@ --#define USE_AS_MEMMOVE --#define USE_AS_BCOPY --#define MEMCPY __bcopy_ssse3_rep --#include "memcpy-ssse3-rep.S" ---- glibc-2.11-188-g0cbcca8/sysdeps/i386/i686/multiarch/bcopy-ssse3.S -+++ glibc-2.11.90-12//dev/null -@@ -1,4 +0,0 @@ --#define USE_AS_MEMMOVE --#define USE_AS_BCOPY --#define MEMCPY __bcopy_ssse3 --#include "memcpy-ssse3.S" ---- glibc-2.11-188-g0cbcca8/sysdeps/i386/i686/multiarch/bcopy.S -+++ glibc-2.11.90-12//dev/null -@@ -1,89 +0,0 @@ --/* Multiple versions of bcopy -- Copyright (C) 2010 Free Software Foundation, Inc. -- Contributed by Intel Corporation. -- This file is part of the GNU C Library. -- -- The GNU C Library is free software; you can redistribute it and/or -- modify it under the terms of the GNU Lesser General Public -- License as published by the Free Software Foundation; either -- version 2.1 of the License, or (at your option) any later version. -- -- The GNU C Library is distributed in the hope that it will be useful, -- but WITHOUT ANY WARRANTY; without even the implied warranty of -- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -- Lesser General Public License for more details. -- -- You should have received a copy of the GNU Lesser General Public -- License along with the GNU C Library; if not, write to the Free -- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA -- 02111-1307 USA. 
*/ -- --#include --#include -- --/* Define multiple versions only for the definition in lib. */ --#ifndef NOT_IN_libc --# ifdef SHARED -- .section .gnu.linkonce.t.__i686.get_pc_thunk.bx,"ax",@progbits -- .globl __i686.get_pc_thunk.bx -- .hidden __i686.get_pc_thunk.bx -- .p2align 4 -- .type __i686.get_pc_thunk.bx,@function --__i686.get_pc_thunk.bx: -- movl (%esp), %ebx -- ret -- -- .text --ENTRY(bcopy) -- .type bcopy, @gnu_indirect_function -- pushl %ebx -- cfi_adjust_cfa_offset (4) -- cfi_rel_offset (ebx, 0) -- call __i686.get_pc_thunk.bx -- addl $_GLOBAL_OFFSET_TABLE_, %ebx -- cmpl $0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx) -- jne 1f -- call __init_cpu_features --1: leal __bcopy_ia32@GOTOFF(%ebx), %eax -- testl $bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features@GOTOFF(%ebx) -- jz 2f -- leal __bcopy_ssse3@GOTOFF(%ebx), %eax -- testl $bit_Fast_Rep_String, FEATURE_OFFSET+index_Fast_Rep_String+__cpu_features@GOTOFF(%ebx) -- jz 2f -- leal __bcopy_ssse3_rep@GOTOFF(%ebx), %eax --2: popl %ebx -- cfi_adjust_cfa_offset (-4) -- cfi_restore (ebx) -- ret --END(bcopy) --# else -- .text --ENTRY(bcopy) -- .type bcopy, @gnu_indirect_function -- cmpl $0, KIND_OFFSET+__cpu_features -- jne 1f -- call __init_cpu_features --1: leal __bcopy_ia32, %eax -- testl $bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features -- jz 2f -- leal __bcopy_ssse3, %eax -- testl $bit_Fast_Rep_String, FEATURE_OFFSET+index_Fast_Rep_String+__cpu_features -- jz 2f -- leal __bcopy_ssse3_rep, %eax --2: ret --END(bcopy) --# endif -- --# undef ENTRY --# define ENTRY(name) \ -- .type __bcopy_ia32, @function; \ -- .p2align 4; \ -- __bcopy_ia32: cfi_startproc; \ -- CALL_MCOUNT --# undef END --# define END(name) \ -- cfi_endproc; .size __bcopy_ia32, .-__bcopy_ia32 -- --#endif -- --#include "../bcopy.S" ---- glibc-2.11-188-g0cbcca8/sysdeps/i386/i686/multiarch/bzero-sse2-rep.S -+++ glibc-2.11.90-12//dev/null -@@ -1,3 +0,0 @@ --#define USE_AS_BZERO --#define __memset_sse2_rep __bzero_sse2_rep --#include 
"memset-sse2-rep.S" ---- glibc-2.11-188-g0cbcca8/sysdeps/i386/i686/multiarch/bzero-sse2.S -+++ glibc-2.11.90-12//dev/null -@@ -1,3 +0,0 @@ --#define USE_AS_BZERO --#define __memset_sse2 __bzero_sse2 --#include "memset-sse2.S" ---- glibc-2.11-188-g0cbcca8/sysdeps/i386/i686/multiarch/bzero.S -+++ glibc-2.11.90-12//dev/null -@@ -1,97 +0,0 @@ --/* Multiple versions of bzero -- Copyright (C) 2010 Free Software Foundation, Inc. -- Contributed by Intel Corporation. -- This file is part of the GNU C Library. -- -- The GNU C Library is free software; you can redistribute it and/or -- modify it under the terms of the GNU Lesser General Public -- License as published by the Free Software Foundation; either -- version 2.1 of the License, or (at your option) any later version. -- -- The GNU C Library is distributed in the hope that it will be useful, -- but WITHOUT ANY WARRANTY; without even the implied warranty of -- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -- Lesser General Public License for more details. -- -- You should have received a copy of the GNU Lesser General Public -- License along with the GNU C Library; if not, write to the Free -- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA -- 02111-1307 USA. */ -- --#include --#include -- --/* Define multiple versions only for the definition in lib. 
*/ --#ifndef NOT_IN_libc --# ifdef SHARED -- .section .gnu.linkonce.t.__i686.get_pc_thunk.bx,"ax",@progbits -- .globl __i686.get_pc_thunk.bx -- .hidden __i686.get_pc_thunk.bx -- .p2align 4 -- .type __i686.get_pc_thunk.bx,@function --__i686.get_pc_thunk.bx: -- movl (%esp), %ebx -- ret -- -- .text --ENTRY(__bzero) -- .type __bzero, @gnu_indirect_function -- pushl %ebx -- cfi_adjust_cfa_offset (4) -- cfi_rel_offset (ebx, 0) -- call __i686.get_pc_thunk.bx -- addl $_GLOBAL_OFFSET_TABLE_, %ebx -- cmpl $0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx) -- jne 1f -- call __init_cpu_features --1: leal __bzero_ia32@GOTOFF(%ebx), %eax -- testl $bit_SSE2, CPUID_OFFSET+index_SSE2+__cpu_features@GOTOFF(%ebx) -- jz 2f -- leal __bzero_sse2@GOTOFF(%ebx), %eax -- testl $bit_Fast_Rep_String, FEATURE_OFFSET+index_Fast_Rep_String+__cpu_features@GOTOFF(%ebx) -- jz 2f -- leal __bzero_sse2_rep@GOTOFF(%ebx), %eax --2: popl %ebx -- cfi_adjust_cfa_offset (-4) -- cfi_restore (ebx) -- ret --END(__bzero) --# else -- .text --ENTRY(__bzero) -- .type __bzero, @gnu_indirect_function -- cmpl $0, KIND_OFFSET+__cpu_features -- jne 1f -- call __init_cpu_features --1: leal __bzero_ia32, %eax -- testl $bit_SSE2, CPUID_OFFSET+index_SSE2+__cpu_features -- jz 2f -- leal __bzero_sse2, %eax -- testl $bit_Fast_Rep_String, FEATURE_OFFSET+index_Fast_Rep_String+__cpu_features -- jz 2f -- leal __bzero_sse2_rep, %eax --2: ret --END(__bzero) --# endif -- --# undef ENTRY --# define ENTRY(name) \ -- .type __bzero_ia32, @function; \ -- .p2align 4; \ -- __bzero_ia32: cfi_startproc; \ -- CALL_MCOUNT --# undef END --# define END(name) \ -- cfi_endproc; .size __bzero_ia32, .-__bzero_ia32 -- --# ifdef SHARED --# undef libc_hidden_builtin_def --/* IFUNC doesn't work with the hidden functions in shared library since -- they will be called without setting up EBX needed for PLT which is -- used by IFUNC. 
*/ --# define libc_hidden_builtin_def(name) \ -- .globl __GI___bzero; __GI___bzero = __bzero_ia32 --# endif --#endif -- --#include "../bzero.S" ---- glibc-2.11-188-g0cbcca8/sysdeps/i386/i686/multiarch/ifunc-defines.sym -+++ glibc-2.11.90-12/sysdeps/i386/i686/multiarch/ifunc-defines.sym -@@ -13,8 +13,5 @@ CPUID_ECX_OFFSET offsetof (struct cpuid_registers, ecx) - CPUID_EDX_OFFSET offsetof (struct cpuid_registers, edx) - FAMILY_OFFSET offsetof (struct cpu_features, family) - MODEL_OFFSET offsetof (struct cpu_features, model) --FEATURE_OFFSET offsetof (struct cpu_features, feature) --FEATURE_SIZE sizeof (unsigned int) - - COMMON_CPUID_INDEX_1 --FEATURE_INDEX_1 ---- glibc-2.11-188-g0cbcca8/sysdeps/i386/i686/multiarch/memcpy-ssse3-rep.S -+++ glibc-2.11.90-12//dev/null -@@ -1,1785 +0,0 @@ --/* memcpy with SSSE3 and REP string. -- Copyright (C) 2010 Free Software Foundation, Inc. -- Contributed by Intel Corporation. -- This file is part of the GNU C Library. -- -- The GNU C Library is free software; you can redistribute it and/or -- modify it under the terms of the GNU Lesser General Public -- License as published by the Free Software Foundation; either -- version 2.1 of the License, or (at your option) any later version. -- -- The GNU C Library is distributed in the hope that it will be useful, -- but WITHOUT ANY WARRANTY; without even the implied warranty of -- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -- Lesser General Public License for more details. -- -- You should have received a copy of the GNU Lesser General Public -- License along with the GNU C Library; if not, write to the Free -- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA -- 02111-1307 USA. 
*/ -- --#include -- --#if !defined NOT_IN_libc \ -- && (defined SHARED \ -- || defined USE_AS_MEMMOVE \ -- || !defined USE_MULTIARCH) -- --#include "asm-syntax.h" -- --#ifndef MEMCPY --# define MEMCPY __memcpy_ssse3_rep --# define MEMCPY_CHK __memcpy_chk_ssse3_rep --#endif -- --#ifdef USE_AS_BCOPY --# define SRC PARMS --# define DEST SRC+4 --# define LEN DEST+4 --#else --# define DEST PARMS --# define SRC DEST+4 --# define LEN SRC+4 --#endif -- --#define CFI_PUSH(REG) \ -- cfi_adjust_cfa_offset (4); \ -- cfi_rel_offset (REG, 0) -- --#define CFI_POP(REG) \ -- cfi_adjust_cfa_offset (-4); \ -- cfi_restore (REG) -- --#define PUSH(REG) pushl REG; CFI_PUSH (REG) --#define POP(REG) popl REG; CFI_POP (REG) -- --#ifdef SHARED --# define PARMS 8 /* Preserve EBX. */ --# define ENTRANCE PUSH (%ebx); --# define RETURN_END POP (%ebx); ret --# define RETURN RETURN_END; CFI_PUSH (%ebx) --# define JMPTBL(I, B) I - B -- --/* Load an entry in a jump table into EBX and branch to it. TABLE is a -- jump table with relative offsets. INDEX is a register contains the -- index into the jump table. SCALE is the scale of INDEX. */ --# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \ -- /* We first load PC into EBX. */ \ -- call __i686.get_pc_thunk.bx; \ -- /* Get the address of the jump table. */ \ -- addl $(TABLE - .), %ebx; \ -- /* Get the entry and convert the relative offset to the \ -- absolute address. */ \ -- addl (%ebx,INDEX,SCALE), %ebx; \ -- /* We loaded the jump table. Go. */ \ -- jmp *%ebx -- --# define BRANCH_TO_JMPTBL_ENTRY_VALUE(TABLE) \ -- addl $(TABLE - .), %ebx -- --# define BRANCH_TO_JMPTBL_ENTRY_TAIL(TABLE, INDEX, SCALE) \ -- addl (%ebx,INDEX,SCALE), %ebx; \ -- /* We loaded the jump table. Go. 
*/ \ -- jmp *%ebx -- -- .section .gnu.linkonce.t.__i686.get_pc_thunk.bx,"ax",@progbits -- .globl __i686.get_pc_thunk.bx -- .hidden __i686.get_pc_thunk.bx -- ALIGN (4) -- .type __i686.get_pc_thunk.bx,@function --__i686.get_pc_thunk.bx: -- movl (%esp), %ebx -- ret --#else --# define PARMS 4 --# define ENTRANCE --# define RETURN_END ret --# define RETURN RETURN_END --# define JMPTBL(I, B) I -- --/* Branch to an entry in a jump table. TABLE is a jump table with -- absolute offsets. INDEX is a register contains the index into the -- jump table. SCALE is the scale of INDEX. */ --# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \ -- jmp *TABLE(,INDEX,SCALE) -- --# define BRANCH_TO_JMPTBL_ENTRY_VALUE(TABLE) -- --# define BRANCH_TO_JMPTBL_ENTRY_TAIL(TABLE, INDEX, SCALE) \ -- jmp *TABLE(,INDEX,SCALE) --#endif -- -- .section .text.ssse3,"ax",@progbits --#if defined SHARED && !defined NOT_IN_libc && !defined USE_AS_BCOPY --ENTRY (MEMCPY_CHK) -- movl 12(%esp), %eax -- cmpl %eax, 16(%esp) -- jb HIDDEN_JUMPTARGET (__chk_fail) --END (MEMCPY_CHK) --#endif --ENTRY (MEMCPY) -- ENTRANCE -- movl LEN(%esp), %ecx -- movl SRC(%esp), %eax -- movl DEST(%esp), %edx -- --#ifdef USE_AS_MEMMOVE -- cmp %eax, %edx -- jb L(copy_forward) -- je L(fwd_write_0bytes) -- cmp $32, %ecx -- jge L(memmove_bwd) -- jmp L(bk_write_less32bytes_2) --L(memmove_bwd): -- add %ecx, %eax -- cmp %eax, %edx -- movl SRC(%esp), %eax -- jb L(copy_backward) -- --L(copy_forward): --#endif -- cmp $48, %ecx -- jge L(48bytesormore) -- --L(fwd_write_less32bytes): --#ifndef USE_AS_MEMMOVE -- cmp %dl, %al -- jl L(bk_write) --#endif -- add %ecx, %edx -- add %ecx, %eax -- BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4) --#ifndef USE_AS_MEMMOVE --L(bk_write): -- BRANCH_TO_JMPTBL_ENTRY (L(table_48_bytes_bwd), %ecx, 4) --#endif -- -- ALIGN (4) --/* ECX > 32 and EDX is 4 byte aligned. 
*/ --L(48bytesormore): -- movdqu (%eax), %xmm0 -- PUSH (%edi) -- movl %edx, %edi -- and $-16, %edx -- PUSH (%esi) -- add $16, %edx -- movl %edi, %esi -- sub %edx, %edi -- add %edi, %ecx -- sub %edi, %eax -- --#ifdef SHARED_CACHE_SIZE_HALF -- cmp $SHARED_CACHE_SIZE_HALF, %ecx --#else --# ifdef SHARED -- call __i686.get_pc_thunk.bx -- add $_GLOBAL_OFFSET_TABLE_, %ebx -- cmp __x86_shared_cache_size_half@GOTOFF(%ebx), %ecx --# else -- cmp __x86_shared_cache_size_half, %ecx --# endif --#endif -- -- mov %eax, %edi + mov %eax, %edi - jge L(large_page) -- and $0xf, %edi -- jz L(shl_0) -- -- BRANCH_TO_JMPTBL_ENTRY (L(shl_table), %edi, 4) -- -- ALIGN (4) --L(shl_0): -- movdqu %xmm0, (%esi) -- xor %edi, %edi -- cmp $127, %ecx -- ja L(shl_0_gobble) -- lea -32(%ecx), %ecx --L(shl_0_loop): -- movdqa (%eax, %edi), %xmm0 -- movdqa 16(%eax, %edi), %xmm1 -- sub $32, %ecx -- movdqa %xmm0, (%edx, %edi) -- movdqa %xmm1, 16(%edx, %edi) -- lea 32(%edi), %edi ++ jae L(large_page) + and $0xf, %edi + jz L(shl_0) + +@@ -201,7 +201,7 @@ L(shl_0_loop): + movdqa %xmm0, (%edx, %edi) + movdqa %xmm1, 16(%edx, %edi) + lea 32(%edi), %edi - jl L(shl_0_end) -- -- movdqa (%eax, %edi), %xmm0 -- movdqa 16(%eax, %edi), %xmm1 -- sub $32, %ecx -- movdqa %xmm0, (%edx, %edi) -- movdqa %xmm1, 16(%edx, %edi) -- lea 32(%edi), %edi ++ jb L(shl_0_end) + + movdqa (%eax, %edi), %xmm0 + movdqa 16(%eax, %edi), %xmm1 +@@ -209,7 +209,7 @@ L(shl_0_loop): + movdqa %xmm0, (%edx, %edi) + movdqa %xmm1, 16(%edx, %edi) + lea 32(%edi), %edi - jl L(shl_0_end) -- -- movdqa (%eax, %edi), %xmm0 -- movdqa 16(%eax, %edi), %xmm1 -- sub $32, %ecx -- movdqa %xmm0, (%edx, %edi) -- movdqa %xmm1, 16(%edx, %edi) -- lea 32(%edi), %edi ++ jb L(shl_0_end) + + movdqa (%eax, %edi), %xmm0 + movdqa 16(%eax, %edi), %xmm1 +@@ -217,7 +217,7 @@ L(shl_0_loop): + movdqa %xmm0, (%edx, %edi) + movdqa %xmm1, 16(%edx, %edi) + lea 32(%edi), %edi - jl L(shl_0_end) -- -- movdqa (%eax, %edi), %xmm0 -- movdqa 16(%eax, %edi), %xmm1 -- sub $32, %ecx -- movdqa 
%xmm0, (%edx, %edi) -- movdqa %xmm1, 16(%edx, %edi) -- lea 32(%edi), %edi --L(shl_0_end): -- lea 32(%ecx), %ecx -- add %ecx, %edi -- add %edi, %edx -- add %edi, %eax -- POP (%esi) -- POP (%edi) -- BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4) -- --L(shl_0_gobble): -- --#ifdef DATA_CACHE_SIZE_HALF -- cmp $DATA_CACHE_SIZE_HALF, %ecx --#else --# ifdef SHARED -- call __i686.get_pc_thunk.bx -- add $_GLOBAL_OFFSET_TABLE_, %ebx -- mov __x86_data_cache_size_half@GOTOFF(%ebx), %edi --# else -- mov __x86_data_cache_size_half, %edi --# endif --#endif -- mov %edi, %esi -- shr $3, %esi -- sub %esi, %edi -- cmp %edi, %ecx ++ jb L(shl_0_end) + + movdqa (%eax, %edi), %xmm0 + movdqa 16(%eax, %edi), %xmm1 +@@ -251,7 +251,7 @@ L(shl_0_gobble): + shr $3, %esi + sub %esi, %edi + cmp %edi, %ecx - jge L(shl_0_gobble_mem_start) -- lea -128(%ecx), %ecx -- ALIGN (4) --L(shl_0_gobble_cache_loop): -- movdqa (%eax), %xmm0 -- movaps 0x10(%eax), %xmm1 -- movaps 0x20(%eax), %xmm2 -- movaps 0x30(%eax), %xmm3 -- movaps 0x40(%eax), %xmm4 -- movaps 0x50(%eax), %xmm5 -- movaps 0x60(%eax), %xmm6 -- movaps 0x70(%eax), %xmm7 -- lea 0x80(%eax), %eax -- sub $128, %ecx -- movdqa %xmm0, (%edx) -- movaps %xmm1, 0x10(%edx) -- movaps %xmm2, 0x20(%edx) -- movaps %xmm3, 0x30(%edx) -- movaps %xmm4, 0x40(%edx) -- movaps %xmm5, 0x50(%edx) -- movaps %xmm6, 0x60(%edx) -- movaps %xmm7, 0x70(%edx) -- lea 0x80(%edx), %edx -- ++ jae L(shl_0_gobble_mem_start) + lea -128(%ecx), %ecx + ALIGN (4) + L(shl_0_gobble_cache_loop): +@@ -275,8 +275,7 @@ L(shl_0_gobble_cache_loop): + movaps %xmm7, 0x70(%edx) + lea 0x80(%edx), %edx + - jge L(shl_0_gobble_cache_loop) -L(shl_0_gobble_cache_loop_tail): -- cmp $-0x40, %ecx -- lea 0x80(%ecx), %ecx -- jl L(shl_0_cache_less_64bytes) -- -- movdqa (%eax), %xmm0 -- sub $0x40, %ecx -- movdqa 0x10(%eax), %xmm1 -- -- movdqa %xmm0, (%edx) -- movdqa %xmm1, 0x10(%edx) -- -- movdqa 0x20(%eax), %xmm0 -- movdqa 0x30(%eax), %xmm1 -- add $0x40, %eax -- -- movdqa %xmm0, 0x20(%edx) -- movdqa %xmm1, 
0x30(%edx) -- add $0x40, %edx --L(shl_0_cache_less_64bytes): -- cmp $0x20, %ecx ++ jae L(shl_0_gobble_cache_loop) + cmp $-0x40, %ecx + lea 0x80(%ecx), %ecx + jl L(shl_0_cache_less_64bytes) +@@ -297,7 +296,7 @@ L(shl_0_gobble_cache_loop_tail): + add $0x40, %edx + L(shl_0_cache_less_64bytes): + cmp $0x20, %ecx - jl L(shl_0_cache_less_32bytes) -- movdqa (%eax), %xmm0 -- sub $0x20, %ecx -- movdqa 0x10(%eax), %xmm1 -- add $0x20, %eax -- movdqa %xmm0, (%edx) -- movdqa %xmm1, 0x10(%edx) -- add $0x20, %edx --L(shl_0_cache_less_32bytes): -- cmp $0x10, %ecx ++ jb L(shl_0_cache_less_32bytes) + movdqa (%eax), %xmm0 + sub $0x20, %ecx + movdqa 0x10(%eax), %xmm1 +@@ -307,7 +306,7 @@ L(shl_0_cache_less_64bytes): + add $0x20, %edx + L(shl_0_cache_less_32bytes): + cmp $0x10, %ecx - jl L(shl_0_cache_less_16bytes) -- sub $0x10, %ecx -- movdqa (%eax), %xmm0 -- add $0x10, %eax -- movdqa %xmm0, (%edx) -- add $0x10, %edx --L(shl_0_cache_less_16bytes): -- add %ecx, %edx -- add %ecx, %eax -- POP (%esi) -- POP (%edi) -- BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4) -- -- -- ALIGN (4) --L(shl_0_gobble_mem_start): -- cmp %al, %dl -- je L(copy_page_by_rep) -- lea -128(%ecx), %ecx --L(shl_0_gobble_mem_loop): -- prefetchnta 0x1c0(%eax) -- prefetchnta 0x280(%eax) -- prefetchnta 0x1c0(%edx) -- prefetchnta 0x280(%edx) -- -- movdqa (%eax), %xmm0 -- movaps 0x10(%eax), %xmm1 -- movaps 0x20(%eax), %xmm2 -- movaps 0x30(%eax), %xmm3 -- movaps 0x40(%eax), %xmm4 -- movaps 0x50(%eax), %xmm5 -- movaps 0x60(%eax), %xmm6 -- movaps 0x70(%eax), %xmm7 -- lea 0x80(%eax), %eax -- sub $0x80, %ecx -- movdqa %xmm0, (%edx) -- movaps %xmm1, 0x10(%edx) -- movaps %xmm2, 0x20(%edx) -- movaps %xmm3, 0x30(%edx) -- movaps %xmm4, 0x40(%edx) -- movaps %xmm5, 0x50(%edx) -- movaps %xmm6, 0x60(%edx) -- movaps %xmm7, 0x70(%edx) -- lea 0x80(%edx), %edx -- ++ jb L(shl_0_cache_less_16bytes) + sub $0x10, %ecx + movdqa (%eax), %xmm0 + add $0x10, %eax +@@ -352,7 +351,7 @@ L(shl_0_gobble_mem_loop): + movaps %xmm7, 0x70(%edx) + lea 
0x80(%edx), %edx + - jge L(shl_0_gobble_mem_loop) -- cmp $-0x40, %ecx -- lea 0x80(%ecx), %ecx -- jl L(shl_0_mem_less_64bytes) -- -- movdqa (%eax), %xmm0 -- sub $0x40, %ecx -- movdqa 0x10(%eax), %xmm1 -- -- movdqa %xmm0, (%edx) -- movdqa %xmm1, 0x10(%edx) -- -- movdqa 0x20(%eax), %xmm0 -- movdqa 0x30(%eax), %xmm1 -- add $0x40, %eax -- -- movdqa %xmm0, 0x20(%edx) -- movdqa %xmm1, 0x30(%edx) -- add $0x40, %edx --L(shl_0_mem_less_64bytes): -- cmp $0x20, %ecx ++ jae L(shl_0_gobble_mem_loop) + cmp $-0x40, %ecx + lea 0x80(%ecx), %ecx + jl L(shl_0_mem_less_64bytes) +@@ -373,7 +372,7 @@ L(shl_0_gobble_mem_loop): + add $0x40, %edx + L(shl_0_mem_less_64bytes): + cmp $0x20, %ecx - jl L(shl_0_mem_less_32bytes) -- movdqa (%eax), %xmm0 -- sub $0x20, %ecx -- movdqa 0x10(%eax), %xmm1 -- add $0x20, %eax -- movdqa %xmm0, (%edx) -- movdqa %xmm1, 0x10(%edx) -- add $0x20, %edx --L(shl_0_mem_less_32bytes): -- cmp $0x10, %ecx ++ jb L(shl_0_mem_less_32bytes) + movdqa (%eax), %xmm0 + sub $0x20, %ecx + movdqa 0x10(%eax), %xmm1 +@@ -383,7 +382,7 @@ L(shl_0_mem_less_64bytes): + add $0x20, %edx + L(shl_0_mem_less_32bytes): + cmp $0x10, %ecx - jl L(shl_0_mem_less_16bytes) -- sub $0x10, %ecx -- movdqa (%eax), %xmm0 -- add $0x10, %eax -- movdqa %xmm0, (%edx) -- add $0x10, %edx --L(shl_0_mem_less_16bytes): -- add %ecx, %edx -- add %ecx, %eax -- POP (%esi) -- POP (%edi) -- BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4) -- -- -- ALIGN (4) --L(shl_1): -- BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) -- lea -1(%eax), %eax -- movaps (%eax), %xmm1 -- xor %edi, %edi -- lea -32(%ecx), %ecx -- movdqu %xmm0, (%esi) -- POP (%esi) --L(shl_1_loop): -- -- movdqa 16(%eax, %edi), %xmm2 -- sub $32, %ecx -- movdqa 32(%eax, %edi), %xmm3 -- movdqa %xmm3, %xmm4 -- palignr $1, %xmm2, %xmm3 -- palignr $1, %xmm1, %xmm2 -- lea 32(%edi), %edi -- movdqa %xmm2, -32(%edx, %edi) -- movdqa %xmm3, -16(%edx, %edi) -- ++ jb L(shl_0_mem_less_16bytes) + sub $0x10, %ecx + movdqa (%eax), %xmm0 + add $0x10, %eax +@@ -418,7 
+417,7 @@ L(shl_1_loop): + movdqa %xmm2, -32(%edx, %edi) + movdqa %xmm3, -16(%edx, %edi) + - jl L(shl_1_end) -- -- movdqa 16(%eax, %edi), %xmm2 -- sub $32, %ecx -- movdqa 32(%eax, %edi), %xmm3 -- movdqa %xmm3, %xmm1 -- palignr $1, %xmm2, %xmm3 -- palignr $1, %xmm4, %xmm2 -- lea 32(%edi), %edi -- movdqa %xmm2, -32(%edx, %edi) -- movdqa %xmm3, -16(%edx, %edi) -- -- jae L(shl_1_loop) -- --L(shl_1_end): -- lea 32(%ecx), %ecx -- add %ecx, %edi -- add %edi, %edx -- lea 1(%edi, %eax), %eax -- POP (%edi) -- BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) -- -- ALIGN (4) --L(shl_2): -- BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) -- lea -2(%eax), %eax -- movaps (%eax), %xmm1 -- xor %edi, %edi -- lea -32(%ecx), %ecx -- movdqu %xmm0, (%esi) -- POP (%esi) --L(shl_2_loop): -- -- movdqa 16(%eax, %edi), %xmm2 -- sub $32, %ecx -- movdqa 32(%eax, %edi), %xmm3 -- movdqa %xmm3, %xmm4 -- palignr $2, %xmm2, %xmm3 -- palignr $2, %xmm1, %xmm2 -- lea 32(%edi), %edi -- movdqa %xmm2, -32(%edx, %edi) -- movdqa %xmm3, -16(%edx, %edi) -- ++ jb L(shl_1_end) + + movdqa 16(%eax, %edi), %xmm2 + sub $32, %ecx +@@ -461,7 +460,7 @@ L(shl_2_loop): + movdqa %xmm2, -32(%edx, %edi) + movdqa %xmm3, -16(%edx, %edi) + - jl L(shl_2_end) -- -- movdqa 16(%eax, %edi), %xmm2 -- sub $32, %ecx -- movdqa 32(%eax, %edi), %xmm3 -- movdqa %xmm3, %xmm1 -- palignr $2, %xmm2, %xmm3 -- palignr $2, %xmm4, %xmm2 -- lea 32(%edi), %edi -- movdqa %xmm2, -32(%edx, %edi) -- movdqa %xmm3, -16(%edx, %edi) -- -- jae L(shl_2_loop) -- --L(shl_2_end): -- lea 32(%ecx), %ecx -- add %ecx, %edi -- add %edi, %edx -- lea 2(%edi, %eax), %eax -- POP (%edi) -- BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) -- -- ALIGN (4) --L(shl_3): -- BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) -- lea -3(%eax), %eax -- movaps (%eax), %xmm1 -- xor %edi, %edi -- lea -32(%ecx), %ecx -- movdqu %xmm0, (%esi) -- POP (%esi) --L(shl_3_loop): -- -- movdqa 16(%eax, %edi), %xmm2 -- sub $32, %ecx -- movdqa 32(%eax, %edi), %xmm3 -- 
movdqa %xmm3, %xmm4 -- palignr $3, %xmm2, %xmm3 -- palignr $3, %xmm1, %xmm2 -- lea 32(%edi), %edi -- movdqa %xmm2, -32(%edx, %edi) -- movdqa %xmm3, -16(%edx, %edi) -- ++ jb L(shl_2_end) + + movdqa 16(%eax, %edi), %xmm2 + sub $32, %ecx +@@ -504,7 +503,7 @@ L(shl_3_loop): + movdqa %xmm2, -32(%edx, %edi) + movdqa %xmm3, -16(%edx, %edi) + - jl L(shl_3_end) -- -- movdqa 16(%eax, %edi), %xmm2 -- sub $32, %ecx -- movdqa 32(%eax, %edi), %xmm3 -- movdqa %xmm3, %xmm1 -- palignr $3, %xmm2, %xmm3 -- palignr $3, %xmm4, %xmm2 -- lea 32(%edi), %edi -- movdqa %xmm2, -32(%edx, %edi) -- movdqa %xmm3, -16(%edx, %edi) -- -- jae L(shl_3_loop) -- --L(shl_3_end): -- lea 32(%ecx), %ecx -- add %ecx, %edi -- add %edi, %edx -- lea 3(%edi, %eax), %eax -- POP (%edi) -- BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) -- -- ALIGN (4) --L(shl_4): -- BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) -- lea -4(%eax), %eax -- movaps (%eax), %xmm1 -- xor %edi, %edi -- lea -32(%ecx), %ecx -- movdqu %xmm0, (%esi) -- POP (%esi) --L(shl_4_loop): -- -- movdqa 16(%eax, %edi), %xmm2 -- sub $32, %ecx -- movdqa 32(%eax, %edi), %xmm3 -- movdqa %xmm3, %xmm4 -- palignr $4, %xmm2, %xmm3 -- palignr $4, %xmm1, %xmm2 -- lea 32(%edi), %edi -- movdqa %xmm2, -32(%edx, %edi) -- movdqa %xmm3, -16(%edx, %edi) -- ++ jb L(shl_3_end) + + movdqa 16(%eax, %edi), %xmm2 + sub $32, %ecx +@@ -547,7 +546,7 @@ L(shl_4_loop): + movdqa %xmm2, -32(%edx, %edi) + movdqa %xmm3, -16(%edx, %edi) + - jl L(shl_4_end) -- -- movdqa 16(%eax, %edi), %xmm2 -- sub $32, %ecx -- movdqa 32(%eax, %edi), %xmm3 -- movdqa %xmm3, %xmm1 -- palignr $4, %xmm2, %xmm3 -- palignr $4, %xmm4, %xmm2 -- lea 32(%edi), %edi -- movdqa %xmm2, -32(%edx, %edi) -- movdqa %xmm3, -16(%edx, %edi) -- -- jae L(shl_4_loop) -- --L(shl_4_end): -- lea 32(%ecx), %ecx -- add %ecx, %edi -- add %edi, %edx -- lea 4(%edi, %eax), %eax -- POP (%edi) -- BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) -- -- ALIGN (4) --L(shl_5): -- 
BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) -- lea -5(%eax), %eax -- movaps (%eax), %xmm1 -- xor %edi, %edi -- lea -32(%ecx), %ecx -- movdqu %xmm0, (%esi) -- POP (%esi) --L(shl_5_loop): -- -- movdqa 16(%eax, %edi), %xmm2 -- sub $32, %ecx -- movdqa 32(%eax, %edi), %xmm3 -- movdqa %xmm3, %xmm4 -- palignr $5, %xmm2, %xmm3 -- palignr $5, %xmm1, %xmm2 -- lea 32(%edi), %edi -- movdqa %xmm2, -32(%edx, %edi) -- movdqa %xmm3, -16(%edx, %edi) -- ++ jb L(shl_4_end) + + movdqa 16(%eax, %edi), %xmm2 + sub $32, %ecx +@@ -590,7 +589,7 @@ L(shl_5_loop): + movdqa %xmm2, -32(%edx, %edi) + movdqa %xmm3, -16(%edx, %edi) + - jl L(shl_5_end) -- -- movdqa 16(%eax, %edi), %xmm2 -- sub $32, %ecx -- movdqa 32(%eax, %edi), %xmm3 -- movdqa %xmm3, %xmm1 -- palignr $5, %xmm2, %xmm3 -- palignr $5, %xmm4, %xmm2 -- lea 32(%edi), %edi -- movdqa %xmm2, -32(%edx, %edi) -- movdqa %xmm3, -16(%edx, %edi) -- -- jae L(shl_5_loop) -- --L(shl_5_end): -- lea 32(%ecx), %ecx -- add %ecx, %edi -- add %edi, %edx -- lea 5(%edi, %eax), %eax -- POP (%edi) -- BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) -- -- -- ALIGN (4) --L(shl_6): -- BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) -- lea -6(%eax), %eax -- movaps (%eax), %xmm1 -- xor %edi, %edi -- lea -32(%ecx), %ecx -- movdqu %xmm0, (%esi) -- POP (%esi) --L(shl_6_loop): -- -- movdqa 16(%eax, %edi), %xmm2 -- sub $32, %ecx -- movdqa 32(%eax, %edi), %xmm3 -- movdqa %xmm3, %xmm4 -- palignr $6, %xmm2, %xmm3 -- palignr $6, %xmm1, %xmm2 -- lea 32(%edi), %edi -- movdqa %xmm2, -32(%edx, %edi) -- movdqa %xmm3, -16(%edx, %edi) -- ++ jb L(shl_5_end) + + movdqa 16(%eax, %edi), %xmm2 + sub $32, %ecx +@@ -634,7 +633,7 @@ L(shl_6_loop): + movdqa %xmm2, -32(%edx, %edi) + movdqa %xmm3, -16(%edx, %edi) + - jl L(shl_6_end) -- -- movdqa 16(%eax, %edi), %xmm2 -- sub $32, %ecx -- movdqa 32(%eax, %edi), %xmm3 -- movdqa %xmm3, %xmm1 -- palignr $6, %xmm2, %xmm3 -- palignr $6, %xmm4, %xmm2 -- lea 32(%edi), %edi -- movdqa %xmm2, -32(%edx, %edi) -- movdqa %xmm3, 
-16(%edx, %edi) -- -- jae L(shl_6_loop) -- --L(shl_6_end): -- lea 32(%ecx), %ecx -- add %ecx, %edi -- add %edi, %edx -- lea 6(%edi, %eax), %eax -- POP (%edi) -- BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) -- -- ALIGN (4) --L(shl_7): -- BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) -- lea -7(%eax), %eax -- movaps (%eax), %xmm1 -- xor %edi, %edi -- lea -32(%ecx), %ecx -- movdqu %xmm0, (%esi) -- POP (%esi) --L(shl_7_loop): -- -- movdqa 16(%eax, %edi), %xmm2 -- sub $32, %ecx -- movdqa 32(%eax, %edi), %xmm3 -- movdqa %xmm3, %xmm4 -- palignr $7, %xmm2, %xmm3 -- palignr $7, %xmm1, %xmm2 -- lea 32(%edi), %edi -- movdqa %xmm2, -32(%edx, %edi) -- movdqa %xmm3, -16(%edx, %edi) -- ++ jb L(shl_6_end) + + movdqa 16(%eax, %edi), %xmm2 + sub $32, %ecx +@@ -677,7 +676,7 @@ L(shl_7_loop): + movdqa %xmm2, -32(%edx, %edi) + movdqa %xmm3, -16(%edx, %edi) + - jl L(shl_7_end) -- -- movdqa 16(%eax, %edi), %xmm2 -- sub $32, %ecx -- movdqa 32(%eax, %edi), %xmm3 -- movdqa %xmm3, %xmm1 -- palignr $7, %xmm2, %xmm3 -- palignr $7, %xmm4, %xmm2 -- lea 32(%edi), %edi -- movdqa %xmm2, -32(%edx, %edi) -- movdqa %xmm3, -16(%edx, %edi) -- -- jae L(shl_7_loop) -- --L(shl_7_end): -- lea 32(%ecx), %ecx -- add %ecx, %edi -- add %edi, %edx -- lea 7(%edi, %eax), %eax -- POP (%edi) -- BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) -- -- ALIGN (4) --L(shl_8): -- BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) -- lea -8(%eax), %eax -- movaps (%eax), %xmm1 -- xor %edi, %edi -- lea -32(%ecx), %ecx -- movdqu %xmm0, (%esi) -- POP (%esi) --L(shl_8_loop): -- -- movdqa 16(%eax, %edi), %xmm2 -- sub $32, %ecx -- movdqa 32(%eax, %edi), %xmm3 -- movdqa %xmm3, %xmm4 -- palignr $8, %xmm2, %xmm3 -- palignr $8, %xmm1, %xmm2 -- lea 32(%edi), %edi -- movdqa %xmm2, -32(%edx, %edi) -- movdqa %xmm3, -16(%edx, %edi) -- ++ jb L(shl_7_end) + + movdqa 16(%eax, %edi), %xmm2 + sub $32, %ecx +@@ -720,7 +719,7 @@ L(shl_8_loop): + movdqa %xmm2, -32(%edx, %edi) + movdqa %xmm3, -16(%edx, %edi) + - jl 
L(shl_8_end) -- -- movdqa 16(%eax, %edi), %xmm2 -- sub $32, %ecx -- movdqa 32(%eax, %edi), %xmm3 -- movdqa %xmm3, %xmm1 -- palignr $8, %xmm2, %xmm3 -- palignr $8, %xmm4, %xmm2 -- lea 32(%edi), %edi -- movdqa %xmm2, -32(%edx, %edi) -- movdqa %xmm3, -16(%edx, %edi) -- -- jae L(shl_8_loop) -- --L(shl_8_end): -- lea 32(%ecx), %ecx -- add %ecx, %edi -- add %edi, %edx -- lea 8(%edi, %eax), %eax -- POP (%edi) -- BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) -- -- ALIGN (4) --L(shl_9): -- BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) -- lea -9(%eax), %eax -- movaps (%eax), %xmm1 -- xor %edi, %edi -- lea -32(%ecx), %ecx -- movdqu %xmm0, (%esi) -- POP (%esi) --L(shl_9_loop): -- -- movdqa 16(%eax, %edi), %xmm2 -- sub $32, %ecx -- movdqa 32(%eax, %edi), %xmm3 -- movdqa %xmm3, %xmm4 -- palignr $9, %xmm2, %xmm3 -- palignr $9, %xmm1, %xmm2 -- lea 32(%edi), %edi -- movdqa %xmm2, -32(%edx, %edi) -- movdqa %xmm3, -16(%edx, %edi) -- ++ jb L(shl_8_end) + + movdqa 16(%eax, %edi), %xmm2 + sub $32, %ecx +@@ -763,7 +762,7 @@ L(shl_9_loop): + movdqa %xmm2, -32(%edx, %edi) + movdqa %xmm3, -16(%edx, %edi) + - jl L(shl_9_end) -- -- movdqa 16(%eax, %edi), %xmm2 -- sub $32, %ecx -- movdqa 32(%eax, %edi), %xmm3 -- movdqa %xmm3, %xmm1 -- palignr $9, %xmm2, %xmm3 -- palignr $9, %xmm4, %xmm2 -- lea 32(%edi), %edi -- movdqa %xmm2, -32(%edx, %edi) -- movdqa %xmm3, -16(%edx, %edi) -- -- jae L(shl_9_loop) -- --L(shl_9_end): -- lea 32(%ecx), %ecx -- add %ecx, %edi -- add %edi, %edx -- lea 9(%edi, %eax), %eax -- POP (%edi) -- BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) -- -- ALIGN (4) --L(shl_10): -- BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) -- lea -10(%eax), %eax -- movaps (%eax), %xmm1 -- xor %edi, %edi -- lea -32(%ecx), %ecx -- movdqu %xmm0, (%esi) -- POP (%esi) --L(shl_10_loop): -- -- movdqa 16(%eax, %edi), %xmm2 -- sub $32, %ecx -- movdqa 32(%eax, %edi), %xmm3 -- movdqa %xmm3, %xmm4 -- palignr $10, %xmm2, %xmm3 -- palignr $10, %xmm1, %xmm2 -- lea 32(%edi), 
%edi -- movdqa %xmm2, -32(%edx, %edi) -- movdqa %xmm3, -16(%edx, %edi) -- ++ jb L(shl_9_end) + + movdqa 16(%eax, %edi), %xmm2 + sub $32, %ecx +@@ -806,7 +805,7 @@ L(shl_10_loop): + movdqa %xmm2, -32(%edx, %edi) + movdqa %xmm3, -16(%edx, %edi) + - jl L(shl_10_end) -- -- movdqa 16(%eax, %edi), %xmm2 -- sub $32, %ecx -- movdqa 32(%eax, %edi), %xmm3 -- movdqa %xmm3, %xmm1 -- palignr $10, %xmm2, %xmm3 -- palignr $10, %xmm4, %xmm2 -- lea 32(%edi), %edi -- movdqa %xmm2, -32(%edx, %edi) -- movdqa %xmm3, -16(%edx, %edi) -- -- jae L(shl_10_loop) -- --L(shl_10_end): -- lea 32(%ecx), %ecx -- add %ecx, %edi -- add %edi, %edx -- lea 10(%edi, %eax), %eax -- POP (%edi) -- BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) -- -- ALIGN (4) --L(shl_11): -- BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) -- lea -11(%eax), %eax -- movaps (%eax), %xmm1 -- xor %edi, %edi -- lea -32(%ecx), %ecx -- movdqu %xmm0, (%esi) -- POP (%esi) --L(shl_11_loop): -- -- movdqa 16(%eax, %edi), %xmm2 -- sub $32, %ecx -- movdqa 32(%eax, %edi), %xmm3 -- movdqa %xmm3, %xmm4 -- palignr $11, %xmm2, %xmm3 -- palignr $11, %xmm1, %xmm2 -- lea 32(%edi), %edi -- movdqa %xmm2, -32(%edx, %edi) -- movdqa %xmm3, -16(%edx, %edi) -- ++ jb L(shl_10_end) + + movdqa 16(%eax, %edi), %xmm2 + sub $32, %ecx +@@ -849,7 +848,7 @@ L(shl_11_loop): + movdqa %xmm2, -32(%edx, %edi) + movdqa %xmm3, -16(%edx, %edi) + - jl L(shl_11_end) -- -- movdqa 16(%eax, %edi), %xmm2 -- sub $32, %ecx -- movdqa 32(%eax, %edi), %xmm3 -- movdqa %xmm3, %xmm1 -- palignr $11, %xmm2, %xmm3 -- palignr $11, %xmm4, %xmm2 -- lea 32(%edi), %edi -- movdqa %xmm2, -32(%edx, %edi) -- movdqa %xmm3, -16(%edx, %edi) -- -- jae L(shl_11_loop) -- --L(shl_11_end): -- lea 32(%ecx), %ecx -- add %ecx, %edi -- add %edi, %edx -- lea 11(%edi, %eax), %eax -- POP (%edi) -- BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) -- -- ALIGN (4) --L(shl_12): -- BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) -- lea -12(%eax), %eax -- movaps (%eax), %xmm1 -- xor 
%edi, %edi -- lea -32(%ecx), %ecx -- movdqu %xmm0, (%esi) -- POP (%esi) --L(shl_12_loop): -- -- movdqa 16(%eax, %edi), %xmm2 -- sub $32, %ecx -- movdqa 32(%eax, %edi), %xmm3 -- movdqa %xmm3, %xmm4 -- palignr $12, %xmm2, %xmm3 -- palignr $12, %xmm1, %xmm2 -- lea 32(%edi), %edi -- movdqa %xmm2, -32(%edx, %edi) -- movdqa %xmm3, -16(%edx, %edi) -- ++ jb L(shl_11_end) + + movdqa 16(%eax, %edi), %xmm2 + sub $32, %ecx +@@ -892,7 +891,7 @@ L(shl_12_loop): + movdqa %xmm2, -32(%edx, %edi) + movdqa %xmm3, -16(%edx, %edi) + - jl L(shl_12_end) -- -- movdqa 16(%eax, %edi), %xmm2 -- sub $32, %ecx -- movdqa 32(%eax, %edi), %xmm3 -- movdqa %xmm3, %xmm1 -- palignr $12, %xmm2, %xmm3 -- palignr $12, %xmm4, %xmm2 -- lea 32(%edi), %edi -- movdqa %xmm2, -32(%edx, %edi) -- movdqa %xmm3, -16(%edx, %edi) -- -- jae L(shl_12_loop) -- --L(shl_12_end): -- lea 32(%ecx), %ecx -- add %ecx, %edi -- add %edi, %edx -- lea 12(%edi, %eax), %eax -- POP (%edi) -- BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) -- -- ALIGN (4) --L(shl_13): -- BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) -- lea -13(%eax), %eax -- movaps (%eax), %xmm1 -- xor %edi, %edi -- lea -32(%ecx), %ecx -- movdqu %xmm0, (%esi) -- POP (%esi) --L(shl_13_loop): -- -- movdqa 16(%eax, %edi), %xmm2 -- sub $32, %ecx -- movdqa 32(%eax, %edi), %xmm3 -- movdqa %xmm3, %xmm4 -- palignr $13, %xmm2, %xmm3 -- palignr $13, %xmm1, %xmm2 -- lea 32(%edi), %edi -- movdqa %xmm2, -32(%edx, %edi) -- movdqa %xmm3, -16(%edx, %edi) -- ++ jb L(shl_12_end) + + movdqa 16(%eax, %edi), %xmm2 + sub $32, %ecx +@@ -935,7 +934,7 @@ L(shl_13_loop): + movdqa %xmm2, -32(%edx, %edi) + movdqa %xmm3, -16(%edx, %edi) + - jl L(shl_13_end) -- -- movdqa 16(%eax, %edi), %xmm2 -- sub $32, %ecx -- movdqa 32(%eax, %edi), %xmm3 -- movdqa %xmm3, %xmm1 -- palignr $13, %xmm2, %xmm3 -- palignr $13, %xmm4, %xmm2 -- lea 32(%edi), %edi -- movdqa %xmm2, -32(%edx, %edi) -- movdqa %xmm3, -16(%edx, %edi) -- -- jae L(shl_13_loop) -- --L(shl_13_end): -- lea 32(%ecx), %ecx -- add 
%ecx, %edi -- add %edi, %edx -- lea 13(%edi, %eax), %eax -- POP (%edi) -- BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) -- -- ALIGN (4) --L(shl_14): -- BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) -- lea -14(%eax), %eax -- movaps (%eax), %xmm1 -- xor %edi, %edi -- lea -32(%ecx), %ecx -- movdqu %xmm0, (%esi) -- POP (%esi) --L(shl_14_loop): -- -- movdqa 16(%eax, %edi), %xmm2 -- sub $32, %ecx -- movdqa 32(%eax, %edi), %xmm3 -- movdqa %xmm3, %xmm4 -- palignr $14, %xmm2, %xmm3 -- palignr $14, %xmm1, %xmm2 -- lea 32(%edi), %edi -- movdqa %xmm2, -32(%edx, %edi) -- movdqa %xmm3, -16(%edx, %edi) -- ++ jb L(shl_13_end) + + movdqa 16(%eax, %edi), %xmm2 + sub $32, %ecx +@@ -978,7 +977,7 @@ L(shl_14_loop): + movdqa %xmm2, -32(%edx, %edi) + movdqa %xmm3, -16(%edx, %edi) + - jl L(shl_14_end) -- -- movdqa 16(%eax, %edi), %xmm2 -- sub $32, %ecx -- movdqa 32(%eax, %edi), %xmm3 -- movdqa %xmm3, %xmm1 -- palignr $14, %xmm2, %xmm3 -- palignr $14, %xmm4, %xmm2 -- lea 32(%edi), %edi -- movdqa %xmm2, -32(%edx, %edi) -- movdqa %xmm3, -16(%edx, %edi) -- -- jae L(shl_14_loop) -- --L(shl_14_end): -- lea 32(%ecx), %ecx -- add %ecx, %edi -- add %edi, %edx -- lea 14(%edi, %eax), %eax -- POP (%edi) -- BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) -- -- -- ALIGN (4) --L(shl_15): -- BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) -- lea -15(%eax), %eax -- movaps (%eax), %xmm1 -- xor %edi, %edi -- lea -32(%ecx), %ecx -- movdqu %xmm0, (%esi) -- POP (%esi) --L(shl_15_loop): -- -- movdqa 16(%eax, %edi), %xmm2 -- sub $32, %ecx -- movdqa 32(%eax, %edi), %xmm3 -- movdqa %xmm3, %xmm4 -- palignr $15, %xmm2, %xmm3 -- palignr $15, %xmm1, %xmm2 -- lea 32(%edi), %edi -- movdqa %xmm2, -32(%edx, %edi) -- movdqa %xmm3, -16(%edx, %edi) -- ++ jb L(shl_14_end) + + movdqa 16(%eax, %edi), %xmm2 + sub $32, %ecx +@@ -1022,7 +1021,7 @@ L(shl_15_loop): + movdqa %xmm2, -32(%edx, %edi) + movdqa %xmm3, -16(%edx, %edi) + - jl L(shl_15_end) -- -- movdqa 16(%eax, %edi), %xmm2 -- sub $32, %ecx 
-- movdqa 32(%eax, %edi), %xmm3 -- movdqa %xmm3, %xmm1 -- palignr $15, %xmm2, %xmm3 -- palignr $15, %xmm4, %xmm2 -- lea 32(%edi), %edi -- movdqa %xmm2, -32(%edx, %edi) -- movdqa %xmm3, -16(%edx, %edi) -- -- jae L(shl_15_loop) -- --L(shl_15_end): -- lea 32(%ecx), %ecx -- add %ecx, %edi -- add %edi, %edx -- lea 15(%edi, %eax), %eax -- POP (%edi) -- BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) -- -- -- ALIGN (4) --L(fwd_write_44bytes): -- movl -44(%eax), %ecx -- movl %ecx, -44(%edx) --L(fwd_write_40bytes): -- movl -40(%eax), %ecx -- movl %ecx, -40(%edx) --L(fwd_write_36bytes): -- movl -36(%eax), %ecx -- movl %ecx, -36(%edx) --L(fwd_write_32bytes): -- movl -32(%eax), %ecx -- movl %ecx, -32(%edx) --L(fwd_write_28bytes): -- movl -28(%eax), %ecx -- movl %ecx, -28(%edx) --L(fwd_write_24bytes): -- movl -24(%eax), %ecx -- movl %ecx, -24(%edx) --L(fwd_write_20bytes): -- movl -20(%eax), %ecx -- movl %ecx, -20(%edx) --L(fwd_write_16bytes): -- movl -16(%eax), %ecx -- movl %ecx, -16(%edx) --L(fwd_write_12bytes): -- movl -12(%eax), %ecx -- movl %ecx, -12(%edx) --L(fwd_write_8bytes): -- movl -8(%eax), %ecx -- movl %ecx, -8(%edx) --L(fwd_write_4bytes): -- movl -4(%eax), %ecx -- movl %ecx, -4(%edx) --L(fwd_write_0bytes): --#ifndef USE_AS_BCOPY --# ifdef USE_AS_MEMPCPY -- movl %edx, %eax --# else -- movl DEST(%esp), %eax --# endif --#endif -- RETURN -- -- ALIGN (4) --L(fwd_write_5bytes): -- movl -5(%eax), %ecx -- movl -4(%eax), %eax -- movl %ecx, -5(%edx) -- movl %eax, -4(%edx) --#ifndef USE_AS_BCOPY --# ifdef USE_AS_MEMPCPY -- movl %edx, %eax --# else -- movl DEST(%esp), %eax --# endif --#endif -- RETURN -- -- ALIGN (4) --L(fwd_write_45bytes): -- movl -45(%eax), %ecx -- movl %ecx, -45(%edx) --L(fwd_write_41bytes): -- movl -41(%eax), %ecx -- movl %ecx, -41(%edx) --L(fwd_write_37bytes): -- movl -37(%eax), %ecx -- movl %ecx, -37(%edx) --L(fwd_write_33bytes): -- movl -33(%eax), %ecx -- movl %ecx, -33(%edx) --L(fwd_write_29bytes): -- movl -29(%eax), %ecx -- movl %ecx, 
-29(%edx) --L(fwd_write_25bytes): -- movl -25(%eax), %ecx -- movl %ecx, -25(%edx) --L(fwd_write_21bytes): -- movl -21(%eax), %ecx -- movl %ecx, -21(%edx) --L(fwd_write_17bytes): -- movl -17(%eax), %ecx -- movl %ecx, -17(%edx) --L(fwd_write_13bytes): -- movl -13(%eax), %ecx -- movl %ecx, -13(%edx) --L(fwd_write_9bytes): -- movl -9(%eax), %ecx -- movl %ecx, -9(%edx) -- movl -5(%eax), %ecx -- movl %ecx, -5(%edx) --L(fwd_write_1bytes): -- movzbl -1(%eax), %ecx -- movb %cl, -1(%edx) --#ifndef USE_AS_BCOPY --# ifdef USE_AS_MEMPCPY -- movl %edx, %eax --# else -- movl DEST(%esp), %eax --# endif --#endif -- RETURN -- -- ALIGN (4) --L(fwd_write_46bytes): -- movl -46(%eax), %ecx -- movl %ecx, -46(%edx) --L(fwd_write_42bytes): -- movl -42(%eax), %ecx -- movl %ecx, -42(%edx) --L(fwd_write_38bytes): -- movl -38(%eax), %ecx -- movl %ecx, -38(%edx) --L(fwd_write_34bytes): -- movl -34(%eax), %ecx -- movl %ecx, -34(%edx) --L(fwd_write_30bytes): -- movl -30(%eax), %ecx -- movl %ecx, -30(%edx) --L(fwd_write_26bytes): -- movl -26(%eax), %ecx -- movl %ecx, -26(%edx) --L(fwd_write_22bytes): -- movl -22(%eax), %ecx -- movl %ecx, -22(%edx) --L(fwd_write_18bytes): -- movl -18(%eax), %ecx -- movl %ecx, -18(%edx) --L(fwd_write_14bytes): -- movl -14(%eax), %ecx -- movl %ecx, -14(%edx) --L(fwd_write_10bytes): -- movl -10(%eax), %ecx -- movl %ecx, -10(%edx) --L(fwd_write_6bytes): -- movl -6(%eax), %ecx -- movl %ecx, -6(%edx) --L(fwd_write_2bytes): -- movzwl -2(%eax), %ecx -- movw %cx, -2(%edx) --#ifndef USE_AS_BCOPY --# ifdef USE_AS_MEMPCPY -- movl %edx, %eax --# else -- movl DEST(%esp), %eax --# endif --#endif -- RETURN -- -- ALIGN (4) --L(fwd_write_47bytes): -- movl -47(%eax), %ecx -- movl %ecx, -47(%edx) --L(fwd_write_43bytes): -- movl -43(%eax), %ecx -- movl %ecx, -43(%edx) --L(fwd_write_39bytes): -- movl -39(%eax), %ecx -- movl %ecx, -39(%edx) --L(fwd_write_35bytes): -- movl -35(%eax), %ecx -- movl %ecx, -35(%edx) --L(fwd_write_31bytes): -- movl -31(%eax), %ecx -- movl %ecx, -31(%edx) 
--L(fwd_write_27bytes): -- movl -27(%eax), %ecx -- movl %ecx, -27(%edx) --L(fwd_write_23bytes): -- movl -23(%eax), %ecx -- movl %ecx, -23(%edx) --L(fwd_write_19bytes): -- movl -19(%eax), %ecx -- movl %ecx, -19(%edx) --L(fwd_write_15bytes): -- movl -15(%eax), %ecx -- movl %ecx, -15(%edx) --L(fwd_write_11bytes): -- movl -11(%eax), %ecx -- movl %ecx, -11(%edx) --L(fwd_write_7bytes): -- movl -7(%eax), %ecx -- movl %ecx, -7(%edx) --L(fwd_write_3bytes): -- movzwl -3(%eax), %ecx -- movzbl -1(%eax), %eax -- movw %cx, -3(%edx) -- movb %al, -1(%edx) --#ifndef USE_AS_BCOPY --# ifdef USE_AS_MEMPCPY -- movl %edx, %eax --# else -- movl DEST(%esp), %eax --# endif --#endif -- RETURN -- -- ALIGN (4) --L(large_page): -- movdqu (%eax), %xmm1 -- lea 16(%eax), %eax -- movdqu %xmm0, (%esi) -- movntdq %xmm1, (%edx) -- lea 16(%edx), %edx -- cmp %al, %dl -- je L(copy_page_by_rep) --L(large_page_loop_init): -- POP (%esi) -- lea -0x90(%ecx), %ecx -- POP (%edi) --L(large_page_loop): -- prefetchnta 0x1c0(%eax) -- prefetchnta 0x280(%eax) -- movdqu (%eax), %xmm0 -- movdqu 0x10(%eax), %xmm1 -- movdqu 0x20(%eax), %xmm2 -- movdqu 0x30(%eax), %xmm3 -- movdqu 0x40(%eax), %xmm4 -- movdqu 0x50(%eax), %xmm5 -- movdqu 0x60(%eax), %xmm6 -- movdqu 0x70(%eax), %xmm7 -- lea 0x80(%eax), %eax -- lfence -- sub $0x80, %ecx -- movntdq %xmm0, (%edx) -- movntdq %xmm1, 0x10(%edx) -- movntdq %xmm2, 0x20(%edx) -- movntdq %xmm3, 0x30(%edx) -- movntdq %xmm4, 0x40(%edx) -- movntdq %xmm5, 0x50(%edx) -- movntdq %xmm6, 0x60(%edx) -- movntdq %xmm7, 0x70(%edx) -- lea 0x80(%edx), %edx -- jae L(large_page_loop) -- cmp $-0x40, %ecx -- lea 0x80(%ecx), %ecx -- jl L(large_page_less_64bytes) -- -- movdqu (%eax), %xmm0 -- movdqu 0x10(%eax), %xmm1 -- movdqu 0x20(%eax), %xmm2 -- movdqu 0x30(%eax), %xmm3 -- lea 0x40(%eax), %eax -- -- movntdq %xmm0, (%edx) -- movntdq %xmm1, 0x10(%edx) -- movntdq %xmm2, 0x20(%edx) -- movntdq %xmm3, 0x30(%edx) -- lea 0x40(%edx), %edx -- sub $0x40, %ecx --L(large_page_less_64bytes): -- cmp $32, %ecx ++ jb 
L(shl_15_end) + + movdqa 16(%eax, %edi), %xmm2 + sub $32, %ecx +@@ -1298,7 +1297,7 @@ L(large_page_loop): + sub $0x40, %ecx + L(large_page_less_64bytes): + cmp $32, %ecx - jl L(large_page_less_32bytes) -- movdqu (%eax), %xmm0 -- movdqu 0x10(%eax), %xmm1 -- lea 0x20(%eax), %eax -- movntdq %xmm0, (%edx) -- movntdq %xmm1, 0x10(%edx) -- lea 0x20(%edx), %edx -- sub $0x20, %ecx --L(large_page_less_32bytes): -- add %ecx, %edx -- add %ecx, %eax -- sfence -- BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4) -- -- ALIGN (4) --L(copy_page_by_rep): -- mov %eax, %esi -- mov %edx, %edi -- mov %ecx, %edx -- shr $2, %ecx -- and $3, %edx -- rep movsl -- jz L(copy_page_by_rep_exit) -- cmp $2, %edx -- jb L(copy_page_by_rep_left_1) -- movzwl (%esi), %eax -- movw %ax, (%edi) -- add $2, %esi -- add $2, %edi -- sub $2, %edx -- jz L(copy_page_by_rep_exit) --L(copy_page_by_rep_left_1): -- movzbl (%esi), %eax -- movb %al, (%edi) --L(copy_page_by_rep_exit): -- POP (%esi) -- POP (%edi) --#ifndef USE_AS_BCOPY -- movl DEST(%esp), %eax --# ifdef USE_AS_MEMPCPY -- movl LEN(%esp), %ecx -- add %ecx, %eax --# endif --#endif -- RETURN -- -- ALIGN (4) --L(bk_write_44bytes): -- movl 40(%eax), %ecx -- movl %ecx, 40(%edx) --L(bk_write_40bytes): -- movl 36(%eax), %ecx -- movl %ecx, 36(%edx) --L(bk_write_36bytes): -- movl 32(%eax), %ecx -- movl %ecx, 32(%edx) --L(bk_write_32bytes): -- movl 28(%eax), %ecx -- movl %ecx, 28(%edx) --L(bk_write_28bytes): -- movl 24(%eax), %ecx -- movl %ecx, 24(%edx) --L(bk_write_24bytes): -- movl 20(%eax), %ecx -- movl %ecx, 20(%edx) --L(bk_write_20bytes): -- movl 16(%eax), %ecx -- movl %ecx, 16(%edx) --L(bk_write_16bytes): -- movl 12(%eax), %ecx -- movl %ecx, 12(%edx) --L(bk_write_12bytes): -- movl 8(%eax), %ecx -- movl %ecx, 8(%edx) --L(bk_write_8bytes): -- movl 4(%eax), %ecx -- movl %ecx, 4(%edx) --L(bk_write_4bytes): -- movl (%eax), %ecx -- movl %ecx, (%edx) --L(bk_write_0bytes): --#ifndef USE_AS_BCOPY -- movl DEST(%esp), %eax --# ifdef USE_AS_MEMPCPY -- movl 
LEN(%esp), %ecx -- add %ecx, %eax --# endif --#endif -- RETURN -- -- ALIGN (4) --L(bk_write_45bytes): -- movl 41(%eax), %ecx -- movl %ecx, 41(%edx) --L(bk_write_41bytes): -- movl 37(%eax), %ecx -- movl %ecx, 37(%edx) --L(bk_write_37bytes): -- movl 33(%eax), %ecx -- movl %ecx, 33(%edx) --L(bk_write_33bytes): -- movl 29(%eax), %ecx -- movl %ecx, 29(%edx) --L(bk_write_29bytes): -- movl 25(%eax), %ecx -- movl %ecx, 25(%edx) --L(bk_write_25bytes): -- movl 21(%eax), %ecx -- movl %ecx, 21(%edx) --L(bk_write_21bytes): -- movl 17(%eax), %ecx -- movl %ecx, 17(%edx) --L(bk_write_17bytes): -- movl 13(%eax), %ecx -- movl %ecx, 13(%edx) --L(bk_write_13bytes): -- movl 9(%eax), %ecx -- movl %ecx, 9(%edx) --L(bk_write_9bytes): -- movl 5(%eax), %ecx -- movl %ecx, 5(%edx) --L(bk_write_5bytes): -- movl 1(%eax), %ecx -- movl %ecx, 1(%edx) --L(bk_write_1bytes): -- movzbl (%eax), %ecx -- movb %cl, (%edx) --#ifndef USE_AS_BCOPY -- movl DEST(%esp), %eax --# ifdef USE_AS_MEMPCPY -- movl LEN(%esp), %ecx -- add %ecx, %eax --# endif --#endif -- RETURN -- -- ALIGN (4) --L(bk_write_46bytes): -- movl 42(%eax), %ecx -- movl %ecx, 42(%edx) --L(bk_write_42bytes): -- movl 38(%eax), %ecx -- movl %ecx, 38(%edx) --L(bk_write_38bytes): -- movl 34(%eax), %ecx -- movl %ecx, 34(%edx) --L(bk_write_34bytes): -- movl 30(%eax), %ecx -- movl %ecx, 30(%edx) --L(bk_write_30bytes): -- movl 26(%eax), %ecx -- movl %ecx, 26(%edx) --L(bk_write_26bytes): -- movl 22(%eax), %ecx -- movl %ecx, 22(%edx) --L(bk_write_22bytes): -- movl 18(%eax), %ecx -- movl %ecx, 18(%edx) --L(bk_write_18bytes): -- movl 14(%eax), %ecx -- movl %ecx, 14(%edx) --L(bk_write_14bytes): -- movl 10(%eax), %ecx -- movl %ecx, 10(%edx) --L(bk_write_10bytes): -- movl 6(%eax), %ecx -- movl %ecx, 6(%edx) --L(bk_write_6bytes): -- movl 2(%eax), %ecx -- movl %ecx, 2(%edx) --L(bk_write_2bytes): -- movzwl (%eax), %ecx -- movw %cx, (%edx) --#ifndef USE_AS_BCOPY -- movl DEST(%esp), %eax --# ifdef USE_AS_MEMPCPY -- movl LEN(%esp), %ecx -- add %ecx, %eax --# endif 
--#endif -- RETURN -- -- ALIGN (4) --L(bk_write_47bytes): -- movl 43(%eax), %ecx -- movl %ecx, 43(%edx) --L(bk_write_43bytes): -- movl 39(%eax), %ecx -- movl %ecx, 39(%edx) --L(bk_write_39bytes): -- movl 35(%eax), %ecx -- movl %ecx, 35(%edx) --L(bk_write_35bytes): -- movl 31(%eax), %ecx -- movl %ecx, 31(%edx) --L(bk_write_31bytes): -- movl 27(%eax), %ecx -- movl %ecx, 27(%edx) --L(bk_write_27bytes): -- movl 23(%eax), %ecx -- movl %ecx, 23(%edx) --L(bk_write_23bytes): -- movl 19(%eax), %ecx -- movl %ecx, 19(%edx) --L(bk_write_19bytes): -- movl 15(%eax), %ecx -- movl %ecx, 15(%edx) --L(bk_write_15bytes): -- movl 11(%eax), %ecx -- movl %ecx, 11(%edx) --L(bk_write_11bytes): -- movl 7(%eax), %ecx -- movl %ecx, 7(%edx) --L(bk_write_7bytes): -- movl 3(%eax), %ecx -- movl %ecx, 3(%edx) --L(bk_write_3bytes): -- movzwl 1(%eax), %ecx -- movw %cx, 1(%edx) -- movzbl (%eax), %eax -- movb %al, (%edx) --#ifndef USE_AS_BCOPY -- movl DEST(%esp), %eax --# ifdef USE_AS_MEMPCPY -- movl LEN(%esp), %ecx -- add %ecx, %eax --# endif --#endif -- RETURN_END -- -- -- .pushsection .rodata.ssse3,"a",@progbits -- ALIGN (2) --L(table_48bytes_fwd): -- .int JMPTBL (L(fwd_write_0bytes), L(table_48bytes_fwd)) -- .int JMPTBL (L(fwd_write_1bytes), L(table_48bytes_fwd)) -- .int JMPTBL (L(fwd_write_2bytes), L(table_48bytes_fwd)) -- .int JMPTBL (L(fwd_write_3bytes), L(table_48bytes_fwd)) -- .int JMPTBL (L(fwd_write_4bytes), L(table_48bytes_fwd)) -- .int JMPTBL (L(fwd_write_5bytes), L(table_48bytes_fwd)) -- .int JMPTBL (L(fwd_write_6bytes), L(table_48bytes_fwd)) -- .int JMPTBL (L(fwd_write_7bytes), L(table_48bytes_fwd)) -- .int JMPTBL (L(fwd_write_8bytes), L(table_48bytes_fwd)) -- .int JMPTBL (L(fwd_write_9bytes), L(table_48bytes_fwd)) -- .int JMPTBL (L(fwd_write_10bytes), L(table_48bytes_fwd)) -- .int JMPTBL (L(fwd_write_11bytes), L(table_48bytes_fwd)) -- .int JMPTBL (L(fwd_write_12bytes), L(table_48bytes_fwd)) -- .int JMPTBL (L(fwd_write_13bytes), L(table_48bytes_fwd)) -- .int JMPTBL 
(L(fwd_write_14bytes), L(table_48bytes_fwd)) -- .int JMPTBL (L(fwd_write_15bytes), L(table_48bytes_fwd)) -- .int JMPTBL (L(fwd_write_16bytes), L(table_48bytes_fwd)) -- .int JMPTBL (L(fwd_write_17bytes), L(table_48bytes_fwd)) -- .int JMPTBL (L(fwd_write_18bytes), L(table_48bytes_fwd)) -- .int JMPTBL (L(fwd_write_19bytes), L(table_48bytes_fwd)) -- .int JMPTBL (L(fwd_write_20bytes), L(table_48bytes_fwd)) -- .int JMPTBL (L(fwd_write_21bytes), L(table_48bytes_fwd)) -- .int JMPTBL (L(fwd_write_22bytes), L(table_48bytes_fwd)) -- .int JMPTBL (L(fwd_write_23bytes), L(table_48bytes_fwd)) -- .int JMPTBL (L(fwd_write_24bytes), L(table_48bytes_fwd)) -- .int JMPTBL (L(fwd_write_25bytes), L(table_48bytes_fwd)) -- .int JMPTBL (L(fwd_write_26bytes), L(table_48bytes_fwd)) -- .int JMPTBL (L(fwd_write_27bytes), L(table_48bytes_fwd)) -- .int JMPTBL (L(fwd_write_28bytes), L(table_48bytes_fwd)) -- .int JMPTBL (L(fwd_write_29bytes), L(table_48bytes_fwd)) -- .int JMPTBL (L(fwd_write_30bytes), L(table_48bytes_fwd)) -- .int JMPTBL (L(fwd_write_31bytes), L(table_48bytes_fwd)) -- .int JMPTBL (L(fwd_write_32bytes), L(table_48bytes_fwd)) -- .int JMPTBL (L(fwd_write_33bytes), L(table_48bytes_fwd)) -- .int JMPTBL (L(fwd_write_34bytes), L(table_48bytes_fwd)) -- .int JMPTBL (L(fwd_write_35bytes), L(table_48bytes_fwd)) -- .int JMPTBL (L(fwd_write_36bytes), L(table_48bytes_fwd)) -- .int JMPTBL (L(fwd_write_37bytes), L(table_48bytes_fwd)) -- .int JMPTBL (L(fwd_write_38bytes), L(table_48bytes_fwd)) -- .int JMPTBL (L(fwd_write_39bytes), L(table_48bytes_fwd)) -- .int JMPTBL (L(fwd_write_40bytes), L(table_48bytes_fwd)) -- .int JMPTBL (L(fwd_write_41bytes), L(table_48bytes_fwd)) -- .int JMPTBL (L(fwd_write_42bytes), L(table_48bytes_fwd)) -- .int JMPTBL (L(fwd_write_43bytes), L(table_48bytes_fwd)) -- .int JMPTBL (L(fwd_write_44bytes), L(table_48bytes_fwd)) -- .int JMPTBL (L(fwd_write_45bytes), L(table_48bytes_fwd)) -- .int JMPTBL (L(fwd_write_46bytes), L(table_48bytes_fwd)) -- .int JMPTBL 
(L(fwd_write_47bytes), L(table_48bytes_fwd)) -- -- ALIGN (2) --L(shl_table): -- .int JMPTBL (L(shl_0), L(shl_table)) -- .int JMPTBL (L(shl_1), L(shl_table)) -- .int JMPTBL (L(shl_2), L(shl_table)) -- .int JMPTBL (L(shl_3), L(shl_table)) -- .int JMPTBL (L(shl_4), L(shl_table)) -- .int JMPTBL (L(shl_5), L(shl_table)) -- .int JMPTBL (L(shl_6), L(shl_table)) -- .int JMPTBL (L(shl_7), L(shl_table)) -- .int JMPTBL (L(shl_8), L(shl_table)) -- .int JMPTBL (L(shl_9), L(shl_table)) -- .int JMPTBL (L(shl_10), L(shl_table)) -- .int JMPTBL (L(shl_11), L(shl_table)) -- .int JMPTBL (L(shl_12), L(shl_table)) -- .int JMPTBL (L(shl_13), L(shl_table)) -- .int JMPTBL (L(shl_14), L(shl_table)) -- .int JMPTBL (L(shl_15), L(shl_table)) -- -- ALIGN (2) --L(table_48_bytes_bwd): -- .int JMPTBL (L(bk_write_0bytes), L(table_48_bytes_bwd)) -- .int JMPTBL (L(bk_write_1bytes), L(table_48_bytes_bwd)) -- .int JMPTBL (L(bk_write_2bytes), L(table_48_bytes_bwd)) -- .int JMPTBL (L(bk_write_3bytes), L(table_48_bytes_bwd)) -- .int JMPTBL (L(bk_write_4bytes), L(table_48_bytes_bwd)) -- .int JMPTBL (L(bk_write_5bytes), L(table_48_bytes_bwd)) -- .int JMPTBL (L(bk_write_6bytes), L(table_48_bytes_bwd)) -- .int JMPTBL (L(bk_write_7bytes), L(table_48_bytes_bwd)) -- .int JMPTBL (L(bk_write_8bytes), L(table_48_bytes_bwd)) -- .int JMPTBL (L(bk_write_9bytes), L(table_48_bytes_bwd)) -- .int JMPTBL (L(bk_write_10bytes), L(table_48_bytes_bwd)) -- .int JMPTBL (L(bk_write_11bytes), L(table_48_bytes_bwd)) -- .int JMPTBL (L(bk_write_12bytes), L(table_48_bytes_bwd)) -- .int JMPTBL (L(bk_write_13bytes), L(table_48_bytes_bwd)) -- .int JMPTBL (L(bk_write_14bytes), L(table_48_bytes_bwd)) -- .int JMPTBL (L(bk_write_15bytes), L(table_48_bytes_bwd)) -- .int JMPTBL (L(bk_write_16bytes), L(table_48_bytes_bwd)) -- .int JMPTBL (L(bk_write_17bytes), L(table_48_bytes_bwd)) -- .int JMPTBL (L(bk_write_18bytes), L(table_48_bytes_bwd)) -- .int JMPTBL (L(bk_write_19bytes), L(table_48_bytes_bwd)) -- .int JMPTBL (L(bk_write_20bytes), 
L(table_48_bytes_bwd)) -- .int JMPTBL (L(bk_write_21bytes), L(table_48_bytes_bwd)) -- .int JMPTBL (L(bk_write_22bytes), L(table_48_bytes_bwd)) -- .int JMPTBL (L(bk_write_23bytes), L(table_48_bytes_bwd)) -- .int JMPTBL (L(bk_write_24bytes), L(table_48_bytes_bwd)) -- .int JMPTBL (L(bk_write_25bytes), L(table_48_bytes_bwd)) -- .int JMPTBL (L(bk_write_26bytes), L(table_48_bytes_bwd)) -- .int JMPTBL (L(bk_write_27bytes), L(table_48_bytes_bwd)) -- .int JMPTBL (L(bk_write_28bytes), L(table_48_bytes_bwd)) -- .int JMPTBL (L(bk_write_29bytes), L(table_48_bytes_bwd)) -- .int JMPTBL (L(bk_write_30bytes), L(table_48_bytes_bwd)) -- .int JMPTBL (L(bk_write_31bytes), L(table_48_bytes_bwd)) -- .int JMPTBL (L(bk_write_32bytes), L(table_48_bytes_bwd)) -- .int JMPTBL (L(bk_write_33bytes), L(table_48_bytes_bwd)) -- .int JMPTBL (L(bk_write_34bytes), L(table_48_bytes_bwd)) -- .int JMPTBL (L(bk_write_35bytes), L(table_48_bytes_bwd)) -- .int JMPTBL (L(bk_write_36bytes), L(table_48_bytes_bwd)) -- .int JMPTBL (L(bk_write_37bytes), L(table_48_bytes_bwd)) -- .int JMPTBL (L(bk_write_38bytes), L(table_48_bytes_bwd)) -- .int JMPTBL (L(bk_write_39bytes), L(table_48_bytes_bwd)) -- .int JMPTBL (L(bk_write_40bytes), L(table_48_bytes_bwd)) -- .int JMPTBL (L(bk_write_41bytes), L(table_48_bytes_bwd)) -- .int JMPTBL (L(bk_write_42bytes), L(table_48_bytes_bwd)) -- .int JMPTBL (L(bk_write_43bytes), L(table_48_bytes_bwd)) -- .int JMPTBL (L(bk_write_44bytes), L(table_48_bytes_bwd)) -- .int JMPTBL (L(bk_write_45bytes), L(table_48_bytes_bwd)) -- .int JMPTBL (L(bk_write_46bytes), L(table_48_bytes_bwd)) -- .int JMPTBL (L(bk_write_47bytes), L(table_48_bytes_bwd)) -- -- .popsection -- --#ifdef USE_AS_MEMMOVE -- ALIGN (4) --L(copy_backward): -- PUSH (%esi) -- movl %eax, %esi -- lea (%ecx,%edx,1),%edx -- lea (%ecx,%esi,1),%esi -- testl $0x3, %edx -- jnz L(bk_align) -- --L(bk_aligned_4): -- cmp $64, %ecx ++ jb L(large_page_less_32bytes) + movdqu (%eax), %xmm0 + movdqu 0x10(%eax), %xmm1 + lea 0x20(%eax), %eax +@@ 
-1665,11 +1664,11 @@ L(copy_backward): + + L(bk_aligned_4): + cmp $64, %ecx - jge L(bk_write_more64bytes) -- --L(bk_write_64bytesless): -- cmp $32, %ecx ++ jae L(bk_write_more64bytes) + + L(bk_write_64bytesless): + cmp $32, %ecx - jl L(bk_write_less32bytes) -- --L(bk_write_more32bytes): -- /* Copy 32 bytes at a time. */ -- sub $32, %ecx -- movl -4(%esi), %eax -- movl %eax, -4(%edx) -- movl -8(%esi), %eax -- movl %eax, -8(%edx) -- movl -12(%esi), %eax -- movl %eax, -12(%edx) -- movl -16(%esi), %eax -- movl %eax, -16(%edx) -- movl -20(%esi), %eax -- movl %eax, -20(%edx) -- movl -24(%esi), %eax -- movl %eax, -24(%edx) -- movl -28(%esi), %eax -- movl %eax, -28(%edx) -- movl -32(%esi), %eax -- movl %eax, -32(%edx) -- sub $32, %edx -- sub $32, %esi -- --L(bk_write_less32bytes): -- movl %esi, %eax -- sub %ecx, %edx -- sub %ecx, %eax -- POP (%esi) --L(bk_write_less32bytes_2): -- BRANCH_TO_JMPTBL_ENTRY (L(table_48_bytes_bwd), %ecx, 4) -- -- ALIGN (4) --L(bk_align): -- cmp $8, %ecx ++ jb L(bk_write_less32bytes) + + L(bk_write_more32bytes): + /* Copy 32 bytes at a time. */ +@@ -1704,7 +1703,7 @@ L(bk_write_less32bytes_2): + ALIGN (4) + L(bk_align): + cmp $8, %ecx - jle L(bk_write_less32bytes) -- testl $1, %edx -- /* We get here only if (EDX & 3 ) != 0 so if (EDX & 1) ==0, -- then (EDX & 2) must be != 0. */ -- jz L(bk_got2) -- sub $1, %esi -- sub $1, %ecx -- sub $1, %edx -- movzbl (%esi), %eax -- movb %al, (%edx) -- -- testl $2, %edx -- jz L(bk_aligned_4) -- --L(bk_got2): -- sub $2, %esi -- sub $2, %ecx -- sub $2, %edx -- movzwl (%esi), %eax -- movw %ax, (%edx) -- jmp L(bk_aligned_4) -- -- ALIGN (4) --L(bk_write_more64bytes): -- /* Check alignment of last byte. */ -- testl $15, %edx -- jz L(bk_ssse3_cpy_pre) -- --/* EDX is aligned 4 bytes, but not 16 bytes. 
*/ --L(bk_ssse3_align): -- sub $4, %esi -- sub $4, %ecx -- sub $4, %edx -- movl (%esi), %eax -- movl %eax, (%edx) -- -- testl $15, %edx -- jz L(bk_ssse3_cpy_pre) -- -- sub $4, %esi -- sub $4, %ecx -- sub $4, %edx -- movl (%esi), %eax -- movl %eax, (%edx) -- -- testl $15, %edx -- jz L(bk_ssse3_cpy_pre) -- -- sub $4, %esi -- sub $4, %ecx -- sub $4, %edx -- movl (%esi), %eax -- movl %eax, (%edx) -- --L(bk_ssse3_cpy_pre): -- cmp $64, %ecx ++ jbe L(bk_write_less32bytes) + testl $1, %edx + /* We get here only if (EDX & 3 ) != 0 so if (EDX & 1) ==0, + then (EDX & 2) must be != 0. */ +@@ -1760,7 +1759,7 @@ L(bk_ssse3_align): + + L(bk_ssse3_cpy_pre): + cmp $64, %ecx - jl L(bk_write_more32bytes) -- --L(bk_ssse3_cpy): -- sub $64, %esi -- sub $64, %ecx -- sub $64, %edx -- movdqu 0x30(%esi), %xmm3 -- movdqa %xmm3, 0x30(%edx) -- movdqu 0x20(%esi), %xmm2 -- movdqa %xmm2, 0x20(%edx) -- movdqu 0x10(%esi), %xmm1 -- movdqa %xmm1, 0x10(%edx) -- movdqu (%esi), %xmm0 -- movdqa %xmm0, (%edx) -- cmp $64, %ecx ++ jb L(bk_write_more32bytes) + + L(bk_ssse3_cpy): + sub $64, %esi +@@ -1775,7 +1774,7 @@ L(bk_ssse3_cpy): + movdqu (%esi), %xmm0 + movdqa %xmm0, (%edx) + cmp $64, %ecx - jge L(bk_ssse3_cpy) -- jmp L(bk_write_64bytesless) -- --#endif -- --END (MEMCPY) -- --#endif ---- glibc-2.11-188-g0cbcca8/sysdeps/i386/i686/multiarch/memcpy-ssse3.S -+++ glibc-2.11.90-12//dev/null -@@ -1,1737 +0,0 @@ --/* memcpy with SSSE3 -- Copyright (C) 2010 Free Software Foundation, Inc. -- Contributed by Intel Corporation. -- This file is part of the GNU C Library. -- -- The GNU C Library is free software; you can redistribute it and/or -- modify it under the terms of the GNU Lesser General Public -- License as published by the Free Software Foundation; either -- version 2.1 of the License, or (at your option) any later version. 
-- -- The GNU C Library is distributed in the hope that it will be useful, -- but WITHOUT ANY WARRANTY; without even the implied warranty of -- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -- Lesser General Public License for more details. -- -- You should have received a copy of the GNU Lesser General Public -- License along with the GNU C Library; if not, write to the Free -- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA -- 02111-1307 USA. */ -- --#include -- --#if !defined NOT_IN_libc \ -- && (defined SHARED \ -- || defined USE_AS_MEMMOVE \ -- || !defined USE_MULTIARCH) -- --#include "asm-syntax.h" -- --#ifndef MEMCPY --# define MEMCPY __memcpy_ssse3 --# define MEMCPY_CHK __memcpy_chk_ssse3 --#endif -- --#ifdef USE_AS_BCOPY --# define SRC PARMS --# define DEST SRC+4 --# define LEN DEST+4 --#else --# define DEST PARMS --# define SRC DEST+4 --# define LEN SRC+4 --#endif -- --#define CFI_PUSH(REG) \ -- cfi_adjust_cfa_offset (4); \ -- cfi_rel_offset (REG, 0) -- --#define CFI_POP(REG) \ -- cfi_adjust_cfa_offset (-4); \ -- cfi_restore (REG) -- --#define PUSH(REG) pushl REG; CFI_PUSH (REG) --#define POP(REG) popl REG; CFI_POP (REG) -- --#ifdef SHARED --# define PARMS 8 /* Preserve EBX. */ --# define ENTRANCE PUSH (%ebx); --# define RETURN_END POP (%ebx); ret --# define RETURN RETURN_END; CFI_PUSH (%ebx) --# define JMPTBL(I, B) I - B -- --/* Load an entry in a jump table into EBX and branch to it. TABLE is a -- jump table with relative offsets. INDEX is a register contains the -- index into the jump table. SCALE is the scale of INDEX. */ --# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \ -- /* We first load PC into EBX. */ \ -- call __i686.get_pc_thunk.bx; \ -- /* Get the address of the jump table. */ \ -- addl $(TABLE - .), %ebx; \ -- /* Get the entry and convert the relative offset to the \ -- absolute address. */ \ -- addl (%ebx,INDEX,SCALE), %ebx; \ -- /* We loaded the jump table. Go. 
*/ \ -- jmp *%ebx -- --# define BRANCH_TO_JMPTBL_ENTRY_VALUE(TABLE) \ -- addl $(TABLE - .), %ebx -- --# define BRANCH_TO_JMPTBL_ENTRY_TAIL(TABLE, INDEX, SCALE) \ -- addl (%ebx,INDEX,SCALE), %ebx; \ -- /* We loaded the jump table. Go. */ \ -- jmp *%ebx -- -- .section .gnu.linkonce.t.__i686.get_pc_thunk.bx,"ax",@progbits -- .globl __i686.get_pc_thunk.bx -- .hidden __i686.get_pc_thunk.bx -- ALIGN (4) -- .type __i686.get_pc_thunk.bx,@function --__i686.get_pc_thunk.bx: -- movl (%esp), %ebx -- ret --#else --# define PARMS 4 --# define ENTRANCE --# define RETURN_END ret --# define RETURN RETURN_END --# define JMPTBL(I, B) I -- --/* Branch to an entry in a jump table. TABLE is a jump table with -- absolute offsets. INDEX is a register contains the index into the -- jump table. SCALE is the scale of INDEX. */ --# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \ -- jmp *TABLE(,INDEX,SCALE) -- --# define BRANCH_TO_JMPTBL_ENTRY_VALUE(TABLE) -- --# define BRANCH_TO_JMPTBL_ENTRY_TAIL(TABLE, INDEX, SCALE) \ -- jmp *TABLE(,INDEX,SCALE) --#endif -- -- .section .text.ssse3,"ax",@progbits --#if defined SHARED && !defined NOT_IN_libc && !defined USE_AS_BCOPY --ENTRY (MEMCPY_CHK) -- movl 12(%esp), %eax -- cmpl %eax, 16(%esp) -- jb HIDDEN_JUMPTARGET (__chk_fail) --END (MEMCPY_CHK) --#endif --ENTRY (MEMCPY) -- ENTRANCE -- movl LEN(%esp), %ecx -- movl SRC(%esp), %eax -- movl DEST(%esp), %edx -- --#ifdef USE_AS_MEMMOVE -- cmp %eax, %edx -- jb L(copy_forward) -- je L(fwd_write_0bytes) -- cmp $32, %ecx ++ jae L(bk_ssse3_cpy) + jmp L(bk_write_64bytesless) + + #endif +--- glibc-2.11-215-g199428c/sysdeps/i386/i686/multiarch/memcpy-ssse3.S ++++ glibc-2.11.90-13/sysdeps/i386/i686/multiarch/memcpy-ssse3.S +@@ -128,7 +128,7 @@ ENTRY (MEMCPY) + jb L(copy_forward) + je L(fwd_write_0bytes) + cmp $32, %ecx - jge L(memmove_bwd) -- jmp L(bk_write_less32bytes_2) --L(memmove_bwd): -- add %ecx, %eax -- cmp %eax, %edx -- movl SRC(%esp), %eax -- jb L(copy_backward) -- --L(copy_forward): --#endif -- cmp 
$48, %ecx ++ jae L(memmove_bwd) + jmp L(bk_write_less32bytes_2) + L(memmove_bwd): + add %ecx, %eax +@@ -139,12 +139,12 @@ L(memmove_bwd): + L(copy_forward): + #endif + cmp $48, %ecx - jge L(48bytesormore) -- --L(fwd_write_less32bytes): --#ifndef USE_AS_MEMMOVE -- cmp %dl, %al ++ jae L(48bytesormore) + + L(fwd_write_less32bytes): + #ifndef USE_AS_MEMMOVE + cmp %dl, %al - jl L(bk_write) --#endif -- add %ecx, %edx -- add %ecx, %eax -- BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4) --#ifndef USE_AS_MEMMOVE --L(bk_write): -- BRANCH_TO_JMPTBL_ENTRY (L(table_48_bytes_bwd), %ecx, 4) --#endif -- -- ALIGN (4) --/* ECX > 32 and EDX is 4 byte aligned. */ --L(48bytesormore): -- movdqu (%eax), %xmm0 -- PUSH (%edi) -- movl %edx, %edi -- and $-16, %edx -- PUSH (%esi) -- add $16, %edx -- movl %edi, %esi -- sub %edx, %edi -- add %edi, %ecx -- sub %edi, %eax -- --#ifdef SHARED_CACHE_SIZE_HALF -- cmp $SHARED_CACHE_SIZE_HALF, %ecx --#else --# ifdef SHARED -- call __i686.get_pc_thunk.bx -- add $_GLOBAL_OFFSET_TABLE_, %ebx -- cmp __x86_shared_cache_size_half@GOTOFF(%ebx), %ecx --# else -- cmp __x86_shared_cache_size_half, %ecx --# endif --#endif -- -- mov %eax, %edi ++ jb L(bk_write) + #endif + add %ecx, %edx + add %ecx, %eax +@@ -181,7 +181,7 @@ L(48bytesormore): + #endif + + mov %eax, %edi - jge L(large_page) -- and $0xf, %edi -- jz L(shl_0) -- -- BRANCH_TO_JMPTBL_ENTRY (L(shl_table), %edi, 4) -- -- ALIGN (4) --L(shl_0): -- movdqu %xmm0, (%esi) -- xor %edi, %edi -- POP (%esi) -- cmp $127, %ecx -- ja L(shl_0_gobble) -- lea -32(%ecx), %ecx --L(shl_0_loop): -- movdqa (%eax, %edi), %xmm0 -- movdqa 16(%eax, %edi), %xmm1 -- sub $32, %ecx -- movdqa %xmm0, (%edx, %edi) -- movdqa %xmm1, 16(%edx, %edi) -- lea 32(%edi), %edi ++ jae L(large_page) + and $0xf, %edi + jz L(shl_0) + +@@ -202,7 +202,7 @@ L(shl_0_loop): + movdqa %xmm0, (%edx, %edi) + movdqa %xmm1, 16(%edx, %edi) + lea 32(%edi), %edi - jl L(shl_0_end) -- -- movdqa (%eax, %edi), %xmm0 -- movdqa 16(%eax, %edi), %xmm1 -- sub $32, 
%ecx -- movdqa %xmm0, (%edx, %edi) -- movdqa %xmm1, 16(%edx, %edi) -- lea 32(%edi), %edi ++ jb L(shl_0_end) + + movdqa (%eax, %edi), %xmm0 + movdqa 16(%eax, %edi), %xmm1 +@@ -210,7 +210,7 @@ L(shl_0_loop): + movdqa %xmm0, (%edx, %edi) + movdqa %xmm1, 16(%edx, %edi) + lea 32(%edi), %edi - jl L(shl_0_end) -- -- movdqa (%eax, %edi), %xmm0 -- movdqa 16(%eax, %edi), %xmm1 -- sub $32, %ecx -- movdqa %xmm0, (%edx, %edi) -- movdqa %xmm1, 16(%edx, %edi) -- lea 32(%edi), %edi ++ jb L(shl_0_end) + + movdqa (%eax, %edi), %xmm0 + movdqa 16(%eax, %edi), %xmm1 +@@ -218,7 +218,7 @@ L(shl_0_loop): + movdqa %xmm0, (%edx, %edi) + movdqa %xmm1, 16(%edx, %edi) + lea 32(%edi), %edi - jl L(shl_0_end) -- -- movdqa (%eax, %edi), %xmm0 -- movdqa 16(%eax, %edi), %xmm1 -- sub $32, %ecx -- movdqa %xmm0, (%edx, %edi) -- movdqa %xmm1, 16(%edx, %edi) -- lea 32(%edi), %edi --L(shl_0_end): -- lea 32(%ecx), %ecx -- add %ecx, %edi -- add %edi, %edx -- add %edi, %eax -- POP (%edi) -- BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4) -- --L(shl_0_gobble): -- --#ifdef DATA_CACHE_SIZE_HALF -- cmp $DATA_CACHE_SIZE_HALF, %ecx --#else --# ifdef SHARED -- call __i686.get_pc_thunk.bx -- add $_GLOBAL_OFFSET_TABLE_, %ebx -- cmp __x86_data_cache_size_half@GOTOFF(%ebx), %ecx --# else -- cmp __x86_data_cache_size_half, %ecx --# endif --#endif -- -- POP (%edi) -- lea -128(%ecx), %ecx ++ jb L(shl_0_end) + + movdqa (%eax, %edi), %xmm0 + movdqa 16(%eax, %edi), %xmm1 +@@ -250,7 +250,7 @@ L(shl_0_gobble): + + POP (%edi) + lea -128(%ecx), %ecx - jge L(shl_0_gobble_mem_loop) --L(shl_0_gobble_cache_loop): -- movdqa (%eax), %xmm0 -- movdqa 0x10(%eax), %xmm1 -- movdqa 0x20(%eax), %xmm2 -- movdqa 0x30(%eax), %xmm3 -- movdqa 0x40(%eax), %xmm4 -- movdqa 0x50(%eax), %xmm5 -- movdqa 0x60(%eax), %xmm6 -- movdqa 0x70(%eax), %xmm7 -- lea 0x80(%eax), %eax -- sub $128, %ecx -- movdqa %xmm0, (%edx) -- movdqa %xmm1, 0x10(%edx) -- movdqa %xmm2, 0x20(%edx) -- movdqa %xmm3, 0x30(%edx) -- movdqa %xmm4, 0x40(%edx) -- movdqa %xmm5, 
0x50(%edx) -- movdqa %xmm6, 0x60(%edx) -- movdqa %xmm7, 0x70(%edx) -- lea 0x80(%edx), %edx -- ++ jae L(shl_0_gobble_mem_loop) + L(shl_0_gobble_cache_loop): + movdqa (%eax), %xmm0 + movdqa 0x10(%eax), %xmm1 +@@ -272,8 +272,7 @@ L(shl_0_gobble_cache_loop): + movdqa %xmm7, 0x70(%edx) + lea 0x80(%edx), %edx + - jge L(shl_0_gobble_cache_loop) -L(shl_0_gobble_cache_loop_tail): -- cmp $-0x40, %ecx -- lea 0x80(%ecx), %ecx -- jl L(shl_0_cache_less_64bytes) -- -- movdqa (%eax), %xmm0 -- sub $0x40, %ecx -- movdqa 0x10(%eax), %xmm1 -- -- movdqa %xmm0, (%edx) -- movdqa %xmm1, 0x10(%edx) -- -- movdqa 0x20(%eax), %xmm0 -- movdqa 0x30(%eax), %xmm1 -- add $0x40, %eax -- -- movdqa %xmm0, 0x20(%edx) -- movdqa %xmm1, 0x30(%edx) -- add $0x40, %edx --L(shl_0_cache_less_64bytes): -- cmp $0x20, %ecx ++ jae L(shl_0_gobble_cache_loop) + cmp $-0x40, %ecx + lea 0x80(%ecx), %ecx + jl L(shl_0_cache_less_64bytes) +@@ -294,7 +293,7 @@ L(shl_0_gobble_cache_loop_tail): + add $0x40, %edx + L(shl_0_cache_less_64bytes): + cmp $0x20, %ecx - jl L(shl_0_cache_less_32bytes) -- movdqa (%eax), %xmm0 -- sub $0x20, %ecx -- movdqa 0x10(%eax), %xmm1 -- add $0x20, %eax -- movdqa %xmm0, (%edx) -- movdqa %xmm1, 0x10(%edx) -- add $0x20, %edx --L(shl_0_cache_less_32bytes): -- cmp $0x10, %ecx ++ jb L(shl_0_cache_less_32bytes) + movdqa (%eax), %xmm0 + sub $0x20, %ecx + movdqa 0x10(%eax), %xmm1 +@@ -304,7 +303,7 @@ L(shl_0_cache_less_64bytes): + add $0x20, %edx + L(shl_0_cache_less_32bytes): + cmp $0x10, %ecx - jl L(shl_0_cache_less_16bytes) -- sub $0x10, %ecx -- movdqa (%eax), %xmm0 -- add $0x10, %eax -- movdqa %xmm0, (%edx) -- add $0x10, %edx --L(shl_0_cache_less_16bytes): -- add %ecx, %edx -- add %ecx, %eax -- BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4) -- -- -- ALIGN (4) --L(shl_0_gobble_mem_loop): -- prefetcht0 0x1c0(%eax) -- prefetcht0 0x280(%eax) -- prefetcht0 0x1c0(%edx) -- -- movdqa (%eax), %xmm0 -- movdqa 0x10(%eax), %xmm1 -- movdqa 0x20(%eax), %xmm2 -- movdqa 0x30(%eax), %xmm3 -- movdqa 
0x40(%eax), %xmm4 -- movdqa 0x50(%eax), %xmm5 -- movdqa 0x60(%eax), %xmm6 -- movdqa 0x70(%eax), %xmm7 -- lea 0x80(%eax), %eax -- sub $0x80, %ecx -- movdqa %xmm0, (%edx) -- movdqa %xmm1, 0x10(%edx) -- movdqa %xmm2, 0x20(%edx) -- movdqa %xmm3, 0x30(%edx) -- movdqa %xmm4, 0x40(%edx) -- movdqa %xmm5, 0x50(%edx) -- movdqa %xmm6, 0x60(%edx) -- movdqa %xmm7, 0x70(%edx) -- lea 0x80(%edx), %edx -- ++ jb L(shl_0_cache_less_16bytes) + sub $0x10, %ecx + movdqa (%eax), %xmm0 + add $0x10, %eax +@@ -342,7 +341,7 @@ L(shl_0_gobble_mem_loop): + movdqa %xmm7, 0x70(%edx) + lea 0x80(%edx), %edx + - jge L(shl_0_gobble_mem_loop) -- cmp $-0x40, %ecx -- lea 0x80(%ecx), %ecx -- jl L(shl_0_mem_less_64bytes) -- -- movdqa (%eax), %xmm0 -- sub $0x40, %ecx -- movdqa 0x10(%eax), %xmm1 -- -- movdqa %xmm0, (%edx) -- movdqa %xmm1, 0x10(%edx) -- -- movdqa 0x20(%eax), %xmm0 -- movdqa 0x30(%eax), %xmm1 -- add $0x40, %eax -- -- movdqa %xmm0, 0x20(%edx) -- movdqa %xmm1, 0x30(%edx) -- add $0x40, %edx --L(shl_0_mem_less_64bytes): -- cmp $0x20, %ecx ++ jae L(shl_0_gobble_mem_loop) + cmp $-0x40, %ecx + lea 0x80(%ecx), %ecx + jl L(shl_0_mem_less_64bytes) +@@ -363,7 +362,7 @@ L(shl_0_gobble_mem_loop): + add $0x40, %edx + L(shl_0_mem_less_64bytes): + cmp $0x20, %ecx - jl L(shl_0_mem_less_32bytes) -- movdqa (%eax), %xmm0 -- sub $0x20, %ecx -- movdqa 0x10(%eax), %xmm1 -- add $0x20, %eax -- movdqa %xmm0, (%edx) -- movdqa %xmm1, 0x10(%edx) -- add $0x20, %edx --L(shl_0_mem_less_32bytes): -- cmp $0x10, %ecx ++ jb L(shl_0_mem_less_32bytes) + movdqa (%eax), %xmm0 + sub $0x20, %ecx + movdqa 0x10(%eax), %xmm1 +@@ -373,7 +372,7 @@ L(shl_0_mem_less_64bytes): + add $0x20, %edx + L(shl_0_mem_less_32bytes): + cmp $0x10, %ecx - jl L(shl_0_mem_less_16bytes) -- sub $0x10, %ecx -- movdqa (%eax), %xmm0 -- add $0x10, %eax -- movdqa %xmm0, (%edx) -- add $0x10, %edx --L(shl_0_mem_less_16bytes): -- add %ecx, %edx -- add %ecx, %eax -- BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4) -- -- -- ALIGN (4) --L(shl_1): -- 
BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) -- lea -1(%eax), %eax -- movaps (%eax), %xmm1 -- xor %edi, %edi -- lea -32(%ecx), %ecx -- movdqu %xmm0, (%esi) -- POP (%esi) --L(shl_1_loop): -- -- movdqa 16(%eax, %edi), %xmm2 -- sub $32, %ecx -- movdqa 32(%eax, %edi), %xmm3 -- movdqa %xmm3, %xmm4 -- palignr $1, %xmm2, %xmm3 -- palignr $1, %xmm1, %xmm2 -- lea 32(%edi), %edi -- movdqa %xmm2, -32(%edx, %edi) -- movdqa %xmm3, -16(%edx, %edi) -- ++ jb L(shl_0_mem_less_16bytes) + sub $0x10, %ecx + movdqa (%eax), %xmm0 + add $0x10, %eax +@@ -406,7 +405,7 @@ L(shl_1_loop): + movdqa %xmm2, -32(%edx, %edi) + movdqa %xmm3, -16(%edx, %edi) + - jl L(shl_1_end) -- -- movdqa 16(%eax, %edi), %xmm2 -- sub $32, %ecx -- movdqa 32(%eax, %edi), %xmm3 -- movdqa %xmm3, %xmm1 -- palignr $1, %xmm2, %xmm3 -- palignr $1, %xmm4, %xmm2 -- lea 32(%edi), %edi -- movdqa %xmm2, -32(%edx, %edi) -- movdqa %xmm3, -16(%edx, %edi) -- -- jae L(shl_1_loop) -- --L(shl_1_end): -- lea 32(%ecx), %ecx -- add %ecx, %edi -- add %edi, %edx -- lea 1(%edi, %eax), %eax -- POP (%edi) -- BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) -- -- ALIGN (4) --L(shl_2): -- BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) -- lea -2(%eax), %eax -- movaps (%eax), %xmm1 -- xor %edi, %edi -- lea -32(%ecx), %ecx -- movdqu %xmm0, (%esi) -- POP (%esi) --L(shl_2_loop): -- -- movdqa 16(%eax, %edi), %xmm2 -- sub $32, %ecx -- movdqa 32(%eax, %edi), %xmm3 -- movdqa %xmm3, %xmm4 -- palignr $2, %xmm2, %xmm3 -- palignr $2, %xmm1, %xmm2 -- lea 32(%edi), %edi -- movdqa %xmm2, -32(%edx, %edi) -- movdqa %xmm3, -16(%edx, %edi) -- ++ jb L(shl_1_end) + + movdqa 16(%eax, %edi), %xmm2 + sub $32, %ecx +@@ -449,7 +448,7 @@ L(shl_2_loop): + movdqa %xmm2, -32(%edx, %edi) + movdqa %xmm3, -16(%edx, %edi) + - jl L(shl_2_end) -- -- movdqa 16(%eax, %edi), %xmm2 -- sub $32, %ecx -- movdqa 32(%eax, %edi), %xmm3 -- movdqa %xmm3, %xmm1 -- palignr $2, %xmm2, %xmm3 -- palignr $2, %xmm4, %xmm2 -- lea 32(%edi), %edi -- movdqa %xmm2, -32(%edx, %edi) -- 
movdqa %xmm3, -16(%edx, %edi) -- -- jae L(shl_2_loop) -- --L(shl_2_end): -- lea 32(%ecx), %ecx -- add %ecx, %edi -- add %edi, %edx -- lea 2(%edi, %eax), %eax -- POP (%edi) -- BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) -- -- ALIGN (4) --L(shl_3): -- BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) -- lea -3(%eax), %eax -- movaps (%eax), %xmm1 -- xor %edi, %edi -- lea -32(%ecx), %ecx -- movdqu %xmm0, (%esi) -- POP (%esi) --L(shl_3_loop): -- -- movdqa 16(%eax, %edi), %xmm2 -- sub $32, %ecx -- movdqa 32(%eax, %edi), %xmm3 -- movdqa %xmm3, %xmm4 -- palignr $3, %xmm2, %xmm3 -- palignr $3, %xmm1, %xmm2 -- lea 32(%edi), %edi -- movdqa %xmm2, -32(%edx, %edi) -- movdqa %xmm3, -16(%edx, %edi) -- ++ jb L(shl_2_end) + + movdqa 16(%eax, %edi), %xmm2 + sub $32, %ecx +@@ -492,7 +491,7 @@ L(shl_3_loop): + movdqa %xmm2, -32(%edx, %edi) + movdqa %xmm3, -16(%edx, %edi) + - jl L(shl_3_end) -- -- movdqa 16(%eax, %edi), %xmm2 -- sub $32, %ecx -- movdqa 32(%eax, %edi), %xmm3 -- movdqa %xmm3, %xmm1 -- palignr $3, %xmm2, %xmm3 -- palignr $3, %xmm4, %xmm2 -- lea 32(%edi), %edi -- movdqa %xmm2, -32(%edx, %edi) -- movdqa %xmm3, -16(%edx, %edi) -- -- jae L(shl_3_loop) -- --L(shl_3_end): -- lea 32(%ecx), %ecx -- add %ecx, %edi -- add %edi, %edx -- lea 3(%edi, %eax), %eax -- POP (%edi) -- BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) -- -- ALIGN (4) --L(shl_4): -- BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) -- lea -4(%eax), %eax -- movaps (%eax), %xmm1 -- xor %edi, %edi -- lea -32(%ecx), %ecx -- movdqu %xmm0, (%esi) -- POP (%esi) --L(shl_4_loop): -- -- movdqa 16(%eax, %edi), %xmm2 -- sub $32, %ecx -- movdqa 32(%eax, %edi), %xmm3 -- movdqa %xmm3, %xmm4 -- palignr $4, %xmm2, %xmm3 -- palignr $4, %xmm1, %xmm2 -- lea 32(%edi), %edi -- movdqa %xmm2, -32(%edx, %edi) -- movdqa %xmm3, -16(%edx, %edi) -- ++ jb L(shl_3_end) + + movdqa 16(%eax, %edi), %xmm2 + sub $32, %ecx +@@ -535,7 +534,7 @@ L(shl_4_loop): + movdqa %xmm2, -32(%edx, %edi) + movdqa %xmm3, -16(%edx, 
%edi) + - jl L(shl_4_end) -- -- movdqa 16(%eax, %edi), %xmm2 -- sub $32, %ecx -- movdqa 32(%eax, %edi), %xmm3 -- movdqa %xmm3, %xmm1 -- palignr $4, %xmm2, %xmm3 -- palignr $4, %xmm4, %xmm2 -- lea 32(%edi), %edi -- movdqa %xmm2, -32(%edx, %edi) -- movdqa %xmm3, -16(%edx, %edi) -- -- jae L(shl_4_loop) -- --L(shl_4_end): -- lea 32(%ecx), %ecx -- add %ecx, %edi -- add %edi, %edx -- lea 4(%edi, %eax), %eax -- POP (%edi) -- BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) -- -- ALIGN (4) --L(shl_5): -- BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) -- lea -5(%eax), %eax -- movaps (%eax), %xmm1 -- xor %edi, %edi -- lea -32(%ecx), %ecx -- movdqu %xmm0, (%esi) -- POP (%esi) --L(shl_5_loop): -- -- movdqa 16(%eax, %edi), %xmm2 -- sub $32, %ecx -- movdqa 32(%eax, %edi), %xmm3 -- movdqa %xmm3, %xmm4 -- palignr $5, %xmm2, %xmm3 -- palignr $5, %xmm1, %xmm2 -- lea 32(%edi), %edi -- movdqa %xmm2, -32(%edx, %edi) -- movdqa %xmm3, -16(%edx, %edi) -- ++ jb L(shl_4_end) + + movdqa 16(%eax, %edi), %xmm2 + sub $32, %ecx +@@ -578,7 +577,7 @@ L(shl_5_loop): + movdqa %xmm2, -32(%edx, %edi) + movdqa %xmm3, -16(%edx, %edi) + - jl L(shl_5_end) -- -- movdqa 16(%eax, %edi), %xmm2 -- sub $32, %ecx -- movdqa 32(%eax, %edi), %xmm3 -- movdqa %xmm3, %xmm1 -- palignr $5, %xmm2, %xmm3 -- palignr $5, %xmm4, %xmm2 -- lea 32(%edi), %edi -- movdqa %xmm2, -32(%edx, %edi) -- movdqa %xmm3, -16(%edx, %edi) -- -- jae L(shl_5_loop) -- --L(shl_5_end): -- lea 32(%ecx), %ecx -- add %ecx, %edi -- add %edi, %edx -- lea 5(%edi, %eax), %eax -- POP (%edi) -- BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) -- -- -- ALIGN (4) --L(shl_6): -- BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) -- lea -6(%eax), %eax -- movaps (%eax), %xmm1 -- xor %edi, %edi -- lea -32(%ecx), %ecx -- movdqu %xmm0, (%esi) -- POP (%esi) --L(shl_6_loop): -- -- movdqa 16(%eax, %edi), %xmm2 -- sub $32, %ecx -- movdqa 32(%eax, %edi), %xmm3 -- movdqa %xmm3, %xmm4 -- palignr $6, %xmm2, %xmm3 -- palignr $6, %xmm1, %xmm2 -- lea 
32(%edi), %edi -- movdqa %xmm2, -32(%edx, %edi) -- movdqa %xmm3, -16(%edx, %edi) -- ++ jb L(shl_5_end) + + movdqa 16(%eax, %edi), %xmm2 + sub $32, %ecx +@@ -622,7 +621,7 @@ L(shl_6_loop): + movdqa %xmm2, -32(%edx, %edi) + movdqa %xmm3, -16(%edx, %edi) + - jl L(shl_6_end) -- -- movdqa 16(%eax, %edi), %xmm2 -- sub $32, %ecx -- movdqa 32(%eax, %edi), %xmm3 -- movdqa %xmm3, %xmm1 -- palignr $6, %xmm2, %xmm3 -- palignr $6, %xmm4, %xmm2 -- lea 32(%edi), %edi -- movdqa %xmm2, -32(%edx, %edi) -- movdqa %xmm3, -16(%edx, %edi) -- -- jae L(shl_6_loop) -- --L(shl_6_end): -- lea 32(%ecx), %ecx -- add %ecx, %edi -- add %edi, %edx -- lea 6(%edi, %eax), %eax -- POP (%edi) -- BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) -- -- ALIGN (4) --L(shl_7): -- BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) -- lea -7(%eax), %eax -- movaps (%eax), %xmm1 -- xor %edi, %edi -- lea -32(%ecx), %ecx -- movdqu %xmm0, (%esi) -- POP (%esi) --L(shl_7_loop): -- -- movdqa 16(%eax, %edi), %xmm2 -- sub $32, %ecx -- movdqa 32(%eax, %edi), %xmm3 -- movdqa %xmm3, %xmm4 -- palignr $7, %xmm2, %xmm3 -- palignr $7, %xmm1, %xmm2 -- lea 32(%edi), %edi -- movdqa %xmm2, -32(%edx, %edi) -- movdqa %xmm3, -16(%edx, %edi) -- ++ jb L(shl_6_end) + + movdqa 16(%eax, %edi), %xmm2 + sub $32, %ecx +@@ -665,7 +664,7 @@ L(shl_7_loop): + movdqa %xmm2, -32(%edx, %edi) + movdqa %xmm3, -16(%edx, %edi) + - jl L(shl_7_end) -- -- movdqa 16(%eax, %edi), %xmm2 -- sub $32, %ecx -- movdqa 32(%eax, %edi), %xmm3 -- movdqa %xmm3, %xmm1 -- palignr $7, %xmm2, %xmm3 -- palignr $7, %xmm4, %xmm2 -- lea 32(%edi), %edi -- movdqa %xmm2, -32(%edx, %edi) -- movdqa %xmm3, -16(%edx, %edi) -- -- jae L(shl_7_loop) -- --L(shl_7_end): -- lea 32(%ecx), %ecx -- add %ecx, %edi -- add %edi, %edx -- lea 7(%edi, %eax), %eax -- POP (%edi) -- BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) -- -- ALIGN (4) --L(shl_8): -- BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) -- lea -8(%eax), %eax -- movaps (%eax), %xmm1 -- xor %edi, %edi -- 
lea -32(%ecx), %ecx -- movdqu %xmm0, (%esi) -- POP (%esi) --L(shl_8_loop): -- -- movdqa 16(%eax, %edi), %xmm2 -- sub $32, %ecx -- movdqa 32(%eax, %edi), %xmm3 -- movdqa %xmm3, %xmm4 -- palignr $8, %xmm2, %xmm3 -- palignr $8, %xmm1, %xmm2 -- lea 32(%edi), %edi -- movdqa %xmm2, -32(%edx, %edi) -- movdqa %xmm3, -16(%edx, %edi) -- ++ jb L(shl_7_end) + + movdqa 16(%eax, %edi), %xmm2 + sub $32, %ecx +@@ -708,7 +707,7 @@ L(shl_8_loop): + movdqa %xmm2, -32(%edx, %edi) + movdqa %xmm3, -16(%edx, %edi) + - jl L(shl_8_end) -- -- movdqa 16(%eax, %edi), %xmm2 -- sub $32, %ecx -- movdqa 32(%eax, %edi), %xmm3 -- movdqa %xmm3, %xmm1 -- palignr $8, %xmm2, %xmm3 -- palignr $8, %xmm4, %xmm2 -- lea 32(%edi), %edi -- movdqa %xmm2, -32(%edx, %edi) -- movdqa %xmm3, -16(%edx, %edi) -- -- jae L(shl_8_loop) -- --L(shl_8_end): -- lea 32(%ecx), %ecx -- add %ecx, %edi -- add %edi, %edx -- lea 8(%edi, %eax), %eax -- POP (%edi) -- BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) -- -- ALIGN (4) --L(shl_9): -- BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) -- lea -9(%eax), %eax -- movaps (%eax), %xmm1 -- xor %edi, %edi -- lea -32(%ecx), %ecx -- movdqu %xmm0, (%esi) -- POP (%esi) --L(shl_9_loop): -- -- movdqa 16(%eax, %edi), %xmm2 -- sub $32, %ecx -- movdqa 32(%eax, %edi), %xmm3 -- movdqa %xmm3, %xmm4 -- palignr $9, %xmm2, %xmm3 -- palignr $9, %xmm1, %xmm2 -- lea 32(%edi), %edi -- movdqa %xmm2, -32(%edx, %edi) -- movdqa %xmm3, -16(%edx, %edi) -- ++ jb L(shl_8_end) + + movdqa 16(%eax, %edi), %xmm2 + sub $32, %ecx +@@ -751,7 +750,7 @@ L(shl_9_loop): + movdqa %xmm2, -32(%edx, %edi) + movdqa %xmm3, -16(%edx, %edi) + - jl L(shl_9_end) -- -- movdqa 16(%eax, %edi), %xmm2 -- sub $32, %ecx -- movdqa 32(%eax, %edi), %xmm3 -- movdqa %xmm3, %xmm1 -- palignr $9, %xmm2, %xmm3 -- palignr $9, %xmm4, %xmm2 -- lea 32(%edi), %edi -- movdqa %xmm2, -32(%edx, %edi) -- movdqa %xmm3, -16(%edx, %edi) -- -- jae L(shl_9_loop) -- --L(shl_9_end): -- lea 32(%ecx), %ecx -- add %ecx, %edi -- add %edi, %edx -- lea 
9(%edi, %eax), %eax -- POP (%edi) -- BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) -- -- ALIGN (4) --L(shl_10): -- BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) -- lea -10(%eax), %eax -- movaps (%eax), %xmm1 -- xor %edi, %edi -- lea -32(%ecx), %ecx -- movdqu %xmm0, (%esi) -- POP (%esi) --L(shl_10_loop): -- -- movdqa 16(%eax, %edi), %xmm2 -- sub $32, %ecx -- movdqa 32(%eax, %edi), %xmm3 -- movdqa %xmm3, %xmm4 -- palignr $10, %xmm2, %xmm3 -- palignr $10, %xmm1, %xmm2 -- lea 32(%edi), %edi -- movdqa %xmm2, -32(%edx, %edi) -- movdqa %xmm3, -16(%edx, %edi) -- ++ jb L(shl_9_end) + + movdqa 16(%eax, %edi), %xmm2 + sub $32, %ecx +@@ -794,7 +793,7 @@ L(shl_10_loop): + movdqa %xmm2, -32(%edx, %edi) + movdqa %xmm3, -16(%edx, %edi) + - jl L(shl_10_end) -- -- movdqa 16(%eax, %edi), %xmm2 -- sub $32, %ecx -- movdqa 32(%eax, %edi), %xmm3 -- movdqa %xmm3, %xmm1 -- palignr $10, %xmm2, %xmm3 -- palignr $10, %xmm4, %xmm2 -- lea 32(%edi), %edi -- movdqa %xmm2, -32(%edx, %edi) -- movdqa %xmm3, -16(%edx, %edi) -- -- jae L(shl_10_loop) -- --L(shl_10_end): -- lea 32(%ecx), %ecx -- add %ecx, %edi -- add %edi, %edx -- lea 10(%edi, %eax), %eax -- POP (%edi) -- BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) -- -- ALIGN (4) --L(shl_11): -- BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) -- lea -11(%eax), %eax -- movaps (%eax), %xmm1 -- xor %edi, %edi -- lea -32(%ecx), %ecx -- movdqu %xmm0, (%esi) -- POP (%esi) --L(shl_11_loop): -- -- movdqa 16(%eax, %edi), %xmm2 -- sub $32, %ecx -- movdqa 32(%eax, %edi), %xmm3 -- movdqa %xmm3, %xmm4 -- palignr $11, %xmm2, %xmm3 -- palignr $11, %xmm1, %xmm2 -- lea 32(%edi), %edi -- movdqa %xmm2, -32(%edx, %edi) -- movdqa %xmm3, -16(%edx, %edi) -- ++ jb L(shl_10_end) + + movdqa 16(%eax, %edi), %xmm2 + sub $32, %ecx +@@ -837,7 +836,7 @@ L(shl_11_loop): + movdqa %xmm2, -32(%edx, %edi) + movdqa %xmm3, -16(%edx, %edi) + - jl L(shl_11_end) -- -- movdqa 16(%eax, %edi), %xmm2 -- sub $32, %ecx -- movdqa 32(%eax, %edi), %xmm3 -- movdqa 
%xmm3, %xmm1 -- palignr $11, %xmm2, %xmm3 -- palignr $11, %xmm4, %xmm2 -- lea 32(%edi), %edi -- movdqa %xmm2, -32(%edx, %edi) -- movdqa %xmm3, -16(%edx, %edi) -- -- jae L(shl_11_loop) -- --L(shl_11_end): -- lea 32(%ecx), %ecx -- add %ecx, %edi -- add %edi, %edx -- lea 11(%edi, %eax), %eax -- POP (%edi) -- BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) -- -- ALIGN (4) --L(shl_12): -- BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) -- lea -12(%eax), %eax -- movaps (%eax), %xmm1 -- xor %edi, %edi -- lea -32(%ecx), %ecx -- movdqu %xmm0, (%esi) -- POP (%esi) --L(shl_12_loop): -- -- movdqa 16(%eax, %edi), %xmm2 -- sub $32, %ecx -- movdqa 32(%eax, %edi), %xmm3 -- movdqa %xmm3, %xmm4 -- palignr $12, %xmm2, %xmm3 -- palignr $12, %xmm1, %xmm2 -- lea 32(%edi), %edi -- movdqa %xmm2, -32(%edx, %edi) -- movdqa %xmm3, -16(%edx, %edi) -- ++ jb L(shl_11_end) + + movdqa 16(%eax, %edi), %xmm2 + sub $32, %ecx +@@ -880,7 +879,7 @@ L(shl_12_loop): + movdqa %xmm2, -32(%edx, %edi) + movdqa %xmm3, -16(%edx, %edi) + - jl L(shl_12_end) -- -- movdqa 16(%eax, %edi), %xmm2 -- sub $32, %ecx -- movdqa 32(%eax, %edi), %xmm3 -- movdqa %xmm3, %xmm1 -- palignr $12, %xmm2, %xmm3 -- palignr $12, %xmm4, %xmm2 -- lea 32(%edi), %edi -- movdqa %xmm2, -32(%edx, %edi) -- movdqa %xmm3, -16(%edx, %edi) -- -- jae L(shl_12_loop) -- --L(shl_12_end): -- lea 32(%ecx), %ecx -- add %ecx, %edi -- add %edi, %edx -- lea 12(%edi, %eax), %eax -- POP (%edi) -- BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) -- -- ALIGN (4) --L(shl_13): -- BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) -- lea -13(%eax), %eax -- movaps (%eax), %xmm1 -- xor %edi, %edi -- lea -32(%ecx), %ecx -- movdqu %xmm0, (%esi) -- POP (%esi) --L(shl_13_loop): -- -- movdqa 16(%eax, %edi), %xmm2 -- sub $32, %ecx -- movdqa 32(%eax, %edi), %xmm3 -- movdqa %xmm3, %xmm4 -- palignr $13, %xmm2, %xmm3 -- palignr $13, %xmm1, %xmm2 -- lea 32(%edi), %edi -- movdqa %xmm2, -32(%edx, %edi) -- movdqa %xmm3, -16(%edx, %edi) -- ++ jb 
L(shl_12_end) + + movdqa 16(%eax, %edi), %xmm2 + sub $32, %ecx +@@ -923,7 +922,7 @@ L(shl_13_loop): + movdqa %xmm2, -32(%edx, %edi) + movdqa %xmm3, -16(%edx, %edi) + - jl L(shl_13_end) -- -- movdqa 16(%eax, %edi), %xmm2 -- sub $32, %ecx -- movdqa 32(%eax, %edi), %xmm3 -- movdqa %xmm3, %xmm1 -- palignr $13, %xmm2, %xmm3 -- palignr $13, %xmm4, %xmm2 -- lea 32(%edi), %edi -- movdqa %xmm2, -32(%edx, %edi) -- movdqa %xmm3, -16(%edx, %edi) -- -- jae L(shl_13_loop) -- --L(shl_13_end): -- lea 32(%ecx), %ecx -- add %ecx, %edi -- add %edi, %edx -- lea 13(%edi, %eax), %eax -- POP (%edi) -- BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) -- -- ALIGN (4) --L(shl_14): -- BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) -- lea -14(%eax), %eax -- movaps (%eax), %xmm1 -- xor %edi, %edi -- lea -32(%ecx), %ecx -- movdqu %xmm0, (%esi) -- POP (%esi) --L(shl_14_loop): -- -- movdqa 16(%eax, %edi), %xmm2 -- sub $32, %ecx -- movdqa 32(%eax, %edi), %xmm3 -- movdqa %xmm3, %xmm4 -- palignr $14, %xmm2, %xmm3 -- palignr $14, %xmm1, %xmm2 -- lea 32(%edi), %edi -- movdqa %xmm2, -32(%edx, %edi) -- movdqa %xmm3, -16(%edx, %edi) -- ++ jb L(shl_13_end) + + movdqa 16(%eax, %edi), %xmm2 + sub $32, %ecx +@@ -966,7 +965,7 @@ L(shl_14_loop): + movdqa %xmm2, -32(%edx, %edi) + movdqa %xmm3, -16(%edx, %edi) + - jl L(shl_14_end) -- -- movdqa 16(%eax, %edi), %xmm2 -- sub $32, %ecx -- movdqa 32(%eax, %edi), %xmm3 -- movdqa %xmm3, %xmm1 -- palignr $14, %xmm2, %xmm3 -- palignr $14, %xmm4, %xmm2 -- lea 32(%edi), %edi -- movdqa %xmm2, -32(%edx, %edi) -- movdqa %xmm3, -16(%edx, %edi) -- -- jae L(shl_14_loop) -- --L(shl_14_end): -- lea 32(%ecx), %ecx -- add %ecx, %edi -- add %edi, %edx -- lea 14(%edi, %eax), %eax -- POP (%edi) -- BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) -- -- -- ALIGN (4) --L(shl_15): -- BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) -- lea -15(%eax), %eax -- movaps (%eax), %xmm1 -- xor %edi, %edi -- lea -32(%ecx), %ecx -- movdqu %xmm0, (%esi) -- POP (%esi) 
--L(shl_15_loop): -- -- movdqa 16(%eax, %edi), %xmm2 -- sub $32, %ecx -- movdqa 32(%eax, %edi), %xmm3 -- movdqa %xmm3, %xmm4 -- palignr $15, %xmm2, %xmm3 -- palignr $15, %xmm1, %xmm2 -- lea 32(%edi), %edi -- movdqa %xmm2, -32(%edx, %edi) -- movdqa %xmm3, -16(%edx, %edi) -- -- jl L(shl_15_end) -- -- movdqa 16(%eax, %edi), %xmm2 -- sub $32, %ecx -- movdqa 32(%eax, %edi), %xmm3 -- movdqa %xmm3, %xmm1 -- palignr $15, %xmm2, %xmm3 -- palignr $15, %xmm4, %xmm2 -- lea 32(%edi), %edi -- movdqa %xmm2, -32(%edx, %edi) -- movdqa %xmm3, -16(%edx, %edi) -- -- jae L(shl_15_loop) -- --L(shl_15_end): -- lea 32(%ecx), %ecx -- add %ecx, %edi -- add %edi, %edx -- lea 15(%edi, %eax), %eax -- POP (%edi) -- BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) -- -- -- ALIGN (4) --L(fwd_write_44bytes): -- movl -44(%eax), %ecx -- movl %ecx, -44(%edx) --L(fwd_write_40bytes): -- movl -40(%eax), %ecx -- movl %ecx, -40(%edx) --L(fwd_write_36bytes): -- movl -36(%eax), %ecx -- movl %ecx, -36(%edx) --L(fwd_write_32bytes): -- movl -32(%eax), %ecx -- movl %ecx, -32(%edx) --L(fwd_write_28bytes): -- movl -28(%eax), %ecx -- movl %ecx, -28(%edx) --L(fwd_write_24bytes): -- movl -24(%eax), %ecx -- movl %ecx, -24(%edx) --L(fwd_write_20bytes): -- movl -20(%eax), %ecx -- movl %ecx, -20(%edx) --L(fwd_write_16bytes): -- movl -16(%eax), %ecx -- movl %ecx, -16(%edx) --L(fwd_write_12bytes): -- movl -12(%eax), %ecx -- movl %ecx, -12(%edx) --L(fwd_write_8bytes): -- movl -8(%eax), %ecx -- movl %ecx, -8(%edx) --L(fwd_write_4bytes): -- movl -4(%eax), %ecx -- movl %ecx, -4(%edx) --L(fwd_write_0bytes): --#ifndef USE_AS_BCOPY --# ifdef USE_AS_MEMPCPY -- movl %edx, %eax --# else -- movl DEST(%esp), %eax --# endif --#endif -- RETURN -- -- ALIGN (4) --L(fwd_write_5bytes): -- movl -5(%eax), %ecx -- movl -4(%eax), %eax -- movl %ecx, -5(%edx) -- movl %eax, -4(%edx) --#ifndef USE_AS_BCOPY --# ifdef USE_AS_MEMPCPY -- movl %edx, %eax --# else -- movl DEST(%esp), %eax --# endif --#endif -- RETURN -- -- ALIGN (4) 
--L(fwd_write_45bytes): -- movl -45(%eax), %ecx -- movl %ecx, -45(%edx) --L(fwd_write_41bytes): -- movl -41(%eax), %ecx -- movl %ecx, -41(%edx) --L(fwd_write_37bytes): -- movl -37(%eax), %ecx -- movl %ecx, -37(%edx) --L(fwd_write_33bytes): -- movl -33(%eax), %ecx -- movl %ecx, -33(%edx) --L(fwd_write_29bytes): -- movl -29(%eax), %ecx -- movl %ecx, -29(%edx) --L(fwd_write_25bytes): -- movl -25(%eax), %ecx -- movl %ecx, -25(%edx) --L(fwd_write_21bytes): -- movl -21(%eax), %ecx -- movl %ecx, -21(%edx) --L(fwd_write_17bytes): -- movl -17(%eax), %ecx -- movl %ecx, -17(%edx) --L(fwd_write_13bytes): -- movl -13(%eax), %ecx -- movl %ecx, -13(%edx) --L(fwd_write_9bytes): -- movl -9(%eax), %ecx -- movl %ecx, -9(%edx) -- movl -5(%eax), %ecx -- movl %ecx, -5(%edx) --L(fwd_write_1bytes): -- movzbl -1(%eax), %ecx -- movb %cl, -1(%edx) --#ifndef USE_AS_BCOPY --# ifdef USE_AS_MEMPCPY -- movl %edx, %eax --# else -- movl DEST(%esp), %eax --# endif --#endif -- RETURN -- -- ALIGN (4) --L(fwd_write_46bytes): -- movl -46(%eax), %ecx -- movl %ecx, -46(%edx) --L(fwd_write_42bytes): -- movl -42(%eax), %ecx -- movl %ecx, -42(%edx) --L(fwd_write_38bytes): -- movl -38(%eax), %ecx -- movl %ecx, -38(%edx) --L(fwd_write_34bytes): -- movl -34(%eax), %ecx -- movl %ecx, -34(%edx) --L(fwd_write_30bytes): -- movl -30(%eax), %ecx -- movl %ecx, -30(%edx) --L(fwd_write_26bytes): -- movl -26(%eax), %ecx -- movl %ecx, -26(%edx) --L(fwd_write_22bytes): -- movl -22(%eax), %ecx -- movl %ecx, -22(%edx) --L(fwd_write_18bytes): -- movl -18(%eax), %ecx -- movl %ecx, -18(%edx) --L(fwd_write_14bytes): -- movl -14(%eax), %ecx -- movl %ecx, -14(%edx) --L(fwd_write_10bytes): -- movl -10(%eax), %ecx -- movl %ecx, -10(%edx) --L(fwd_write_6bytes): -- movl -6(%eax), %ecx -- movl %ecx, -6(%edx) --L(fwd_write_2bytes): -- movzwl -2(%eax), %ecx -- movw %cx, -2(%edx) --#ifndef USE_AS_BCOPY --# ifdef USE_AS_MEMPCPY -- movl %edx, %eax --# else -- movl DEST(%esp), %eax --# endif --#endif -- RETURN -- -- ALIGN (4) 
--L(fwd_write_47bytes): -- movl -47(%eax), %ecx -- movl %ecx, -47(%edx) --L(fwd_write_43bytes): -- movl -43(%eax), %ecx -- movl %ecx, -43(%edx) --L(fwd_write_39bytes): -- movl -39(%eax), %ecx -- movl %ecx, -39(%edx) --L(fwd_write_35bytes): -- movl -35(%eax), %ecx -- movl %ecx, -35(%edx) --L(fwd_write_31bytes): -- movl -31(%eax), %ecx -- movl %ecx, -31(%edx) --L(fwd_write_27bytes): -- movl -27(%eax), %ecx -- movl %ecx, -27(%edx) --L(fwd_write_23bytes): -- movl -23(%eax), %ecx -- movl %ecx, -23(%edx) --L(fwd_write_19bytes): -- movl -19(%eax), %ecx -- movl %ecx, -19(%edx) --L(fwd_write_15bytes): -- movl -15(%eax), %ecx -- movl %ecx, -15(%edx) --L(fwd_write_11bytes): -- movl -11(%eax), %ecx -- movl %ecx, -11(%edx) --L(fwd_write_7bytes): -- movl -7(%eax), %ecx -- movl %ecx, -7(%edx) --L(fwd_write_3bytes): -- movzwl -3(%eax), %ecx -- movzbl -1(%eax), %eax -- movw %cx, -3(%edx) -- movb %al, -1(%edx) --#ifndef USE_AS_BCOPY --# ifdef USE_AS_MEMPCPY -- movl %edx, %eax --# else -- movl DEST(%esp), %eax --# endif --#endif -- RETURN -- -- ALIGN (4) --L(large_page): -- movdqu (%eax), %xmm1 -- lea 16(%eax), %eax -- movdqu %xmm0, (%esi) -- movntdq %xmm1, (%edx) -- lea 16(%edx), %edx -- POP (%esi) -- lea -0x90(%ecx), %ecx -- POP (%edi) --L(large_page_loop): -- movdqu (%eax), %xmm0 -- movdqu 0x10(%eax), %xmm1 -- movdqu 0x20(%eax), %xmm2 -- movdqu 0x30(%eax), %xmm3 -- movdqu 0x40(%eax), %xmm4 -- movdqu 0x50(%eax), %xmm5 -- movdqu 0x60(%eax), %xmm6 -- movdqu 0x70(%eax), %xmm7 -- lea 0x80(%eax), %eax -- -- sub $0x80, %ecx -- movntdq %xmm0, (%edx) -- movntdq %xmm1, 0x10(%edx) -- movntdq %xmm2, 0x20(%edx) -- movntdq %xmm3, 0x30(%edx) -- movntdq %xmm4, 0x40(%edx) -- movntdq %xmm5, 0x50(%edx) -- movntdq %xmm6, 0x60(%edx) -- movntdq %xmm7, 0x70(%edx) -- lea 0x80(%edx), %edx -- jae L(large_page_loop) -- cmp $-0x40, %ecx -- lea 0x80(%ecx), %ecx -- jl L(large_page_less_64bytes) -- -- movdqu (%eax), %xmm0 -- movdqu 0x10(%eax), %xmm1 -- movdqu 0x20(%eax), %xmm2 -- movdqu 0x30(%eax), %xmm3 -- lea 
0x40(%eax), %eax -- -- movntdq %xmm0, (%edx) -- movntdq %xmm1, 0x10(%edx) -- movntdq %xmm2, 0x20(%edx) -- movntdq %xmm3, 0x30(%edx) -- lea 0x40(%edx), %edx -- sub $0x40, %ecx --L(large_page_less_64bytes): -- cmp $32, %ecx -- jl L(large_page_less_32bytes) -- movdqu (%eax), %xmm0 -- movdqu 0x10(%eax), %xmm1 -- lea 0x20(%eax), %eax -- movntdq %xmm0, (%edx) -- movntdq %xmm1, 0x10(%edx) -- lea 0x20(%edx), %edx -- sub $0x20, %ecx --L(large_page_less_32bytes): -- add %ecx, %edx -- add %ecx, %eax -- sfence -- BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4) -- -- -- ALIGN (4) --L(bk_write_44bytes): -- movl 40(%eax), %ecx -- movl %ecx, 40(%edx) --L(bk_write_40bytes): -- movl 36(%eax), %ecx -- movl %ecx, 36(%edx) --L(bk_write_36bytes): -- movl 32(%eax), %ecx -- movl %ecx, 32(%edx) --L(bk_write_32bytes): -- movl 28(%eax), %ecx -- movl %ecx, 28(%edx) --L(bk_write_28bytes): -- movl 24(%eax), %ecx -- movl %ecx, 24(%edx) --L(bk_write_24bytes): -- movl 20(%eax), %ecx -- movl %ecx, 20(%edx) --L(bk_write_20bytes): -- movl 16(%eax), %ecx -- movl %ecx, 16(%edx) --L(bk_write_16bytes): -- movl 12(%eax), %ecx -- movl %ecx, 12(%edx) --L(bk_write_12bytes): -- movl 8(%eax), %ecx -- movl %ecx, 8(%edx) --L(bk_write_8bytes): -- movl 4(%eax), %ecx -- movl %ecx, 4(%edx) --L(bk_write_4bytes): -- movl (%eax), %ecx -- movl %ecx, (%edx) --L(bk_write_0bytes): --#ifndef USE_AS_BCOPY -- movl DEST(%esp), %eax --# ifdef USE_AS_MEMPCPY -- movl LEN(%esp), %ecx -- add %ecx, %eax --# endif --#endif -- RETURN -- -- ALIGN (4) --L(bk_write_45bytes): -- movl 41(%eax), %ecx -- movl %ecx, 41(%edx) --L(bk_write_41bytes): -- movl 37(%eax), %ecx -- movl %ecx, 37(%edx) --L(bk_write_37bytes): -- movl 33(%eax), %ecx -- movl %ecx, 33(%edx) --L(bk_write_33bytes): -- movl 29(%eax), %ecx -- movl %ecx, 29(%edx) --L(bk_write_29bytes): -- movl 25(%eax), %ecx -- movl %ecx, 25(%edx) --L(bk_write_25bytes): -- movl 21(%eax), %ecx -- movl %ecx, 21(%edx) --L(bk_write_21bytes): -- movl 17(%eax), %ecx -- movl %ecx, 17(%edx) 
--L(bk_write_17bytes): -- movl 13(%eax), %ecx -- movl %ecx, 13(%edx) --L(bk_write_13bytes): -- movl 9(%eax), %ecx -- movl %ecx, 9(%edx) --L(bk_write_9bytes): -- movl 5(%eax), %ecx -- movl %ecx, 5(%edx) --L(bk_write_5bytes): -- movl 1(%eax), %ecx -- movl %ecx, 1(%edx) --L(bk_write_1bytes): -- movzbl (%eax), %ecx -- movb %cl, (%edx) --#ifndef USE_AS_BCOPY -- movl DEST(%esp), %eax --# ifdef USE_AS_MEMPCPY -- movl LEN(%esp), %ecx -- add %ecx, %eax --# endif --#endif -- RETURN -- -- ALIGN (4) --L(bk_write_46bytes): -- movl 42(%eax), %ecx -- movl %ecx, 42(%edx) --L(bk_write_42bytes): -- movl 38(%eax), %ecx -- movl %ecx, 38(%edx) --L(bk_write_38bytes): -- movl 34(%eax), %ecx -- movl %ecx, 34(%edx) --L(bk_write_34bytes): -- movl 30(%eax), %ecx -- movl %ecx, 30(%edx) --L(bk_write_30bytes): -- movl 26(%eax), %ecx -- movl %ecx, 26(%edx) --L(bk_write_26bytes): -- movl 22(%eax), %ecx -- movl %ecx, 22(%edx) --L(bk_write_22bytes): -- movl 18(%eax), %ecx -- movl %ecx, 18(%edx) --L(bk_write_18bytes): -- movl 14(%eax), %ecx -- movl %ecx, 14(%edx) --L(bk_write_14bytes): -- movl 10(%eax), %ecx -- movl %ecx, 10(%edx) --L(bk_write_10bytes): -- movl 6(%eax), %ecx -- movl %ecx, 6(%edx) --L(bk_write_6bytes): -- movl 2(%eax), %ecx -- movl %ecx, 2(%edx) --L(bk_write_2bytes): -- movzwl (%eax), %ecx -- movw %cx, (%edx) --#ifndef USE_AS_BCOPY -- movl DEST(%esp), %eax --# ifdef USE_AS_MEMPCPY -- movl LEN(%esp), %ecx -- add %ecx, %eax --# endif --#endif -- RETURN -- -- ALIGN (4) --L(bk_write_47bytes): -- movl 43(%eax), %ecx -- movl %ecx, 43(%edx) --L(bk_write_43bytes): -- movl 39(%eax), %ecx -- movl %ecx, 39(%edx) --L(bk_write_39bytes): -- movl 35(%eax), %ecx -- movl %ecx, 35(%edx) --L(bk_write_35bytes): -- movl 31(%eax), %ecx -- movl %ecx, 31(%edx) --L(bk_write_31bytes): -- movl 27(%eax), %ecx -- movl %ecx, 27(%edx) --L(bk_write_27bytes): -- movl 23(%eax), %ecx -- movl %ecx, 23(%edx) --L(bk_write_23bytes): -- movl 19(%eax), %ecx -- movl %ecx, 19(%edx) --L(bk_write_19bytes): -- movl 15(%eax), 
%ecx -- movl %ecx, 15(%edx) --L(bk_write_15bytes): -- movl 11(%eax), %ecx -- movl %ecx, 11(%edx) --L(bk_write_11bytes): -- movl 7(%eax), %ecx -- movl %ecx, 7(%edx) --L(bk_write_7bytes): -- movl 3(%eax), %ecx -- movl %ecx, 3(%edx) --L(bk_write_3bytes): -- movzwl 1(%eax), %ecx -- movw %cx, 1(%edx) -- movzbl (%eax), %eax -- movb %al, (%edx) --#ifndef USE_AS_BCOPY -- movl DEST(%esp), %eax --# ifdef USE_AS_MEMPCPY -- movl LEN(%esp), %ecx -- add %ecx, %eax --# endif --#endif -- RETURN_END -- -- -- .pushsection .rodata.ssse3,"a",@progbits -- ALIGN (2) --L(table_48bytes_fwd): -- .int JMPTBL (L(fwd_write_0bytes), L(table_48bytes_fwd)) -- .int JMPTBL (L(fwd_write_1bytes), L(table_48bytes_fwd)) -- .int JMPTBL (L(fwd_write_2bytes), L(table_48bytes_fwd)) -- .int JMPTBL (L(fwd_write_3bytes), L(table_48bytes_fwd)) -- .int JMPTBL (L(fwd_write_4bytes), L(table_48bytes_fwd)) -- .int JMPTBL (L(fwd_write_5bytes), L(table_48bytes_fwd)) -- .int JMPTBL (L(fwd_write_6bytes), L(table_48bytes_fwd)) -- .int JMPTBL (L(fwd_write_7bytes), L(table_48bytes_fwd)) -- .int JMPTBL (L(fwd_write_8bytes), L(table_48bytes_fwd)) -- .int JMPTBL (L(fwd_write_9bytes), L(table_48bytes_fwd)) -- .int JMPTBL (L(fwd_write_10bytes), L(table_48bytes_fwd)) -- .int JMPTBL (L(fwd_write_11bytes), L(table_48bytes_fwd)) -- .int JMPTBL (L(fwd_write_12bytes), L(table_48bytes_fwd)) -- .int JMPTBL (L(fwd_write_13bytes), L(table_48bytes_fwd)) -- .int JMPTBL (L(fwd_write_14bytes), L(table_48bytes_fwd)) -- .int JMPTBL (L(fwd_write_15bytes), L(table_48bytes_fwd)) -- .int JMPTBL (L(fwd_write_16bytes), L(table_48bytes_fwd)) -- .int JMPTBL (L(fwd_write_17bytes), L(table_48bytes_fwd)) -- .int JMPTBL (L(fwd_write_18bytes), L(table_48bytes_fwd)) -- .int JMPTBL (L(fwd_write_19bytes), L(table_48bytes_fwd)) -- .int JMPTBL (L(fwd_write_20bytes), L(table_48bytes_fwd)) -- .int JMPTBL (L(fwd_write_21bytes), L(table_48bytes_fwd)) -- .int JMPTBL (L(fwd_write_22bytes), L(table_48bytes_fwd)) -- .int JMPTBL (L(fwd_write_23bytes), 
L(table_48bytes_fwd)) -- .int JMPTBL (L(fwd_write_24bytes), L(table_48bytes_fwd)) -- .int JMPTBL (L(fwd_write_25bytes), L(table_48bytes_fwd)) -- .int JMPTBL (L(fwd_write_26bytes), L(table_48bytes_fwd)) -- .int JMPTBL (L(fwd_write_27bytes), L(table_48bytes_fwd)) -- .int JMPTBL (L(fwd_write_28bytes), L(table_48bytes_fwd)) -- .int JMPTBL (L(fwd_write_29bytes), L(table_48bytes_fwd)) -- .int JMPTBL (L(fwd_write_30bytes), L(table_48bytes_fwd)) -- .int JMPTBL (L(fwd_write_31bytes), L(table_48bytes_fwd)) -- .int JMPTBL (L(fwd_write_32bytes), L(table_48bytes_fwd)) -- .int JMPTBL (L(fwd_write_33bytes), L(table_48bytes_fwd)) -- .int JMPTBL (L(fwd_write_34bytes), L(table_48bytes_fwd)) -- .int JMPTBL (L(fwd_write_35bytes), L(table_48bytes_fwd)) -- .int JMPTBL (L(fwd_write_36bytes), L(table_48bytes_fwd)) -- .int JMPTBL (L(fwd_write_37bytes), L(table_48bytes_fwd)) -- .int JMPTBL (L(fwd_write_38bytes), L(table_48bytes_fwd)) -- .int JMPTBL (L(fwd_write_39bytes), L(table_48bytes_fwd)) -- .int JMPTBL (L(fwd_write_40bytes), L(table_48bytes_fwd)) -- .int JMPTBL (L(fwd_write_41bytes), L(table_48bytes_fwd)) -- .int JMPTBL (L(fwd_write_42bytes), L(table_48bytes_fwd)) -- .int JMPTBL (L(fwd_write_43bytes), L(table_48bytes_fwd)) -- .int JMPTBL (L(fwd_write_44bytes), L(table_48bytes_fwd)) -- .int JMPTBL (L(fwd_write_45bytes), L(table_48bytes_fwd)) -- .int JMPTBL (L(fwd_write_46bytes), L(table_48bytes_fwd)) -- .int JMPTBL (L(fwd_write_47bytes), L(table_48bytes_fwd)) -- -- ALIGN (2) --L(shl_table): -- .int JMPTBL (L(shl_0), L(shl_table)) -- .int JMPTBL (L(shl_1), L(shl_table)) -- .int JMPTBL (L(shl_2), L(shl_table)) -- .int JMPTBL (L(shl_3), L(shl_table)) -- .int JMPTBL (L(shl_4), L(shl_table)) -- .int JMPTBL (L(shl_5), L(shl_table)) -- .int JMPTBL (L(shl_6), L(shl_table)) -- .int JMPTBL (L(shl_7), L(shl_table)) -- .int JMPTBL (L(shl_8), L(shl_table)) -- .int JMPTBL (L(shl_9), L(shl_table)) -- .int JMPTBL (L(shl_10), L(shl_table)) -- .int JMPTBL (L(shl_11), L(shl_table)) -- .int JMPTBL 
(L(shl_12), L(shl_table)) -- .int JMPTBL (L(shl_13), L(shl_table)) -- .int JMPTBL (L(shl_14), L(shl_table)) -- .int JMPTBL (L(shl_15), L(shl_table)) -- -- ALIGN (2) --L(table_48_bytes_bwd): -- .int JMPTBL (L(bk_write_0bytes), L(table_48_bytes_bwd)) -- .int JMPTBL (L(bk_write_1bytes), L(table_48_bytes_bwd)) -- .int JMPTBL (L(bk_write_2bytes), L(table_48_bytes_bwd)) -- .int JMPTBL (L(bk_write_3bytes), L(table_48_bytes_bwd)) -- .int JMPTBL (L(bk_write_4bytes), L(table_48_bytes_bwd)) -- .int JMPTBL (L(bk_write_5bytes), L(table_48_bytes_bwd)) -- .int JMPTBL (L(bk_write_6bytes), L(table_48_bytes_bwd)) -- .int JMPTBL (L(bk_write_7bytes), L(table_48_bytes_bwd)) -- .int JMPTBL (L(bk_write_8bytes), L(table_48_bytes_bwd)) -- .int JMPTBL (L(bk_write_9bytes), L(table_48_bytes_bwd)) -- .int JMPTBL (L(bk_write_10bytes), L(table_48_bytes_bwd)) -- .int JMPTBL (L(bk_write_11bytes), L(table_48_bytes_bwd)) -- .int JMPTBL (L(bk_write_12bytes), L(table_48_bytes_bwd)) -- .int JMPTBL (L(bk_write_13bytes), L(table_48_bytes_bwd)) -- .int JMPTBL (L(bk_write_14bytes), L(table_48_bytes_bwd)) -- .int JMPTBL (L(bk_write_15bytes), L(table_48_bytes_bwd)) -- .int JMPTBL (L(bk_write_16bytes), L(table_48_bytes_bwd)) -- .int JMPTBL (L(bk_write_17bytes), L(table_48_bytes_bwd)) -- .int JMPTBL (L(bk_write_18bytes), L(table_48_bytes_bwd)) -- .int JMPTBL (L(bk_write_19bytes), L(table_48_bytes_bwd)) -- .int JMPTBL (L(bk_write_20bytes), L(table_48_bytes_bwd)) -- .int JMPTBL (L(bk_write_21bytes), L(table_48_bytes_bwd)) -- .int JMPTBL (L(bk_write_22bytes), L(table_48_bytes_bwd)) -- .int JMPTBL (L(bk_write_23bytes), L(table_48_bytes_bwd)) -- .int JMPTBL (L(bk_write_24bytes), L(table_48_bytes_bwd)) -- .int JMPTBL (L(bk_write_25bytes), L(table_48_bytes_bwd)) -- .int JMPTBL (L(bk_write_26bytes), L(table_48_bytes_bwd)) -- .int JMPTBL (L(bk_write_27bytes), L(table_48_bytes_bwd)) -- .int JMPTBL (L(bk_write_28bytes), L(table_48_bytes_bwd)) -- .int JMPTBL (L(bk_write_29bytes), L(table_48_bytes_bwd)) -- .int JMPTBL 
(L(bk_write_30bytes), L(table_48_bytes_bwd)) -- .int JMPTBL (L(bk_write_31bytes), L(table_48_bytes_bwd)) -- .int JMPTBL (L(bk_write_32bytes), L(table_48_bytes_bwd)) -- .int JMPTBL (L(bk_write_33bytes), L(table_48_bytes_bwd)) -- .int JMPTBL (L(bk_write_34bytes), L(table_48_bytes_bwd)) -- .int JMPTBL (L(bk_write_35bytes), L(table_48_bytes_bwd)) -- .int JMPTBL (L(bk_write_36bytes), L(table_48_bytes_bwd)) -- .int JMPTBL (L(bk_write_37bytes), L(table_48_bytes_bwd)) -- .int JMPTBL (L(bk_write_38bytes), L(table_48_bytes_bwd)) -- .int JMPTBL (L(bk_write_39bytes), L(table_48_bytes_bwd)) -- .int JMPTBL (L(bk_write_40bytes), L(table_48_bytes_bwd)) -- .int JMPTBL (L(bk_write_41bytes), L(table_48_bytes_bwd)) -- .int JMPTBL (L(bk_write_42bytes), L(table_48_bytes_bwd)) -- .int JMPTBL (L(bk_write_43bytes), L(table_48_bytes_bwd)) -- .int JMPTBL (L(bk_write_44bytes), L(table_48_bytes_bwd)) -- .int JMPTBL (L(bk_write_45bytes), L(table_48_bytes_bwd)) -- .int JMPTBL (L(bk_write_46bytes), L(table_48_bytes_bwd)) -- .int JMPTBL (L(bk_write_47bytes), L(table_48_bytes_bwd)) -- -- .popsection -- --#ifdef USE_AS_MEMMOVE -- ALIGN (4) --L(copy_backward): -- PUSH (%esi) -- movl %eax, %esi -- lea (%ecx,%edx,1),%edx -- lea (%ecx,%esi,1),%esi -- testl $0x3, %edx -- jnz L(bk_align) -- --L(bk_aligned_4): -- cmp $64, %ecx -- jge L(bk_write_more64bytes) -- --L(bk_write_64bytesless): -- cmp $32, %ecx -- jl L(bk_write_less32bytes) -- --L(bk_write_more32bytes): -- /* Copy 32 bytes at a time. 
*/ -- sub $32, %ecx -- movl -4(%esi), %eax -- movl %eax, -4(%edx) -- movl -8(%esi), %eax -- movl %eax, -8(%edx) -- movl -12(%esi), %eax -- movl %eax, -12(%edx) -- movl -16(%esi), %eax -- movl %eax, -16(%edx) -- movl -20(%esi), %eax -- movl %eax, -20(%edx) -- movl -24(%esi), %eax -- movl %eax, -24(%edx) -- movl -28(%esi), %eax -- movl %eax, -28(%edx) -- movl -32(%esi), %eax -- movl %eax, -32(%edx) -- sub $32, %edx -- sub $32, %esi -- --L(bk_write_less32bytes): -- movl %esi, %eax -- sub %ecx, %edx -- sub %ecx, %eax -- POP (%esi) --L(bk_write_less32bytes_2): -- BRANCH_TO_JMPTBL_ENTRY (L(table_48_bytes_bwd), %ecx, 4) -- -- ALIGN (4) --L(bk_align): -- cmp $8, %ecx -- jle L(bk_write_less32bytes) -- testl $1, %edx -- /* We get here only if (EDX & 3 ) != 0 so if (EDX & 1) ==0, -- then (EDX & 2) must be != 0. */ -- jz L(bk_got2) -- sub $1, %esi -- sub $1, %ecx -- sub $1, %edx -- movzbl (%esi), %eax -- movb %al, (%edx) -- -- testl $2, %edx -- jz L(bk_aligned_4) -- --L(bk_got2): -- sub $2, %esi -- sub $2, %ecx -- sub $2, %edx -- movzwl (%esi), %eax -- movw %ax, (%edx) -- jmp L(bk_aligned_4) -- -- ALIGN (4) --L(bk_write_more64bytes): -- /* Check alignment of last byte. */ -- testl $15, %edx -- jz L(bk_ssse3_cpy_pre) -- --/* EDX is aligned 4 bytes, but not 16 bytes. 
*/ --L(bk_ssse3_align): -- sub $4, %esi -- sub $4, %ecx -- sub $4, %edx -- movl (%esi), %eax -- movl %eax, (%edx) -- -- testl $15, %edx -- jz L(bk_ssse3_cpy_pre) -- -- sub $4, %esi -- sub $4, %ecx -- sub $4, %edx -- movl (%esi), %eax -- movl %eax, (%edx) -- -- testl $15, %edx -- jz L(bk_ssse3_cpy_pre) -- -- sub $4, %esi -- sub $4, %ecx -- sub $4, %edx -- movl (%esi), %eax -- movl %eax, (%edx) -- --L(bk_ssse3_cpy_pre): -- cmp $64, %ecx ++ jb L(shl_14_end) + + movdqa 16(%eax, %edi), %xmm2 + sub $32, %ecx +@@ -1010,7 +1009,7 @@ L(shl_15_loop): + movdqa %xmm2, -32(%edx, %edi) + movdqa %xmm3, -16(%edx, %edi) + +- jl L(shl_15_end) ++ jb L(shl_15_end) + + movdqa 16(%eax, %edi), %xmm2 + sub $32, %ecx +@@ -1281,7 +1280,7 @@ L(large_page_loop): + sub $0x40, %ecx + L(large_page_less_64bytes): + cmp $32, %ecx +- jl L(large_page_less_32bytes) ++ jb L(large_page_less_32bytes) + movdqu (%eax), %xmm0 + movdqu 0x10(%eax), %xmm1 + lea 0x20(%eax), %eax +@@ -1617,11 +1616,11 @@ L(copy_backward): + + L(bk_aligned_4): + cmp $64, %ecx +- jge L(bk_write_more64bytes) ++ jae L(bk_write_more64bytes) + + L(bk_write_64bytesless): + cmp $32, %ecx +- jl L(bk_write_less32bytes) ++ jb L(bk_write_less32bytes) + + L(bk_write_more32bytes): + /* Copy 32 bytes at a time. */ +@@ -1656,7 +1655,7 @@ L(bk_write_less32bytes_2): + ALIGN (4) + L(bk_align): + cmp $8, %ecx +- jle L(bk_write_less32bytes) ++ jbe L(bk_write_less32bytes) + testl $1, %edx + /* We get here only if (EDX & 3 ) != 0 so if (EDX & 1) ==0, + then (EDX & 2) must be != 0. 
*/ +@@ -1712,7 +1711,7 @@ L(bk_ssse3_align): + + L(bk_ssse3_cpy_pre): + cmp $64, %ecx - jl L(bk_write_more32bytes) -- --L(bk_ssse3_cpy): -- sub $64, %esi -- sub $64, %ecx -- sub $64, %edx -- movdqu 0x30(%esi), %xmm3 -- movdqa %xmm3, 0x30(%edx) -- movdqu 0x20(%esi), %xmm2 -- movdqa %xmm2, 0x20(%edx) -- movdqu 0x10(%esi), %xmm1 -- movdqa %xmm1, 0x10(%edx) -- movdqu (%esi), %xmm0 -- movdqa %xmm0, (%edx) -- cmp $64, %ecx ++ jb L(bk_write_more32bytes) + + L(bk_ssse3_cpy): + sub $64, %esi +@@ -1727,7 +1726,7 @@ L(bk_ssse3_cpy): + movdqu (%esi), %xmm0 + movdqa %xmm0, (%edx) + cmp $64, %ecx - jge L(bk_ssse3_cpy) -- jmp L(bk_write_64bytesless) -- --#endif -- --END (MEMCPY) -- --#endif ---- glibc-2.11-188-g0cbcca8/sysdeps/i386/i686/multiarch/memcpy.S -+++ glibc-2.11.90-12//dev/null -@@ -1,90 +0,0 @@ --/* Multiple versions of memcpy -- Copyright (C) 2010 Free Software Foundation, Inc. -- Contributed by Intel Corporation. -- This file is part of the GNU C Library. -- -- The GNU C Library is free software; you can redistribute it and/or -- modify it under the terms of the GNU Lesser General Public -- License as published by the Free Software Foundation; either -- version 2.1 of the License, or (at your option) any later version. -- -- The GNU C Library is distributed in the hope that it will be useful, -- but WITHOUT ANY WARRANTY; without even the implied warranty of -- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -- Lesser General Public License for more details. -- -- You should have received a copy of the GNU Lesser General Public -- License along with the GNU C Library; if not, write to the Free -- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA -- 02111-1307 USA. */ -- --#include --#include -- --/* Define multiple versions only for the definition in lib and for -- DSO. In static binaries we need memcpy before the initialization -- happened. 
*/ --#if defined SHARED && !defined NOT_IN_libc -- .section .gnu.linkonce.t.__i686.get_pc_thunk.bx,"ax",@progbits -- .globl __i686.get_pc_thunk.bx -- .hidden __i686.get_pc_thunk.bx -- .p2align 4 -- .type __i686.get_pc_thunk.bx,@function --__i686.get_pc_thunk.bx: -- movl (%esp), %ebx -- ret -- -- .text --ENTRY(memcpy) -- .type memcpy, @gnu_indirect_function -- pushl %ebx -- cfi_adjust_cfa_offset (4) -- cfi_rel_offset (ebx, 0) -- call __i686.get_pc_thunk.bx -- addl $_GLOBAL_OFFSET_TABLE_, %ebx -- cmpl $0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx) -- jne 1f -- call __init_cpu_features --1: leal __memcpy_ia32@GOTOFF(%ebx), %eax -- testl $bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features@GOTOFF(%ebx) -- jz 2f -- leal __memcpy_ssse3@GOTOFF(%ebx), %eax -- testl $bit_Fast_Rep_String, FEATURE_OFFSET+index_Fast_Rep_String+__cpu_features@GOTOFF(%ebx) -- jz 2f -- leal __memcpy_ssse3_rep@GOTOFF(%ebx), %eax --2: popl %ebx -- cfi_adjust_cfa_offset (-4) -- cfi_restore (ebx) -- ret --END(memcpy) -- --# undef ENTRY --# define ENTRY(name) \ -- .type __memcpy_ia32, @function; \ -- .p2align 4; \ -- __memcpy_ia32: cfi_startproc; \ -- CALL_MCOUNT --# undef END --# define END(name) \ -- cfi_endproc; .size __memcpy_ia32, .-__memcpy_ia32 -- --# undef ENTRY_CHK --# define ENTRY_CHK(name) \ -- .type __memcpy_chk_ia32, @function; \ -- .globl __memcpy_chk_ia32; \ -- .p2align 4; \ -- __memcpy_chk_ia32: cfi_startproc; \ -- CALL_MCOUNT --# undef END_CHK --# define END_CHK(name) \ -- cfi_endproc; .size __memcpy_chk_ia32, .-__memcpy_chk_ia32 -- --# undef libc_hidden_builtin_def --/* IFUNC doesn't work with the hidden functions in shared library since -- they will be called without setting up EBX needed for PLT which is -- used by IFUNC. 
*/ --# define libc_hidden_builtin_def(name) \ -- .globl __GI_memcpy; __GI_memcpy = __memcpy_ia32 --#endif -- --#include "../memcpy.S" ---- glibc-2.11-188-g0cbcca8/sysdeps/i386/i686/multiarch/memcpy_chk.S -+++ glibc-2.11.90-12//dev/null -@@ -1,64 +0,0 @@ --/* Multiple versions of __memcpy_chk -- Copyright (C) 2010 Free Software Foundation, Inc. -- Contributed by Intel Corporation. -- This file is part of the GNU C Library. -- -- The GNU C Library is free software; you can redistribute it and/or -- modify it under the terms of the GNU Lesser General Public -- License as published by the Free Software Foundation; either -- version 2.1 of the License, or (at your option) any later version. -- -- The GNU C Library is distributed in the hope that it will be useful, -- but WITHOUT ANY WARRANTY; without even the implied warranty of -- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -- Lesser General Public License for more details. -- -- You should have received a copy of the GNU Lesser General Public -- License along with the GNU C Library; if not, write to the Free -- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA -- 02111-1307 USA. */ -- --#include --#include -- --/* Define multiple versions only for the definition in lib and for -- DSO. There are no multiarch memcpy functions for static binaries. 
-- */ --#ifndef NOT_IN_libc --# ifdef SHARED -- .section .gnu.linkonce.t.__i686.get_pc_thunk.bx,"ax",@progbits -- .globl __i686.get_pc_thunk.bx -- .hidden __i686.get_pc_thunk.bx -- .p2align 4 -- .type __i686.get_pc_thunk.bx,@function --__i686.get_pc_thunk.bx: -- movl (%esp), %ebx -- ret -- -- .text --ENTRY(__memcpy_chk) -- .type __memcpy_chk, @gnu_indirect_function -- pushl %ebx -- cfi_adjust_cfa_offset (4) -- cfi_rel_offset (ebx, 0) -- call __i686.get_pc_thunk.bx -- addl $_GLOBAL_OFFSET_TABLE_, %ebx -- cmpl $0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx) -- jne 1f -- call __init_cpu_features --1: leal __memcpy_chk_ia32@GOTOFF(%ebx), %eax -- testl $bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features@GOTOFF(%ebx) -- jz 2f -- leal __memcpy_chk_ssse3@GOTOFF(%ebx), %eax -- testl $bit_Fast_Rep_String, FEATURE_OFFSET+index_Fast_Rep_String+__cpu_features@GOTOFF(%ebx) -- jz 2f -- leal __memcpy_chk_ssse3_rep@GOTOFF(%ebx), %eax --2: popl %ebx -- cfi_adjust_cfa_offset (-4) -- cfi_restore (ebx) -- ret --END(__memcpy_chk) --# else --# include "../memcpy_chk.S" --# endif --#endif ---- glibc-2.11-188-g0cbcca8/sysdeps/i386/i686/multiarch/memmove-ssse3-rep.S -+++ glibc-2.11.90-12//dev/null -@@ -1,4 +0,0 @@ --#define USE_AS_MEMMOVE --#define MEMCPY __memmove_ssse3_rep --#define MEMCPY_CHK __memmove_chk_ssse3_rep --#include "memcpy-ssse3-rep.S" ---- glibc-2.11-188-g0cbcca8/sysdeps/i386/i686/multiarch/memmove-ssse3.S -+++ glibc-2.11.90-12//dev/null -@@ -1,4 +0,0 @@ --#define USE_AS_MEMMOVE --#define MEMCPY __memmove_ssse3 --#define MEMCPY_CHK __memmove_chk_ssse3 --#include "memcpy-ssse3.S" ---- glibc-2.11-188-g0cbcca8/sysdeps/i386/i686/multiarch/memmove.S -+++ glibc-2.11.90-12//dev/null -@@ -1,117 +0,0 @@ --/* Multiple versions of memmove -- Copyright (C) 2010 Free Software Foundation, Inc. -- Contributed by Intel Corporation. -- This file is part of the GNU C Library. 
-- -- The GNU C Library is free software; you can redistribute it and/or -- modify it under the terms of the GNU Lesser General Public -- License as published by the Free Software Foundation; either -- version 2.1 of the License, or (at your option) any later version. -- -- The GNU C Library is distributed in the hope that it will be useful, -- but WITHOUT ANY WARRANTY; without even the implied warranty of -- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -- Lesser General Public License for more details. -- -- You should have received a copy of the GNU Lesser General Public -- License along with the GNU C Library; if not, write to the Free -- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA -- 02111-1307 USA. */ -- --#include --#include -- --/* Define multiple versions only for the definition in lib. */ --#ifndef NOT_IN_libc --# ifdef SHARED -- .section .gnu.linkonce.t.__i686.get_pc_thunk.bx,"ax",@progbits -- .globl __i686.get_pc_thunk.bx -- .hidden __i686.get_pc_thunk.bx -- .p2align 4 -- .type __i686.get_pc_thunk.bx,@function --__i686.get_pc_thunk.bx: -- movl (%esp), %ebx -- ret -- -- .text --ENTRY(memmove) -- .type memmove, @gnu_indirect_function -- pushl %ebx -- cfi_adjust_cfa_offset (4) -- cfi_rel_offset (ebx, 0) -- call __i686.get_pc_thunk.bx -- addl $_GLOBAL_OFFSET_TABLE_, %ebx -- cmpl $0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx) -- jne 1f -- call __init_cpu_features --1: leal __memmove_ia32@GOTOFF(%ebx), %eax -- testl $bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features@GOTOFF(%ebx) -- jz 2f -- leal __memmove_ssse3@GOTOFF(%ebx), %eax -- testl $bit_Fast_Rep_String, FEATURE_OFFSET+index_Fast_Rep_String+__cpu_features@GOTOFF(%ebx) -- jz 2f -- leal __memmove_ssse3_rep@GOTOFF(%ebx), %eax --2: popl %ebx -- cfi_adjust_cfa_offset (-4) -- cfi_restore (ebx) -- ret --END(memmove) -- --# undef ENTRY --# define ENTRY(name) \ -- .type __memmove_ia32, @function; \ -- .p2align 4; \ -- __memmove_ia32: cfi_startproc; \ -- CALL_MCOUNT --# else 
-- .text --ENTRY(memmove) -- .type memmove, @gnu_indirect_function -- cmpl $0, KIND_OFFSET+__cpu_features -- jne 1f -- call __init_cpu_features --1: leal __memmove_ia32, %eax -- testl $bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features -- jz 2f -- leal __memmove_ssse3, %eax -- testl $bit_Fast_Rep_String, FEATURE_OFFSET+index_Fast_Rep_String+__cpu_features -- jz 2f -- leal __memmove_ssse3_rep, %eax --2: ret --END(memmove) -- --# undef ENTRY --# define ENTRY(name) \ -- .type __memmove_ia32, @function; \ -- .globl __memmove_ia32; \ -- .p2align 4; \ -- __memmove_ia32: cfi_startproc; \ -- CALL_MCOUNT --# endif -- --# undef END --# define END(name) \ -- cfi_endproc; .size __memmove_ia32, .-__memmove_ia32 -- --# undef ENTRY_CHK --# define ENTRY_CHK(name) \ -- .type __memmove_chk_ia32, @function; \ -- .globl __memmove_chk_ia32; \ -- .p2align 4; \ -- __memmove_chk_ia32: cfi_startproc; \ -- CALL_MCOUNT --# undef END_CHK --# define END_CHK(name) \ -- cfi_endproc; .size __memmove_chk_ia32, .-__memmove_chk_ia32 -- --# ifdef SHARED --# undef libc_hidden_builtin_def --/* IFUNC doesn't work with the hidden functions in shared library since -- they will be called without setting up EBX needed for PLT which is -- used by IFUNC. */ --# define libc_hidden_builtin_def(name) \ -- .globl __GI_memmove; __GI_memmove = __memmove_ia32 --# endif --#endif -- --#include "../memmove.S" ---- glibc-2.11-188-g0cbcca8/sysdeps/i386/i686/multiarch/memmove_chk.S -+++ glibc-2.11.90-12//dev/null -@@ -1,112 +0,0 @@ --/* Multiple versions of __memmove_chk -- Copyright (C) 2010 Free Software Foundation, Inc. -- Contributed by Intel Corporation. -- This file is part of the GNU C Library. -- -- The GNU C Library is free software; you can redistribute it and/or -- modify it under the terms of the GNU Lesser General Public -- License as published by the Free Software Foundation; either -- version 2.1 of the License, or (at your option) any later version. 
-- -- The GNU C Library is distributed in the hope that it will be useful, -- but WITHOUT ANY WARRANTY; without even the implied warranty of -- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -- Lesser General Public License for more details. -- -- You should have received a copy of the GNU Lesser General Public -- License along with the GNU C Library; if not, write to the Free -- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA -- 02111-1307 USA. */ -- --#include --#include -- --/* Define multiple versions only for the definition in lib. */ --#ifndef NOT_IN_libc --# ifdef SHARED -- .section .gnu.linkonce.t.__i686.get_pc_thunk.bx,"ax",@progbits -- .globl __i686.get_pc_thunk.bx -- .hidden __i686.get_pc_thunk.bx -- .p2align 4 -- .type __i686.get_pc_thunk.bx,@function --__i686.get_pc_thunk.bx: -- movl (%esp), %ebx -- ret -- -- .text --ENTRY(__memmove_chk) -- .type __memmove_chk, @gnu_indirect_function -- pushl %ebx -- cfi_adjust_cfa_offset (4) -- cfi_rel_offset (ebx, 0) -- call __i686.get_pc_thunk.bx -- addl $_GLOBAL_OFFSET_TABLE_, %ebx -- cmpl $0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx) -- jne 1f -- call __init_cpu_features --1: leal __memmove_chk_ia32@GOTOFF(%ebx), %eax -- testl $bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features@GOTOFF(%ebx) -- jz 2f -- leal __memmove_chk_ssse3@GOTOFF(%ebx), %eax -- testl $bit_Fast_Rep_String, FEATURE_OFFSET+index_Fast_Rep_String+__cpu_features@GOTOFF(%ebx) -- jz 2f -- leal __memmove_chk_ssse3_rep@GOTOFF(%ebx), %eax --2: popl %ebx -- cfi_adjust_cfa_offset (-4) -- cfi_restore (ebx) -- ret --END(__memmove_chk) --# else -- .text --ENTRY(__memmove_chk) -- .type __memmove_chk, @gnu_indirect_function -- cmpl $0, KIND_OFFSET+__cpu_features -- jne 1f -- call __init_cpu_features --1: leal __memmove_chk_ia32, %eax -- testl $bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features -- jz 2f -- leal __memmove_chk_ssse3, %eax -- testl $bit_Fast_Rep_String, FEATURE_OFFSET+index_Fast_Rep_String+__cpu_features -- jz 2f 
-- leal __memmove_chk_ssse3_rep, %eax --2: ret --END(__memmove_chk) -- -- .type __memmove_chk_ssse3, @function -- .p2align 4; --__memmove_chk_ssse3: -- cfi_startproc -- CALL_MCOUNT -- movl 12(%esp), %eax -- cmpl %eax, 16(%esp) -- jb __chk_fail -- jmp __memmove_ssse3 -- cfi_endproc -- .size __memmove_chk_ssse3, .-__memmove_chk_ssse3 -- -- .type __memmove_chk_ssse3_rep, @function -- .p2align 4; --__memmove_chk_ssse3_rep: -- cfi_startproc -- CALL_MCOUNT -- movl 12(%esp), %eax -- cmpl %eax, 16(%esp) -- jb __chk_fail -- jmp __memmove_ssse3_rep -- cfi_endproc -- .size __memmove_chk_ssse3_rep, .-__memmove_chk_ssse3_rep -- -- .type __memmove_chk_ia32, @function -- .p2align 4; --__memmove_chk_ia32: -- cfi_startproc -- CALL_MCOUNT -- movl 12(%esp), %eax -- cmpl %eax, 16(%esp) -- jb __chk_fail -- jmp __memmove_ia32 -- cfi_endproc -- .size __memmove_chk_ia32, .-__memmove_chk_ia32 --# endif --#endif ---- glibc-2.11-188-g0cbcca8/sysdeps/i386/i686/multiarch/mempcpy-ssse3-rep.S -+++ glibc-2.11.90-12//dev/null -@@ -1,4 +0,0 @@ --#define USE_AS_MEMPCPY --#define MEMCPY __mempcpy_ssse3_rep --#define MEMCPY_CHK __mempcpy_chk_ssse3_rep --#include "memcpy-ssse3-rep.S" ---- glibc-2.11-188-g0cbcca8/sysdeps/i386/i686/multiarch/mempcpy-ssse3.S -+++ glibc-2.11.90-12//dev/null -@@ -1,4 +0,0 @@ --#define USE_AS_MEMPCPY --#define MEMCPY __mempcpy_ssse3 --#define MEMCPY_CHK __mempcpy_chk_ssse3 --#include "memcpy-ssse3.S" ---- glibc-2.11-188-g0cbcca8/sysdeps/i386/i686/multiarch/mempcpy.S -+++ glibc-2.11.90-12//dev/null -@@ -1,93 +0,0 @@ --/* Multiple versions of mempcpy -- Copyright (C) 2010 Free Software Foundation, Inc. -- Contributed by Intel Corporation. -- This file is part of the GNU C Library. -- -- The GNU C Library is free software; you can redistribute it and/or -- modify it under the terms of the GNU Lesser General Public -- License as published by the Free Software Foundation; either -- version 2.1 of the License, or (at your option) any later version. 
-- -- The GNU C Library is distributed in the hope that it will be useful, -- but WITHOUT ANY WARRANTY; without even the implied warranty of -- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -- Lesser General Public License for more details. -- -- You should have received a copy of the GNU Lesser General Public -- License along with the GNU C Library; if not, write to the Free -- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA -- 02111-1307 USA. */ -- --#include --#include -- --/* Define multiple versions only for the definition in lib and for -- DSO. In static binaries we need mempcpy before the initialization -- happened. */ --#if defined SHARED && !defined NOT_IN_libc -- .section .gnu.linkonce.t.__i686.get_pc_thunk.bx,"ax",@progbits -- .globl __i686.get_pc_thunk.bx -- .hidden __i686.get_pc_thunk.bx -- .p2align 4 -- .type __i686.get_pc_thunk.bx,@function --__i686.get_pc_thunk.bx: -- movl (%esp), %ebx -- ret -- -- .text --ENTRY(__mempcpy) -- .type __mempcpy, @gnu_indirect_function -- pushl %ebx -- cfi_adjust_cfa_offset (4) -- cfi_rel_offset (ebx, 0) -- call __i686.get_pc_thunk.bx -- addl $_GLOBAL_OFFSET_TABLE_, %ebx -- cmpl $0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx) -- jne 1f -- call __init_cpu_features --1: leal __mempcpy_ia32@GOTOFF(%ebx), %eax -- testl $bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features@GOTOFF(%ebx) -- jz 2f -- leal __mempcpy_ssse3@GOTOFF(%ebx), %eax -- testl $bit_Fast_Rep_String, FEATURE_OFFSET+index_Fast_Rep_String+__cpu_features@GOTOFF(%ebx) -- jz 2f -- leal __mempcpy_ssse3_rep@GOTOFF(%ebx), %eax --2: popl %ebx -- cfi_adjust_cfa_offset (-4) -- cfi_restore (ebx) -- ret --END(__mempcpy) -- --# undef ENTRY --# define ENTRY(name) \ -- .type __mempcpy_ia32, @function; \ -- .p2align 4; \ -- __mempcpy_ia32: cfi_startproc; \ -- CALL_MCOUNT --# undef END --# define END(name) \ -- cfi_endproc; .size __mempcpy_ia32, .-__mempcpy_ia32 -- --# undef ENTRY_CHK --# define ENTRY_CHK(name) \ -- .type __mempcpy_chk_ia32, 
@function; \ -- .globl __mempcpy_chk_ia32; \ -- .p2align 4; \ -- __mempcpy_chk_ia32: cfi_startproc; \ -- CALL_MCOUNT --# undef END_CHK --# define END_CHK(name) \ -- cfi_endproc; .size __mempcpy_chk_ia32, .-__mempcpy_chk_ia32 -- --# undef libc_hidden_def --# undef libc_hidden_builtin_def --/* IFUNC doesn't work with the hidden functions in shared library since -- they will be called without setting up EBX needed for PLT which is -- used by IFUNC. */ --# define libc_hidden_def(name) \ -- .globl __GI_mempcpy; __GI_mempcpy = __mempcpy_ia32 --# define libc_hidden_builtin_def(name) \ -- .globl __GI___mempcpy; __GI___mempcpy = __mempcpy_ia32 --#endif -- --#include "../mempcpy.S" ---- glibc-2.11-188-g0cbcca8/sysdeps/i386/i686/multiarch/mempcpy_chk.S -+++ glibc-2.11.90-12//dev/null -@@ -1,64 +0,0 @@ --/* Multiple versions of __mempcpy_chk -- Copyright (C) 2010 Free Software Foundation, Inc. -- Contributed by Intel Corporation. -- This file is part of the GNU C Library. -- -- The GNU C Library is free software; you can redistribute it and/or -- modify it under the terms of the GNU Lesser General Public -- License as published by the Free Software Foundation; either -- version 2.1 of the License, or (at your option) any later version. -- -- The GNU C Library is distributed in the hope that it will be useful, -- but WITHOUT ANY WARRANTY; without even the implied warranty of -- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -- Lesser General Public License for more details. -- -- You should have received a copy of the GNU Lesser General Public -- License along with the GNU C Library; if not, write to the Free -- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA -- 02111-1307 USA. */ -- --#include --#include -- --/* Define multiple versions only for the definition in lib and for -- DSO. There are no multiarch mempcpy functions for static binaries. 
-- */ --#ifndef NOT_IN_libc --# ifdef SHARED -- .section .gnu.linkonce.t.__i686.get_pc_thunk.bx,"ax",@progbits -- .globl __i686.get_pc_thunk.bx -- .hidden __i686.get_pc_thunk.bx -- .p2align 4 -- .type __i686.get_pc_thunk.bx,@function --__i686.get_pc_thunk.bx: -- movl (%esp), %ebx -- ret -- -- .text --ENTRY(__mempcpy_chk) -- .type __mempcpy_chk, @gnu_indirect_function -- pushl %ebx -- cfi_adjust_cfa_offset (4) -- cfi_rel_offset (ebx, 0) -- call __i686.get_pc_thunk.bx -- addl $_GLOBAL_OFFSET_TABLE_, %ebx -- cmpl $0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx) -- jne 1f -- call __init_cpu_features --1: leal __mempcpy_chk_ia32@GOTOFF(%ebx), %eax -- testl $bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features@GOTOFF(%ebx) -- jz 2f -- leal __mempcpy_chk_ssse3@GOTOFF(%ebx), %eax -- testl $bit_Fast_Rep_String, FEATURE_OFFSET+index_Fast_Rep_String+__cpu_features@GOTOFF(%ebx) -- jz 2f -- leal __mempcpy_chk_ssse3_rep@GOTOFF(%ebx), %eax --2: popl %ebx -- cfi_adjust_cfa_offset (-4) -- cfi_restore (ebx) -- ret --END(__mempcpy_chk) --# else --# include "../mempcpy_chk.S" --# endif --#endif ---- glibc-2.11-188-g0cbcca8/sysdeps/i386/i686/multiarch/memset-sse2-rep.S -+++ glibc-2.11.90-12//dev/null -@@ -1,821 +0,0 @@ --/* memset with SSE2 and REP string. -- Copyright (C) 2010 Free Software Foundation, Inc. -- Contributed by Intel Corporation. -- This file is part of the GNU C Library. -- -- The GNU C Library is free software; you can redistribute it and/or -- modify it under the terms of the GNU Lesser General Public -- License as published by the Free Software Foundation; either -- version 2.1 of the License, or (at your option) any later version. -- -- The GNU C Library is distributed in the hope that it will be useful, -- but WITHOUT ANY WARRANTY; without even the implied warranty of -- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -- Lesser General Public License for more details. 
-- -- You should have received a copy of the GNU Lesser General Public -- License along with the GNU C Library; if not, write to the Free -- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA -- 02111-1307 USA. */ -- --#ifndef NOT_IN_libc -- --#include --#include "asm-syntax.h" -- --#define CFI_PUSH(REG) \ -- cfi_adjust_cfa_offset (4); \ -- cfi_rel_offset (REG, 0) -- --#define CFI_POP(REG) \ -- cfi_adjust_cfa_offset (-4); \ -- cfi_restore (REG) -- --#define PUSH(REG) pushl REG; CFI_PUSH (REG) --#define POP(REG) popl REG; CFI_POP (REG) -- --#ifdef USE_AS_BZERO --# define DEST PARMS --# define LEN DEST+4 --# define SETRTNVAL --#else --# define DEST PARMS --# define CHR DEST+4 --# define LEN CHR+4 --# define SETRTNVAL movl DEST(%esp), %eax --#endif -- --#ifdef SHARED --# define ENTRANCE PUSH (%ebx); --# define RETURN_END POP (%ebx); ret --# define RETURN RETURN_END; CFI_PUSH (%ebx) --# define PARMS 8 /* Preserve EBX. */ --# define JMPTBL(I, B) I - B -- --/* Load an entry in a jump table into EBX and branch to it. TABLE is a -- jump table with relative offsets. */ --# define BRANCH_TO_JMPTBL_ENTRY(TABLE) \ -- /* We first load PC into EBX. */ \ -- call __i686.get_pc_thunk.bx; \ -- /* Get the address of the jump table. */ \ -- add $(TABLE - .), %ebx; \ -- /* Get the entry and convert the relative offset to the \ -- absolute address. */ \ -- add (%ebx,%ecx,4), %ebx; \ -- add %ecx, %edx; \ -- /* We loaded the jump table and adjuested EDX. Go. */ \ -- jmp *%ebx -- -- .section .gnu.linkonce.t.__i686.get_pc_thunk.bx,"ax",@progbits -- .globl __i686.get_pc_thunk.bx -- .hidden __i686.get_pc_thunk.bx -- ALIGN (4) -- .type __i686.get_pc_thunk.bx,@function --__i686.get_pc_thunk.bx: -- movl (%esp), %ebx -- ret --#else --# define ENTRANCE --# define RETURN_END ret --# define RETURN RETURN_END --# define PARMS 4 --# define JMPTBL(I, B) I -- --/* Branch to an entry in a jump table. TABLE is a jump table with -- absolute offsets. 
*/ --# define BRANCH_TO_JMPTBL_ENTRY(TABLE) \ -- add %ecx, %edx; \ -- jmp *TABLE(,%ecx,4) --#endif -- -- .section .text.sse2,"ax",@progbits --#if defined SHARED && !defined NOT_IN_libc && !defined USE_AS_BZERO --ENTRY (__memset_chk_sse2_rep) -- movl 12(%esp), %eax -- cmpl %eax, 16(%esp) -- jb HIDDEN_JUMPTARGET (__chk_fail) --END (__memset_chk_sse2_rep) --#endif --ENTRY (__memset_sse2_rep) -- ENTRANCE -- -- movl LEN(%esp), %ecx --#ifdef USE_AS_BZERO -- xor %eax, %eax --#else -- movzbl CHR(%esp), %eax -- movb %al, %ah -- /* Fill the whole EAX with pattern. */ -- movl %eax, %edx -- shl $16, %eax -- or %edx, %eax --#endif -- movl DEST(%esp), %edx -- cmp $32, %ecx -- jae L(32bytesormore) -- --L(write_less32bytes): -- BRANCH_TO_JMPTBL_ENTRY (L(table_less_32bytes)) -- -- -- .pushsection .rodata.sse2,"a",@progbits -- ALIGN (2) --L(table_less_32bytes): -- .int JMPTBL (L(write_0bytes), L(table_less_32bytes)) -- .int JMPTBL (L(write_1bytes), L(table_less_32bytes)) -- .int JMPTBL (L(write_2bytes), L(table_less_32bytes)) -- .int JMPTBL (L(write_3bytes), L(table_less_32bytes)) -- .int JMPTBL (L(write_4bytes), L(table_less_32bytes)) -- .int JMPTBL (L(write_5bytes), L(table_less_32bytes)) -- .int JMPTBL (L(write_6bytes), L(table_less_32bytes)) -- .int JMPTBL (L(write_7bytes), L(table_less_32bytes)) -- .int JMPTBL (L(write_8bytes), L(table_less_32bytes)) -- .int JMPTBL (L(write_9bytes), L(table_less_32bytes)) -- .int JMPTBL (L(write_10bytes), L(table_less_32bytes)) -- .int JMPTBL (L(write_11bytes), L(table_less_32bytes)) -- .int JMPTBL (L(write_12bytes), L(table_less_32bytes)) -- .int JMPTBL (L(write_13bytes), L(table_less_32bytes)) -- .int JMPTBL (L(write_14bytes), L(table_less_32bytes)) -- .int JMPTBL (L(write_15bytes), L(table_less_32bytes)) -- .int JMPTBL (L(write_16bytes), L(table_less_32bytes)) -- .int JMPTBL (L(write_17bytes), L(table_less_32bytes)) -- .int JMPTBL (L(write_18bytes), L(table_less_32bytes)) -- .int JMPTBL (L(write_19bytes), L(table_less_32bytes)) -- .int 
JMPTBL (L(write_20bytes), L(table_less_32bytes)) -- .int JMPTBL (L(write_21bytes), L(table_less_32bytes)) -- .int JMPTBL (L(write_22bytes), L(table_less_32bytes)) -- .int JMPTBL (L(write_23bytes), L(table_less_32bytes)) -- .int JMPTBL (L(write_24bytes), L(table_less_32bytes)) -- .int JMPTBL (L(write_25bytes), L(table_less_32bytes)) -- .int JMPTBL (L(write_26bytes), L(table_less_32bytes)) -- .int JMPTBL (L(write_27bytes), L(table_less_32bytes)) -- .int JMPTBL (L(write_28bytes), L(table_less_32bytes)) -- .int JMPTBL (L(write_29bytes), L(table_less_32bytes)) -- .int JMPTBL (L(write_30bytes), L(table_less_32bytes)) -- .int JMPTBL (L(write_31bytes), L(table_less_32bytes)) -- .popsection -- -- ALIGN (4) --L(write_28bytes): -- movl %eax, -28(%edx) --L(write_24bytes): -- movl %eax, -24(%edx) --L(write_20bytes): -- movl %eax, -20(%edx) --L(write_16bytes): -- movl %eax, -16(%edx) --L(write_12bytes): -- movl %eax, -12(%edx) --L(write_8bytes): -- movl %eax, -8(%edx) --L(write_4bytes): -- movl %eax, -4(%edx) --L(write_0bytes): -- SETRTNVAL -- RETURN -- -- ALIGN (4) --L(write_29bytes): -- movl %eax, -29(%edx) --L(write_25bytes): -- movl %eax, -25(%edx) --L(write_21bytes): -- movl %eax, -21(%edx) --L(write_17bytes): -- movl %eax, -17(%edx) --L(write_13bytes): -- movl %eax, -13(%edx) --L(write_9bytes): -- movl %eax, -9(%edx) --L(write_5bytes): -- movl %eax, -5(%edx) --L(write_1bytes): -- movb %al, -1(%edx) -- SETRTNVAL -- RETURN -- -- ALIGN (4) --L(write_30bytes): -- movl %eax, -30(%edx) --L(write_26bytes): -- movl %eax, -26(%edx) --L(write_22bytes): -- movl %eax, -22(%edx) --L(write_18bytes): -- movl %eax, -18(%edx) --L(write_14bytes): -- movl %eax, -14(%edx) --L(write_10bytes): -- movl %eax, -10(%edx) --L(write_6bytes): -- movl %eax, -6(%edx) --L(write_2bytes): -- movw %ax, -2(%edx) -- SETRTNVAL -- RETURN -- -- ALIGN (4) --L(write_31bytes): -- movl %eax, -31(%edx) --L(write_27bytes): -- movl %eax, -27(%edx) --L(write_23bytes): -- movl %eax, -23(%edx) --L(write_19bytes): -- movl 
%eax, -19(%edx) --L(write_15bytes): -- movl %eax, -15(%edx) --L(write_11bytes): -- movl %eax, -11(%edx) --L(write_7bytes): -- movl %eax, -7(%edx) --L(write_3bytes): -- movw %ax, -3(%edx) -- movb %al, -1(%edx) -- SETRTNVAL -- RETURN -- -- ALIGN (4) --/* ECX > 32 and EDX is 4 byte aligned. */ --L(32bytesormore): -- /* Fill xmm0 with the pattern. */ --#ifdef USE_AS_BZERO -- pxor %xmm0, %xmm0 --#else -- movd %eax, %xmm0 -- punpcklbw %xmm0, %xmm0 -- pshufd $0, %xmm0, %xmm0 --#endif -- testl $0xf, %edx -- jz L(aligned_16) --/* ECX > 32 and EDX is not 16 byte aligned. */ --L(not_aligned_16): -- movdqu %xmm0, (%edx) -- movl %edx, %eax -- and $-16, %edx -- add $16, %edx -- sub %edx, %eax -- add %eax, %ecx -- movd %xmm0, %eax -- -- ALIGN (4) --L(aligned_16): -- cmp $128, %ecx ++ jae L(bk_ssse3_cpy) + jmp L(bk_write_64bytesless) + + #endif +--- glibc-2.11-215-g199428c/sysdeps/i386/i686/multiarch/memset-sse2-rep.S ++++ glibc-2.11.90-13/sysdeps/i386/i686/multiarch/memset-sse2-rep.S +@@ -261,7 +261,7 @@ L(not_aligned_16): + ALIGN (4) + L(aligned_16): + cmp $128, %ecx - jge L(128bytesormore) -- --L(aligned_16_less128bytes): -- BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes)) -- -- ALIGN (4) --L(128bytesormore): -- PUSH (%edi) --#ifdef DATA_CACHE_SIZE -- PUSH (%ebx) -- mov $DATA_CACHE_SIZE, %ebx --#else --# ifdef SHARED -- call __i686.get_pc_thunk.bx -- add $_GLOBAL_OFFSET_TABLE_, %ebx -- mov __x86_data_cache_size@GOTOFF(%ebx), %ebx --# else -- PUSH (%ebx) -- mov __x86_data_cache_size, %ebx --# endif --#endif -- mov %ebx, %edi -- shr $4, %ebx -- sub %ebx, %edi --#if defined DATA_CACHE_SIZE || !defined SHARED -- POP (%ebx) --#endif --/* -- * When data size approximate the end of L1 cache, -- * fast string will prefetch and combine data efficiently. 
-- */ -- cmp %edi, %ecx -- jae L(128bytesormore_nt) -- subl $128, %ecx --L(128bytesormore_normal): -- sub $128, %ecx -- movdqa %xmm0, (%edx) -- movdqa %xmm0, 0x10(%edx) -- movdqa %xmm0, 0x20(%edx) -- movdqa %xmm0, 0x30(%edx) -- movdqa %xmm0, 0x40(%edx) -- movdqa %xmm0, 0x50(%edx) -- movdqa %xmm0, 0x60(%edx) -- movdqa %xmm0, 0x70(%edx) -- lea 128(%edx), %edx ++ jae L(128bytesormore) + + L(aligned_16_less128bytes): + BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes)) +@@ -306,7 +306,7 @@ L(128bytesormore_normal): + movdqa %xmm0, 0x60(%edx) + movdqa %xmm0, 0x70(%edx) + lea 128(%edx), %edx - jl L(128bytesless_normal) -- -- -- sub $128, %ecx -- movdqa %xmm0, (%edx) -- movdqa %xmm0, 0x10(%edx) -- movdqa %xmm0, 0x20(%edx) -- movdqa %xmm0, 0x30(%edx) -- movdqa %xmm0, 0x40(%edx) -- movdqa %xmm0, 0x50(%edx) -- movdqa %xmm0, 0x60(%edx) -- movdqa %xmm0, 0x70(%edx) -- lea 128(%edx), %edx ++ jb L(128bytesless_normal) + + + sub $128, %ecx +@@ -319,7 +319,7 @@ L(128bytesormore_normal): + movdqa %xmm0, 0x60(%edx) + movdqa %xmm0, 0x70(%edx) + lea 128(%edx), %edx - jge L(128bytesormore_normal) -- --L(128bytesless_normal): -- POP (%edi) -- lea 128(%ecx), %ecx -- BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes)) -- -- ALIGN (4) --L(128bytesormore_nt): -- mov %edx, %edi -- mov %ecx, %edx -- shr $2, %ecx -- and $3, %edx -- rep stosl -- jz L(copy_page_by_rep_exit) -- cmp $2, %edx -- jb L(copy_page_by_rep_left_1) -- movw %ax, (%edi) -- add $2, %edi -- sub $2, %edx -- jz L(copy_page_by_rep_exit) --L(copy_page_by_rep_left_1): -- movb %al, (%edi) --L(copy_page_by_rep_exit): -- POP (%edi) -- SETRTNVAL -- RETURN -- -- .pushsection .rodata.sse2,"a",@progbits -- ALIGN (2) --L(table_16_128bytes): -- .int JMPTBL (L(aligned_16_0bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_1bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_2bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_3bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_4bytes), L(table_16_128bytes)) -- .int JMPTBL 
(L(aligned_16_5bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_6bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_7bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_8bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_9bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_10bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_11bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_12bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_13bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_14bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_15bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_16bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_17bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_18bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_19bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_20bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_21bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_22bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_23bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_24bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_25bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_26bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_27bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_28bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_29bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_30bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_31bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_32bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_33bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_34bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_35bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_36bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_37bytes), L(table_16_128bytes)) -- 
.int JMPTBL (L(aligned_16_38bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_39bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_40bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_41bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_42bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_43bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_44bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_45bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_46bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_47bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_48bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_49bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_50bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_51bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_52bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_53bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_54bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_55bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_56bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_57bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_58bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_59bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_60bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_61bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_62bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_63bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_64bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_65bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_66bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_67bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_68bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_69bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_70bytes), 
L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_71bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_72bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_73bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_74bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_75bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_76bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_77bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_78bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_79bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_80bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_81bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_82bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_83bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_84bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_85bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_86bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_87bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_88bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_89bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_90bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_91bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_92bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_93bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_94bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_95bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_96bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_97bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_98bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_99bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_100bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_101bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_102bytes), L(table_16_128bytes)) -- .int JMPTBL 
(L(aligned_16_103bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_104bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_105bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_106bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_107bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_108bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_109bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_110bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_111bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_112bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_113bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_114bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_115bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_116bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_117bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_118bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_119bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_120bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_121bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_122bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_123bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_124bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_125bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_126bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_127bytes), L(table_16_128bytes)) -- .popsection -- -- ALIGN (4) --L(aligned_16_112bytes): -- movdqa %xmm0, -112(%edx) --L(aligned_16_96bytes): -- movdqa %xmm0, -96(%edx) --L(aligned_16_80bytes): -- movdqa %xmm0, -80(%edx) --L(aligned_16_64bytes): -- movdqa %xmm0, -64(%edx) --L(aligned_16_48bytes): -- movdqa %xmm0, -48(%edx) --L(aligned_16_32bytes): -- movdqa %xmm0, -32(%edx) --L(aligned_16_16bytes): -- movdqa %xmm0, -16(%edx) --L(aligned_16_0bytes): -- SETRTNVAL -- RETURN -- -- ALIGN (4) 
--L(aligned_16_113bytes): -- movdqa %xmm0, -113(%edx) --L(aligned_16_97bytes): -- movdqa %xmm0, -97(%edx) --L(aligned_16_81bytes): -- movdqa %xmm0, -81(%edx) --L(aligned_16_65bytes): -- movdqa %xmm0, -65(%edx) --L(aligned_16_49bytes): -- movdqa %xmm0, -49(%edx) --L(aligned_16_33bytes): -- movdqa %xmm0, -33(%edx) --L(aligned_16_17bytes): -- movdqa %xmm0, -17(%edx) --L(aligned_16_1bytes): -- movb %al, -1(%edx) -- SETRTNVAL -- RETURN -- -- ALIGN (4) --L(aligned_16_114bytes): -- movdqa %xmm0, -114(%edx) --L(aligned_16_98bytes): -- movdqa %xmm0, -98(%edx) --L(aligned_16_82bytes): -- movdqa %xmm0, -82(%edx) --L(aligned_16_66bytes): -- movdqa %xmm0, -66(%edx) --L(aligned_16_50bytes): -- movdqa %xmm0, -50(%edx) --L(aligned_16_34bytes): -- movdqa %xmm0, -34(%edx) --L(aligned_16_18bytes): -- movdqa %xmm0, -18(%edx) --L(aligned_16_2bytes): -- movw %ax, -2(%edx) -- SETRTNVAL -- RETURN -- -- ALIGN (4) --L(aligned_16_115bytes): -- movdqa %xmm0, -115(%edx) --L(aligned_16_99bytes): -- movdqa %xmm0, -99(%edx) --L(aligned_16_83bytes): -- movdqa %xmm0, -83(%edx) --L(aligned_16_67bytes): -- movdqa %xmm0, -67(%edx) --L(aligned_16_51bytes): -- movdqa %xmm0, -51(%edx) --L(aligned_16_35bytes): -- movdqa %xmm0, -35(%edx) --L(aligned_16_19bytes): -- movdqa %xmm0, -19(%edx) --L(aligned_16_3bytes): -- movw %ax, -3(%edx) -- movb %al, -1(%edx) -- SETRTNVAL -- RETURN -- -- ALIGN (4) --L(aligned_16_116bytes): -- movdqa %xmm0, -116(%edx) --L(aligned_16_100bytes): -- movdqa %xmm0, -100(%edx) --L(aligned_16_84bytes): -- movdqa %xmm0, -84(%edx) --L(aligned_16_68bytes): -- movdqa %xmm0, -68(%edx) --L(aligned_16_52bytes): -- movdqa %xmm0, -52(%edx) --L(aligned_16_36bytes): -- movdqa %xmm0, -36(%edx) --L(aligned_16_20bytes): -- movdqa %xmm0, -20(%edx) --L(aligned_16_4bytes): -- movl %eax, -4(%edx) -- SETRTNVAL -- RETURN -- -- ALIGN (4) --L(aligned_16_117bytes): -- movdqa %xmm0, -117(%edx) --L(aligned_16_101bytes): -- movdqa %xmm0, -101(%edx) --L(aligned_16_85bytes): -- movdqa %xmm0, -85(%edx) 
--L(aligned_16_69bytes): -- movdqa %xmm0, -69(%edx) --L(aligned_16_53bytes): -- movdqa %xmm0, -53(%edx) --L(aligned_16_37bytes): -- movdqa %xmm0, -37(%edx) --L(aligned_16_21bytes): -- movdqa %xmm0, -21(%edx) --L(aligned_16_5bytes): -- movl %eax, -5(%edx) -- movb %al, -1(%edx) -- SETRTNVAL -- RETURN -- -- ALIGN (4) --L(aligned_16_118bytes): -- movdqa %xmm0, -118(%edx) --L(aligned_16_102bytes): -- movdqa %xmm0, -102(%edx) --L(aligned_16_86bytes): -- movdqa %xmm0, -86(%edx) --L(aligned_16_70bytes): -- movdqa %xmm0, -70(%edx) --L(aligned_16_54bytes): -- movdqa %xmm0, -54(%edx) --L(aligned_16_38bytes): -- movdqa %xmm0, -38(%edx) --L(aligned_16_22bytes): -- movdqa %xmm0, -22(%edx) --L(aligned_16_6bytes): -- movl %eax, -6(%edx) -- movw %ax, -2(%edx) -- SETRTNVAL -- RETURN -- -- ALIGN (4) --L(aligned_16_119bytes): -- movdqa %xmm0, -119(%edx) --L(aligned_16_103bytes): -- movdqa %xmm0, -103(%edx) --L(aligned_16_87bytes): -- movdqa %xmm0, -87(%edx) --L(aligned_16_71bytes): -- movdqa %xmm0, -71(%edx) --L(aligned_16_55bytes): -- movdqa %xmm0, -55(%edx) --L(aligned_16_39bytes): -- movdqa %xmm0, -39(%edx) --L(aligned_16_23bytes): -- movdqa %xmm0, -23(%edx) --L(aligned_16_7bytes): -- movl %eax, -7(%edx) -- movw %ax, -3(%edx) -- movb %al, -1(%edx) -- SETRTNVAL -- RETURN -- -- ALIGN (4) --L(aligned_16_120bytes): -- movdqa %xmm0, -120(%edx) --L(aligned_16_104bytes): -- movdqa %xmm0, -104(%edx) --L(aligned_16_88bytes): -- movdqa %xmm0, -88(%edx) --L(aligned_16_72bytes): -- movdqa %xmm0, -72(%edx) --L(aligned_16_56bytes): -- movdqa %xmm0, -56(%edx) --L(aligned_16_40bytes): -- movdqa %xmm0, -40(%edx) --L(aligned_16_24bytes): -- movdqa %xmm0, -24(%edx) --L(aligned_16_8bytes): -- movq %xmm0, -8(%edx) -- SETRTNVAL -- RETURN -- -- ALIGN (4) --L(aligned_16_121bytes): -- movdqa %xmm0, -121(%edx) --L(aligned_16_105bytes): -- movdqa %xmm0, -105(%edx) --L(aligned_16_89bytes): -- movdqa %xmm0, -89(%edx) --L(aligned_16_73bytes): -- movdqa %xmm0, -73(%edx) --L(aligned_16_57bytes): -- movdqa %xmm0, 
-57(%edx) --L(aligned_16_41bytes): -- movdqa %xmm0, -41(%edx) --L(aligned_16_25bytes): -- movdqa %xmm0, -25(%edx) --L(aligned_16_9bytes): -- movq %xmm0, -9(%edx) -- movb %al, -1(%edx) -- SETRTNVAL -- RETURN -- -- ALIGN (4) --L(aligned_16_122bytes): -- movdqa %xmm0, -122(%edx) --L(aligned_16_106bytes): -- movdqa %xmm0, -106(%edx) --L(aligned_16_90bytes): -- movdqa %xmm0, -90(%edx) --L(aligned_16_74bytes): -- movdqa %xmm0, -74(%edx) --L(aligned_16_58bytes): -- movdqa %xmm0, -58(%edx) --L(aligned_16_42bytes): -- movdqa %xmm0, -42(%edx) --L(aligned_16_26bytes): -- movdqa %xmm0, -26(%edx) --L(aligned_16_10bytes): -- movq %xmm0, -10(%edx) -- movw %ax, -2(%edx) -- SETRTNVAL -- RETURN -- -- ALIGN (4) --L(aligned_16_123bytes): -- movdqa %xmm0, -123(%edx) --L(aligned_16_107bytes): -- movdqa %xmm0, -107(%edx) --L(aligned_16_91bytes): -- movdqa %xmm0, -91(%edx) --L(aligned_16_75bytes): -- movdqa %xmm0, -75(%edx) --L(aligned_16_59bytes): -- movdqa %xmm0, -59(%edx) --L(aligned_16_43bytes): -- movdqa %xmm0, -43(%edx) --L(aligned_16_27bytes): -- movdqa %xmm0, -27(%edx) --L(aligned_16_11bytes): -- movq %xmm0, -11(%edx) -- movw %ax, -3(%edx) -- movb %al, -1(%edx) -- SETRTNVAL -- RETURN -- -- ALIGN (4) --L(aligned_16_124bytes): -- movdqa %xmm0, -124(%edx) --L(aligned_16_108bytes): -- movdqa %xmm0, -108(%edx) --L(aligned_16_92bytes): -- movdqa %xmm0, -92(%edx) --L(aligned_16_76bytes): -- movdqa %xmm0, -76(%edx) --L(aligned_16_60bytes): -- movdqa %xmm0, -60(%edx) --L(aligned_16_44bytes): -- movdqa %xmm0, -44(%edx) --L(aligned_16_28bytes): -- movdqa %xmm0, -28(%edx) --L(aligned_16_12bytes): -- movq %xmm0, -12(%edx) -- movl %eax, -4(%edx) -- SETRTNVAL -- RETURN -- -- ALIGN (4) --L(aligned_16_125bytes): -- movdqa %xmm0, -125(%edx) --L(aligned_16_109bytes): -- movdqa %xmm0, -109(%edx) --L(aligned_16_93bytes): -- movdqa %xmm0, -93(%edx) --L(aligned_16_77bytes): -- movdqa %xmm0, -77(%edx) --L(aligned_16_61bytes): -- movdqa %xmm0, -61(%edx) --L(aligned_16_45bytes): -- movdqa %xmm0, -45(%edx) 
--L(aligned_16_29bytes): -- movdqa %xmm0, -29(%edx) --L(aligned_16_13bytes): -- movq %xmm0, -13(%edx) -- movl %eax, -5(%edx) -- movb %al, -1(%edx) -- SETRTNVAL -- RETURN -- -- ALIGN (4) --L(aligned_16_126bytes): -- movdqa %xmm0, -126(%edx) --L(aligned_16_110bytes): -- movdqa %xmm0, -110(%edx) --L(aligned_16_94bytes): -- movdqa %xmm0, -94(%edx) --L(aligned_16_78bytes): -- movdqa %xmm0, -78(%edx) --L(aligned_16_62bytes): -- movdqa %xmm0, -62(%edx) --L(aligned_16_46bytes): -- movdqa %xmm0, -46(%edx) --L(aligned_16_30bytes): -- movdqa %xmm0, -30(%edx) --L(aligned_16_14bytes): -- movq %xmm0, -14(%edx) -- movl %eax, -6(%edx) -- movw %ax, -2(%edx) -- SETRTNVAL -- RETURN -- -- ALIGN (4) --L(aligned_16_127bytes): -- movdqa %xmm0, -127(%edx) --L(aligned_16_111bytes): -- movdqa %xmm0, -111(%edx) --L(aligned_16_95bytes): -- movdqa %xmm0, -95(%edx) --L(aligned_16_79bytes): -- movdqa %xmm0, -79(%edx) --L(aligned_16_63bytes): -- movdqa %xmm0, -63(%edx) --L(aligned_16_47bytes): -- movdqa %xmm0, -47(%edx) --L(aligned_16_31bytes): -- movdqa %xmm0, -31(%edx) --L(aligned_16_15bytes): -- movq %xmm0, -15(%edx) -- movl %eax, -7(%edx) -- movw %ax, -3(%edx) -- movb %al, -1(%edx) -- SETRTNVAL -- RETURN_END -- --END (__memset_sse2_rep) -- --#endif ---- glibc-2.11-188-g0cbcca8/sysdeps/i386/i686/multiarch/memset-sse2.S -+++ glibc-2.11.90-12//dev/null -@@ -1,867 +0,0 @@ --/* memset with SSE2 -- Copyright (C) 2010 Free Software Foundation, Inc. -- Contributed by Intel Corporation. -- This file is part of the GNU C Library. -- -- The GNU C Library is free software; you can redistribute it and/or -- modify it under the terms of the GNU Lesser General Public -- License as published by the Free Software Foundation; either -- version 2.1 of the License, or (at your option) any later version. -- -- The GNU C Library is distributed in the hope that it will be useful, -- but WITHOUT ANY WARRANTY; without even the implied warranty of -- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU -- Lesser General Public License for more details. -- -- You should have received a copy of the GNU Lesser General Public -- License along with the GNU C Library; if not, write to the Free -- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA -- 02111-1307 USA. */ -- --#ifndef NOT_IN_libc -- --#include --#include "asm-syntax.h" -- --#define CFI_PUSH(REG) \ -- cfi_adjust_cfa_offset (4); \ -- cfi_rel_offset (REG, 0) -- --#define CFI_POP(REG) \ -- cfi_adjust_cfa_offset (-4); \ -- cfi_restore (REG) -- --#define PUSH(REG) pushl REG; CFI_PUSH (REG) --#define POP(REG) popl REG; CFI_POP (REG) -- --#ifdef USE_AS_BZERO --# define DEST PARMS --# define LEN DEST+4 --# define SETRTNVAL --#else --# define DEST PARMS --# define CHR DEST+4 --# define LEN CHR+4 --# define SETRTNVAL movl DEST(%esp), %eax --#endif -- --#ifdef SHARED --# define ENTRANCE PUSH (%ebx); --# define RETURN_END POP (%ebx); ret --# define RETURN RETURN_END; CFI_PUSH (%ebx) --# define PARMS 8 /* Preserve EBX. */ --# define JMPTBL(I, B) I - B -- --/* Load an entry in a jump table into EBX and branch to it. TABLE is a -- jump table with relative offsets. */ --# define BRANCH_TO_JMPTBL_ENTRY(TABLE) \ -- /* We first load PC into EBX. */ \ -- call __i686.get_pc_thunk.bx; \ -- /* Get the address of the jump table. */ \ -- add $(TABLE - .), %ebx; \ -- /* Get the entry and convert the relative offset to the \ -- absolute address. */ \ -- add (%ebx,%ecx,4), %ebx; \ -- add %ecx, %edx; \ -- /* We loaded the jump table and adjuested EDX. Go. */ \ -- jmp *%ebx -- -- .section .gnu.linkonce.t.__i686.get_pc_thunk.bx,"ax",@progbits -- .globl __i686.get_pc_thunk.bx -- .hidden __i686.get_pc_thunk.bx -- ALIGN (4) -- .type __i686.get_pc_thunk.bx,@function --__i686.get_pc_thunk.bx: -- movl (%esp), %ebx -- ret --#else --# define ENTRANCE --# define RETURN_END ret --# define RETURN RETURN_END --# define PARMS 4 --# define JMPTBL(I, B) I -- --/* Branch to an entry in a jump table. 
TABLE is a jump table with -- absolute offsets. */ --# define BRANCH_TO_JMPTBL_ENTRY(TABLE) \ -- add %ecx, %edx; \ -- jmp *TABLE(,%ecx,4) --#endif -- -- .section .text.sse2,"ax",@progbits --#if defined SHARED && !defined NOT_IN_libc && !defined USE_AS_BZERO --ENTRY (__memset_chk_sse2) -- movl 12(%esp), %eax -- cmpl %eax, 16(%esp) -- jb HIDDEN_JUMPTARGET (__chk_fail) --END (__memset_chk_sse2) --#endif --ENTRY (__memset_sse2) -- ENTRANCE -- -- movl LEN(%esp), %ecx --#ifdef USE_AS_BZERO -- xor %eax, %eax --#else -- movzbl CHR(%esp), %eax -- movb %al, %ah -- /* Fill the whole EAX with pattern. */ -- movl %eax, %edx -- shl $16, %eax -- or %edx, %eax --#endif -- movl DEST(%esp), %edx -- cmp $32, %ecx -- jae L(32bytesormore) -- --L(write_less32bytes): -- BRANCH_TO_JMPTBL_ENTRY (L(table_less_32bytes)) -- -- -- .pushsection .rodata.sse2,"a",@progbits -- ALIGN (2) --L(table_less_32bytes): -- .int JMPTBL (L(write_0bytes), L(table_less_32bytes)) -- .int JMPTBL (L(write_1bytes), L(table_less_32bytes)) -- .int JMPTBL (L(write_2bytes), L(table_less_32bytes)) -- .int JMPTBL (L(write_3bytes), L(table_less_32bytes)) -- .int JMPTBL (L(write_4bytes), L(table_less_32bytes)) -- .int JMPTBL (L(write_5bytes), L(table_less_32bytes)) -- .int JMPTBL (L(write_6bytes), L(table_less_32bytes)) -- .int JMPTBL (L(write_7bytes), L(table_less_32bytes)) -- .int JMPTBL (L(write_8bytes), L(table_less_32bytes)) -- .int JMPTBL (L(write_9bytes), L(table_less_32bytes)) -- .int JMPTBL (L(write_10bytes), L(table_less_32bytes)) -- .int JMPTBL (L(write_11bytes), L(table_less_32bytes)) -- .int JMPTBL (L(write_12bytes), L(table_less_32bytes)) -- .int JMPTBL (L(write_13bytes), L(table_less_32bytes)) -- .int JMPTBL (L(write_14bytes), L(table_less_32bytes)) -- .int JMPTBL (L(write_15bytes), L(table_less_32bytes)) -- .int JMPTBL (L(write_16bytes), L(table_less_32bytes)) -- .int JMPTBL (L(write_17bytes), L(table_less_32bytes)) -- .int JMPTBL (L(write_18bytes), L(table_less_32bytes)) -- .int JMPTBL (L(write_19bytes), 
L(table_less_32bytes)) -- .int JMPTBL (L(write_20bytes), L(table_less_32bytes)) -- .int JMPTBL (L(write_21bytes), L(table_less_32bytes)) -- .int JMPTBL (L(write_22bytes), L(table_less_32bytes)) -- .int JMPTBL (L(write_23bytes), L(table_less_32bytes)) -- .int JMPTBL (L(write_24bytes), L(table_less_32bytes)) -- .int JMPTBL (L(write_25bytes), L(table_less_32bytes)) -- .int JMPTBL (L(write_26bytes), L(table_less_32bytes)) -- .int JMPTBL (L(write_27bytes), L(table_less_32bytes)) -- .int JMPTBL (L(write_28bytes), L(table_less_32bytes)) -- .int JMPTBL (L(write_29bytes), L(table_less_32bytes)) -- .int JMPTBL (L(write_30bytes), L(table_less_32bytes)) -- .int JMPTBL (L(write_31bytes), L(table_less_32bytes)) -- .popsection -- -- ALIGN (4) --L(write_28bytes): -- movl %eax, -28(%edx) --L(write_24bytes): -- movl %eax, -24(%edx) --L(write_20bytes): -- movl %eax, -20(%edx) --L(write_16bytes): -- movl %eax, -16(%edx) --L(write_12bytes): -- movl %eax, -12(%edx) --L(write_8bytes): -- movl %eax, -8(%edx) --L(write_4bytes): -- movl %eax, -4(%edx) --L(write_0bytes): -- SETRTNVAL -- RETURN -- -- ALIGN (4) --L(write_29bytes): -- movl %eax, -29(%edx) --L(write_25bytes): -- movl %eax, -25(%edx) --L(write_21bytes): -- movl %eax, -21(%edx) --L(write_17bytes): -- movl %eax, -17(%edx) --L(write_13bytes): -- movl %eax, -13(%edx) --L(write_9bytes): -- movl %eax, -9(%edx) --L(write_5bytes): -- movl %eax, -5(%edx) --L(write_1bytes): -- movb %al, -1(%edx) -- SETRTNVAL -- RETURN -- -- ALIGN (4) --L(write_30bytes): -- movl %eax, -30(%edx) --L(write_26bytes): -- movl %eax, -26(%edx) --L(write_22bytes): -- movl %eax, -22(%edx) --L(write_18bytes): -- movl %eax, -18(%edx) --L(write_14bytes): -- movl %eax, -14(%edx) --L(write_10bytes): -- movl %eax, -10(%edx) --L(write_6bytes): -- movl %eax, -6(%edx) --L(write_2bytes): -- movw %ax, -2(%edx) -- SETRTNVAL -- RETURN -- -- ALIGN (4) --L(write_31bytes): -- movl %eax, -31(%edx) --L(write_27bytes): -- movl %eax, -27(%edx) --L(write_23bytes): -- movl %eax, 
-23(%edx) --L(write_19bytes): -- movl %eax, -19(%edx) --L(write_15bytes): -- movl %eax, -15(%edx) --L(write_11bytes): -- movl %eax, -11(%edx) --L(write_7bytes): -- movl %eax, -7(%edx) --L(write_3bytes): -- movw %ax, -3(%edx) -- movb %al, -1(%edx) -- SETRTNVAL -- RETURN -- -- ALIGN (4) --/* ECX > 32 and EDX is 4 byte aligned. */ --L(32bytesormore): -- /* Fill xmm0 with the pattern. */ --#ifdef USE_AS_BZERO -- pxor %xmm0, %xmm0 --#else -- movd %eax, %xmm0 -- punpcklbw %xmm0, %xmm0 -- pshufd $0, %xmm0, %xmm0 --#endif -- testl $0xf, %edx -- jz L(aligned_16) --/* ECX > 32 and EDX is not 16 byte aligned. */ --L(not_aligned_16): -- movdqu %xmm0, (%edx) -- movl %edx, %eax -- and $-16, %edx -- add $16, %edx -- sub %edx, %eax -- add %eax, %ecx -- movd %xmm0, %eax -- -- ALIGN (4) --L(aligned_16): -- cmp $128, %ecx ++ jae L(128bytesormore_normal) + + L(128bytesless_normal): + POP (%edi) +--- glibc-2.11-215-g199428c/sysdeps/i386/i686/multiarch/memset-sse2.S ++++ glibc-2.11.90-13/sysdeps/i386/i686/multiarch/memset-sse2.S +@@ -261,7 +261,7 @@ L(not_aligned_16): + ALIGN (4) + L(aligned_16): + cmp $128, %ecx - jge L(128bytesormore) -- --L(aligned_16_less128bytes): -- BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes)) -- -- ALIGN (4) --L(128bytesormore): --#ifdef SHARED_CACHE_SIZE -- PUSH (%ebx) -- mov $SHARED_CACHE_SIZE, %ebx --#else --# ifdef SHARED -- call __i686.get_pc_thunk.bx -- add $_GLOBAL_OFFSET_TABLE_, %ebx -- mov __x86_shared_cache_size@GOTOFF(%ebx), %ebx --# else -- PUSH (%ebx) -- mov __x86_shared_cache_size, %ebx --# endif --#endif -- cmp %ebx, %ecx -- jae L(128bytesormore_nt_start) -- -- --#ifdef DATA_CACHE_SIZE -- POP (%ebx) -- cmp $DATA_CACHE_SIZE, %ecx --#else --# ifdef SHARED -- call __i686.get_pc_thunk.bx -- add $_GLOBAL_OFFSET_TABLE_, %ebx -- cmp __x86_data_cache_size@GOTOFF(%ebx), %ecx --# else -- POP (%ebx) -- cmp __x86_data_cache_size, %ecx --# endif --#endif -- -- jae L(128bytes_L2_normal) -- subl $128, %ecx --L(128bytesormore_normal): -- sub $128, %ecx -- movdqa 
%xmm0, (%edx) -- movdqa %xmm0, 0x10(%edx) -- movdqa %xmm0, 0x20(%edx) -- movdqa %xmm0, 0x30(%edx) -- movdqa %xmm0, 0x40(%edx) -- movdqa %xmm0, 0x50(%edx) -- movdqa %xmm0, 0x60(%edx) -- movdqa %xmm0, 0x70(%edx) -- lea 128(%edx), %edx ++ jae L(128bytesormore) + + L(aligned_16_less128bytes): + BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes)) +@@ -312,7 +312,7 @@ L(128bytesormore_normal): + movdqa %xmm0, 0x60(%edx) + movdqa %xmm0, 0x70(%edx) + lea 128(%edx), %edx - jl L(128bytesless_normal) -- -- -- sub $128, %ecx -- movdqa %xmm0, (%edx) -- movdqa %xmm0, 0x10(%edx) -- movdqa %xmm0, 0x20(%edx) -- movdqa %xmm0, 0x30(%edx) -- movdqa %xmm0, 0x40(%edx) -- movdqa %xmm0, 0x50(%edx) -- movdqa %xmm0, 0x60(%edx) -- movdqa %xmm0, 0x70(%edx) -- lea 128(%edx), %edx ++ jb L(128bytesless_normal) + + + sub $128, %ecx +@@ -325,7 +325,7 @@ L(128bytesormore_normal): + movdqa %xmm0, 0x60(%edx) + movdqa %xmm0, 0x70(%edx) + lea 128(%edx), %edx - jge L(128bytesormore_normal) -- --L(128bytesless_normal): -- lea 128(%ecx), %ecx -- BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes)) -- -- ALIGN (4) --L(128bytes_L2_normal): -- prefetcht0 0x380(%edx) -- prefetcht0 0x3c0(%edx) -- sub $128, %ecx -- movdqa %xmm0, (%edx) -- movaps %xmm0, 0x10(%edx) -- movaps %xmm0, 0x20(%edx) -- movaps %xmm0, 0x30(%edx) -- movaps %xmm0, 0x40(%edx) -- movaps %xmm0, 0x50(%edx) -- movaps %xmm0, 0x60(%edx) -- movaps %xmm0, 0x70(%edx) -- add $128, %edx -- cmp $128, %ecx ++ jae L(128bytesormore_normal) + + L(128bytesless_normal): + lea 128(%ecx), %ecx +@@ -346,7 +346,7 @@ L(128bytes_L2_normal): + movaps %xmm0, 0x70(%edx) + add $128, %edx + cmp $128, %ecx - jge L(128bytes_L2_normal) -- --L(128bytesless_L2_normal): -- BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes)) -- --L(128bytesormore_nt_start): -- sub %ebx, %ecx -- ALIGN (4) --L(128bytesormore_shared_cache_loop): -- prefetcht0 0x3c0(%edx) -- prefetcht0 0x380(%edx) -- sub $0x80, %ebx -- movdqa %xmm0, (%edx) -- movdqa %xmm0, 0x10(%edx) -- movdqa %xmm0, 0x20(%edx) -- movdqa %xmm0, 
0x30(%edx) -- movdqa %xmm0, 0x40(%edx) -- movdqa %xmm0, 0x50(%edx) -- movdqa %xmm0, 0x60(%edx) -- movdqa %xmm0, 0x70(%edx) -- add $0x80, %edx -- cmp $0x80, %ebx ++ jae L(128bytes_L2_normal) + + L(128bytesless_L2_normal): + BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes)) +@@ -368,7 +368,7 @@ L(128bytesormore_shared_cache_loop): + movdqa %xmm0, 0x70(%edx) + add $0x80, %edx + cmp $0x80, %ebx - jge L(128bytesormore_shared_cache_loop) -- cmp $0x80, %ecx -- jb L(shared_cache_loop_end) -- ALIGN (4) --L(128bytesormore_nt): -- sub $0x80, %ecx -- movntdq %xmm0, (%edx) -- movntdq %xmm0, 0x10(%edx) -- movntdq %xmm0, 0x20(%edx) -- movntdq %xmm0, 0x30(%edx) -- movntdq %xmm0, 0x40(%edx) -- movntdq %xmm0, 0x50(%edx) -- movntdq %xmm0, 0x60(%edx) -- movntdq %xmm0, 0x70(%edx) -- add $0x80, %edx -- cmp $0x80, %ecx ++ jae L(128bytesormore_shared_cache_loop) + cmp $0x80, %ecx + jb L(shared_cache_loop_end) + ALIGN (4) +@@ -384,7 +384,7 @@ L(128bytesormore_nt): + movntdq %xmm0, 0x70(%edx) + add $0x80, %edx + cmp $0x80, %ecx - jge L(128bytesormore_nt) -- sfence --L(shared_cache_loop_end): --#if defined DATA_CACHE_SIZE || !defined SHARED -- POP (%ebx) --#endif -- BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes)) -- -- -- .pushsection .rodata.sse2,"a",@progbits -- ALIGN (2) --L(table_16_128bytes): -- .int JMPTBL (L(aligned_16_0bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_1bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_2bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_3bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_4bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_5bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_6bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_7bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_8bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_9bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_10bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_11bytes), 
L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_12bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_13bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_14bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_15bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_16bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_17bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_18bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_19bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_20bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_21bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_22bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_23bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_24bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_25bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_26bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_27bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_28bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_29bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_30bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_31bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_32bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_33bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_34bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_35bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_36bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_37bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_38bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_39bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_40bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_41bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_42bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_43bytes), L(table_16_128bytes)) -- .int JMPTBL 
(L(aligned_16_44bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_45bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_46bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_47bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_48bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_49bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_50bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_51bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_52bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_53bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_54bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_55bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_56bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_57bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_58bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_59bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_60bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_61bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_62bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_63bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_64bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_65bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_66bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_67bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_68bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_69bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_70bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_71bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_72bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_73bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_74bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_75bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_76bytes), L(table_16_128bytes)) 
-- .int JMPTBL (L(aligned_16_77bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_78bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_79bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_80bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_81bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_82bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_83bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_84bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_85bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_86bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_87bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_88bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_89bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_90bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_91bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_92bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_93bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_94bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_95bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_96bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_97bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_98bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_99bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_100bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_101bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_102bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_103bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_104bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_105bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_106bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_107bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_108bytes), L(table_16_128bytes)) -- .int JMPTBL 
(L(aligned_16_109bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_110bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_111bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_112bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_113bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_114bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_115bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_116bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_117bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_118bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_119bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_120bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_121bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_122bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_123bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_124bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_125bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_126bytes), L(table_16_128bytes)) -- .int JMPTBL (L(aligned_16_127bytes), L(table_16_128bytes)) -- .popsection -- -- ALIGN (4) --L(aligned_16_112bytes): -- movdqa %xmm0, -112(%edx) --L(aligned_16_96bytes): -- movdqa %xmm0, -96(%edx) --L(aligned_16_80bytes): -- movdqa %xmm0, -80(%edx) --L(aligned_16_64bytes): -- movdqa %xmm0, -64(%edx) --L(aligned_16_48bytes): -- movdqa %xmm0, -48(%edx) --L(aligned_16_32bytes): -- movdqa %xmm0, -32(%edx) --L(aligned_16_16bytes): -- movdqa %xmm0, -16(%edx) --L(aligned_16_0bytes): -- SETRTNVAL -- RETURN -- -- ALIGN (4) --L(aligned_16_113bytes): -- movdqa %xmm0, -113(%edx) --L(aligned_16_97bytes): -- movdqa %xmm0, -97(%edx) --L(aligned_16_81bytes): -- movdqa %xmm0, -81(%edx) --L(aligned_16_65bytes): -- movdqa %xmm0, -65(%edx) --L(aligned_16_49bytes): -- movdqa %xmm0, -49(%edx) --L(aligned_16_33bytes): -- movdqa %xmm0, -33(%edx) --L(aligned_16_17bytes): -- movdqa %xmm0, -17(%edx) 
--L(aligned_16_1bytes): -- movb %al, -1(%edx) -- SETRTNVAL -- RETURN -- -- ALIGN (4) --L(aligned_16_114bytes): -- movdqa %xmm0, -114(%edx) --L(aligned_16_98bytes): -- movdqa %xmm0, -98(%edx) --L(aligned_16_82bytes): -- movdqa %xmm0, -82(%edx) --L(aligned_16_66bytes): -- movdqa %xmm0, -66(%edx) --L(aligned_16_50bytes): -- movdqa %xmm0, -50(%edx) --L(aligned_16_34bytes): -- movdqa %xmm0, -34(%edx) --L(aligned_16_18bytes): -- movdqa %xmm0, -18(%edx) --L(aligned_16_2bytes): -- movw %ax, -2(%edx) -- SETRTNVAL -- RETURN -- -- ALIGN (4) --L(aligned_16_115bytes): -- movdqa %xmm0, -115(%edx) --L(aligned_16_99bytes): -- movdqa %xmm0, -99(%edx) --L(aligned_16_83bytes): -- movdqa %xmm0, -83(%edx) --L(aligned_16_67bytes): -- movdqa %xmm0, -67(%edx) --L(aligned_16_51bytes): -- movdqa %xmm0, -51(%edx) --L(aligned_16_35bytes): -- movdqa %xmm0, -35(%edx) --L(aligned_16_19bytes): -- movdqa %xmm0, -19(%edx) --L(aligned_16_3bytes): -- movw %ax, -3(%edx) -- movb %al, -1(%edx) -- SETRTNVAL -- RETURN -- -- ALIGN (4) --L(aligned_16_116bytes): -- movdqa %xmm0, -116(%edx) --L(aligned_16_100bytes): -- movdqa %xmm0, -100(%edx) --L(aligned_16_84bytes): -- movdqa %xmm0, -84(%edx) --L(aligned_16_68bytes): -- movdqa %xmm0, -68(%edx) --L(aligned_16_52bytes): -- movdqa %xmm0, -52(%edx) --L(aligned_16_36bytes): -- movdqa %xmm0, -36(%edx) --L(aligned_16_20bytes): -- movdqa %xmm0, -20(%edx) --L(aligned_16_4bytes): -- movl %eax, -4(%edx) -- SETRTNVAL -- RETURN -- -- ALIGN (4) --L(aligned_16_117bytes): -- movdqa %xmm0, -117(%edx) --L(aligned_16_101bytes): -- movdqa %xmm0, -101(%edx) --L(aligned_16_85bytes): -- movdqa %xmm0, -85(%edx) --L(aligned_16_69bytes): -- movdqa %xmm0, -69(%edx) --L(aligned_16_53bytes): -- movdqa %xmm0, -53(%edx) --L(aligned_16_37bytes): -- movdqa %xmm0, -37(%edx) --L(aligned_16_21bytes): -- movdqa %xmm0, -21(%edx) --L(aligned_16_5bytes): -- movl %eax, -5(%edx) -- movb %al, -1(%edx) -- SETRTNVAL -- RETURN -- -- ALIGN (4) --L(aligned_16_118bytes): -- movdqa %xmm0, -118(%edx) 
--L(aligned_16_102bytes): -- movdqa %xmm0, -102(%edx) --L(aligned_16_86bytes): -- movdqa %xmm0, -86(%edx) --L(aligned_16_70bytes): -- movdqa %xmm0, -70(%edx) --L(aligned_16_54bytes): -- movdqa %xmm0, -54(%edx) --L(aligned_16_38bytes): -- movdqa %xmm0, -38(%edx) --L(aligned_16_22bytes): -- movdqa %xmm0, -22(%edx) --L(aligned_16_6bytes): -- movl %eax, -6(%edx) -- movw %ax, -2(%edx) -- SETRTNVAL -- RETURN -- -- ALIGN (4) --L(aligned_16_119bytes): -- movdqa %xmm0, -119(%edx) --L(aligned_16_103bytes): -- movdqa %xmm0, -103(%edx) --L(aligned_16_87bytes): -- movdqa %xmm0, -87(%edx) --L(aligned_16_71bytes): -- movdqa %xmm0, -71(%edx) --L(aligned_16_55bytes): -- movdqa %xmm0, -55(%edx) --L(aligned_16_39bytes): -- movdqa %xmm0, -39(%edx) --L(aligned_16_23bytes): -- movdqa %xmm0, -23(%edx) --L(aligned_16_7bytes): -- movl %eax, -7(%edx) -- movw %ax, -3(%edx) -- movb %al, -1(%edx) -- SETRTNVAL -- RETURN -- -- ALIGN (4) --L(aligned_16_120bytes): -- movdqa %xmm0, -120(%edx) --L(aligned_16_104bytes): -- movdqa %xmm0, -104(%edx) --L(aligned_16_88bytes): -- movdqa %xmm0, -88(%edx) --L(aligned_16_72bytes): -- movdqa %xmm0, -72(%edx) --L(aligned_16_56bytes): -- movdqa %xmm0, -56(%edx) --L(aligned_16_40bytes): -- movdqa %xmm0, -40(%edx) --L(aligned_16_24bytes): -- movdqa %xmm0, -24(%edx) --L(aligned_16_8bytes): -- movq %xmm0, -8(%edx) -- SETRTNVAL -- RETURN -- -- ALIGN (4) --L(aligned_16_121bytes): -- movdqa %xmm0, -121(%edx) --L(aligned_16_105bytes): -- movdqa %xmm0, -105(%edx) --L(aligned_16_89bytes): -- movdqa %xmm0, -89(%edx) --L(aligned_16_73bytes): -- movdqa %xmm0, -73(%edx) --L(aligned_16_57bytes): -- movdqa %xmm0, -57(%edx) --L(aligned_16_41bytes): -- movdqa %xmm0, -41(%edx) --L(aligned_16_25bytes): -- movdqa %xmm0, -25(%edx) --L(aligned_16_9bytes): -- movq %xmm0, -9(%edx) -- movb %al, -1(%edx) -- SETRTNVAL -- RETURN -- -- ALIGN (4) --L(aligned_16_122bytes): -- movdqa %xmm0, -122(%edx) --L(aligned_16_106bytes): -- movdqa %xmm0, -106(%edx) --L(aligned_16_90bytes): -- movdqa 
%xmm0, -90(%edx) --L(aligned_16_74bytes): -- movdqa %xmm0, -74(%edx) --L(aligned_16_58bytes): -- movdqa %xmm0, -58(%edx) --L(aligned_16_42bytes): -- movdqa %xmm0, -42(%edx) --L(aligned_16_26bytes): -- movdqa %xmm0, -26(%edx) --L(aligned_16_10bytes): -- movq %xmm0, -10(%edx) -- movw %ax, -2(%edx) -- SETRTNVAL -- RETURN -- -- ALIGN (4) --L(aligned_16_123bytes): -- movdqa %xmm0, -123(%edx) --L(aligned_16_107bytes): -- movdqa %xmm0, -107(%edx) --L(aligned_16_91bytes): -- movdqa %xmm0, -91(%edx) --L(aligned_16_75bytes): -- movdqa %xmm0, -75(%edx) --L(aligned_16_59bytes): -- movdqa %xmm0, -59(%edx) --L(aligned_16_43bytes): -- movdqa %xmm0, -43(%edx) --L(aligned_16_27bytes): -- movdqa %xmm0, -27(%edx) --L(aligned_16_11bytes): -- movq %xmm0, -11(%edx) -- movw %ax, -3(%edx) -- movb %al, -1(%edx) -- SETRTNVAL -- RETURN -- -- ALIGN (4) --L(aligned_16_124bytes): -- movdqa %xmm0, -124(%edx) --L(aligned_16_108bytes): -- movdqa %xmm0, -108(%edx) --L(aligned_16_92bytes): -- movdqa %xmm0, -92(%edx) --L(aligned_16_76bytes): -- movdqa %xmm0, -76(%edx) --L(aligned_16_60bytes): -- movdqa %xmm0, -60(%edx) --L(aligned_16_44bytes): -- movdqa %xmm0, -44(%edx) --L(aligned_16_28bytes): -- movdqa %xmm0, -28(%edx) --L(aligned_16_12bytes): -- movq %xmm0, -12(%edx) -- movl %eax, -4(%edx) -- SETRTNVAL -- RETURN -- -- ALIGN (4) --L(aligned_16_125bytes): -- movdqa %xmm0, -125(%edx) --L(aligned_16_109bytes): -- movdqa %xmm0, -109(%edx) --L(aligned_16_93bytes): -- movdqa %xmm0, -93(%edx) --L(aligned_16_77bytes): -- movdqa %xmm0, -77(%edx) --L(aligned_16_61bytes): -- movdqa %xmm0, -61(%edx) --L(aligned_16_45bytes): -- movdqa %xmm0, -45(%edx) --L(aligned_16_29bytes): -- movdqa %xmm0, -29(%edx) --L(aligned_16_13bytes): -- movq %xmm0, -13(%edx) -- movl %eax, -5(%edx) -- movb %al, -1(%edx) -- SETRTNVAL -- RETURN -- -- ALIGN (4) --L(aligned_16_126bytes): -- movdqa %xmm0, -126(%edx) --L(aligned_16_110bytes): -- movdqa %xmm0, -110(%edx) --L(aligned_16_94bytes): -- movdqa %xmm0, -94(%edx) 
--L(aligned_16_78bytes): -- movdqa %xmm0, -78(%edx) --L(aligned_16_62bytes): -- movdqa %xmm0, -62(%edx) --L(aligned_16_46bytes): -- movdqa %xmm0, -46(%edx) --L(aligned_16_30bytes): -- movdqa %xmm0, -30(%edx) --L(aligned_16_14bytes): -- movq %xmm0, -14(%edx) -- movl %eax, -6(%edx) -- movw %ax, -2(%edx) -- SETRTNVAL -- RETURN -- -- ALIGN (4) --L(aligned_16_127bytes): -- movdqa %xmm0, -127(%edx) --L(aligned_16_111bytes): -- movdqa %xmm0, -111(%edx) --L(aligned_16_95bytes): -- movdqa %xmm0, -95(%edx) --L(aligned_16_79bytes): -- movdqa %xmm0, -79(%edx) --L(aligned_16_63bytes): -- movdqa %xmm0, -63(%edx) --L(aligned_16_47bytes): -- movdqa %xmm0, -47(%edx) --L(aligned_16_31bytes): -- movdqa %xmm0, -31(%edx) --L(aligned_16_15bytes): -- movq %xmm0, -15(%edx) -- movl %eax, -7(%edx) -- movw %ax, -3(%edx) -- movb %al, -1(%edx) -- SETRTNVAL -- RETURN_END -- --END (__memset_sse2) -- --#endif ---- glibc-2.11-188-g0cbcca8/sysdeps/i386/i686/multiarch/memset.S -+++ glibc-2.11.90-12//dev/null -@@ -1,112 +0,0 @@ --/* Multiple versions of memset -- Copyright (C) 2010 Free Software Foundation, Inc. -- Contributed by Intel Corporation. -- This file is part of the GNU C Library. -- -- The GNU C Library is free software; you can redistribute it and/or -- modify it under the terms of the GNU Lesser General Public -- License as published by the Free Software Foundation; either -- version 2.1 of the License, or (at your option) any later version. -- -- The GNU C Library is distributed in the hope that it will be useful, -- but WITHOUT ANY WARRANTY; without even the implied warranty of -- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -- Lesser General Public License for more details. -- -- You should have received a copy of the GNU Lesser General Public -- License along with the GNU C Library; if not, write to the Free -- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA -- 02111-1307 USA. 
*/ -- --#include --#include -- --/* Define multiple versions only for the definition in lib. */ --#ifndef NOT_IN_libc --# ifdef SHARED -- .section .gnu.linkonce.t.__i686.get_pc_thunk.bx,"ax",@progbits -- .globl __i686.get_pc_thunk.bx -- .hidden __i686.get_pc_thunk.bx -- .p2align 4 -- .type __i686.get_pc_thunk.bx,@function --__i686.get_pc_thunk.bx: -- movl (%esp), %ebx -- ret -- -- .text --ENTRY(memset) -- .type memset, @gnu_indirect_function -- pushl %ebx -- cfi_adjust_cfa_offset (4) -- cfi_rel_offset (ebx, 0) -- call __i686.get_pc_thunk.bx -- addl $_GLOBAL_OFFSET_TABLE_, %ebx -- cmpl $0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx) -- jne 1f -- call __init_cpu_features --1: leal __memset_ia32@GOTOFF(%ebx), %eax -- testl $bit_SSE2, CPUID_OFFSET+index_SSE2+__cpu_features@GOTOFF(%ebx) -- jz 2f -- leal __memset_sse2@GOTOFF(%ebx), %eax -- testl $bit_Fast_Rep_String, FEATURE_OFFSET+index_Fast_Rep_String+__cpu_features@GOTOFF(%ebx) -- jz 2f -- leal __memset_sse2_rep@GOTOFF(%ebx), %eax --2: popl %ebx -- cfi_adjust_cfa_offset (-4) -- cfi_restore (ebx) -- ret --END(memset) --# else -- .text --ENTRY(memset) -- .type memset, @gnu_indirect_function -- cmpl $0, KIND_OFFSET+__cpu_features -- jne 1f -- call __init_cpu_features --1: leal __memset_ia32, %eax -- testl $bit_SSE2, CPUID_OFFSET+index_SSE2+__cpu_features -- jz 2f -- leal __memset_sse2, %eax -- testl $bit_Fast_Rep_String, FEATURE_OFFSET+index_Fast_Rep_String+__cpu_features -- jz 2f -- leal __memset_sse2_rep, %eax --2: ret --END(memset) --# endif -- --# undef ENTRY --# define ENTRY(name) \ -- .type __memset_ia32, @function; \ -- .globl __memset_ia32; \ -- .p2align 4; \ -- __memset_ia32: cfi_startproc; \ -- CALL_MCOUNT --# undef END --# define END(name) \ -- cfi_endproc; .size __memset_ia32, .-__memset_ia32 -- --# undef ENTRY_CHK --# define ENTRY_CHK(name) \ -- .type __memset_chk_ia32, @function; \ -- .globl __memset_chk_ia32; \ -- .p2align 4; \ -- __memset_chk_ia32: cfi_startproc; \ -- CALL_MCOUNT --# undef END_CHK --# define 
END_CHK(name) \ -- cfi_endproc; .size __memset_chk_ia32, .-__memset_chk_ia32 -- --# ifdef SHARED --# undef libc_hidden_builtin_def --/* IFUNC doesn't work with the hidden functions in shared library since -- they will be called without setting up EBX needed for PLT which is -- used by IFUNC. */ --# define libc_hidden_builtin_def(name) \ -- .globl __GI_memset; __GI_memset = __memset_ia32 --# endif -- --# undef strong_alias --# define strong_alias(original, alias) --#endif -- --#include "../memset.S" ---- glibc-2.11-188-g0cbcca8/sysdeps/i386/i686/multiarch/memset_chk.S -+++ glibc-2.11.90-12//dev/null -@@ -1,116 +0,0 @@ --/* Multiple versions of __memset_chk -- Copyright (C) 2010 Free Software Foundation, Inc. -- Contributed by Intel Corporation. -- This file is part of the GNU C Library. -- -- The GNU C Library is free software; you can redistribute it and/or -- modify it under the terms of the GNU Lesser General Public -- License as published by the Free Software Foundation; either -- version 2.1 of the License, or (at your option) any later version. -- -- The GNU C Library is distributed in the hope that it will be useful, -- but WITHOUT ANY WARRANTY; without even the implied warranty of -- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -- Lesser General Public License for more details. -- -- You should have received a copy of the GNU Lesser General Public -- License along with the GNU C Library; if not, write to the Free -- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA -- 02111-1307 USA. */ -- --#include --#include -- --/* Define multiple versions only for the definition in lib. 
*/ --#ifndef NOT_IN_libc --# ifdef SHARED -- .section .gnu.linkonce.t.__i686.get_pc_thunk.bx,"ax",@progbits -- .globl __i686.get_pc_thunk.bx -- .hidden __i686.get_pc_thunk.bx -- .p2align 4 -- .type __i686.get_pc_thunk.bx,@function --__i686.get_pc_thunk.bx: -- movl (%esp), %ebx -- ret -- -- .text --ENTRY(__memset_chk) -- .type __memset_chk, @gnu_indirect_function -- pushl %ebx -- cfi_adjust_cfa_offset (4) -- cfi_rel_offset (ebx, 0) -- call __i686.get_pc_thunk.bx -- addl $_GLOBAL_OFFSET_TABLE_, %ebx -- cmpl $0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx) -- jne 1f -- call __init_cpu_features --1: leal __memset_chk_ia32@GOTOFF(%ebx), %eax -- testl $bit_SSE2, CPUID_OFFSET+index_SSE2+__cpu_features@GOTOFF(%ebx) -- jz 2f -- leal __memset_chk_sse2@GOTOFF(%ebx), %eax -- testl $bit_Fast_Rep_String, FEATURE_OFFSET+index_Fast_Rep_String+__cpu_features@GOTOFF(%ebx) -- jz 2f -- leal __memset_chk_sse2_rep@GOTOFF(%ebx), %eax --2: popl %ebx -- cfi_adjust_cfa_offset (-4) -- cfi_restore (ebx) -- ret --END(__memset_chk) -- --strong_alias (__memset_chk, __memset_zero_constant_len_parameter) -- .section .gnu.warning.__memset_zero_constant_len_parameter -- .string "memset used with constant zero length parameter; this could be due to transposed parameters" --# else -- .text --ENTRY(__memset_chk) -- .type __memset_chk, @gnu_indirect_function -- cmpl $0, KIND_OFFSET+__cpu_features -- jne 1f -- call __init_cpu_features --1: leal __memset_chk_ia32, %eax -- testl $bit_SSE2, CPUID_OFFSET+index_SSE2+__cpu_features -- jz 2f -- leal __memset_chk_sse2, %eax -- testl $bit_Fast_Rep_String, FEATURE_OFFSET+index_Fast_Rep_String+__cpu_features -- jz 2f -- leal __memset_chk_sse2_rep, %eax --2: ret --END(__memset_chk) -- -- .type __memset_chk_sse2, @function -- .p2align 4; --__memset_chk_sse2: -- cfi_startproc -- CALL_MCOUNT -- movl 12(%esp), %eax -- cmpl %eax, 16(%esp) -- jb __chk_fail -- jmp __memset_sse2 -- cfi_endproc -- .size __memset_chk_sse2, .-__memset_chk_sse2 -- -- .type __memset_chk_sse2_rep, 
@function -- .p2align 4; --__memset_chk_sse2_rep: -- cfi_startproc -- CALL_MCOUNT -- movl 12(%esp), %eax -- cmpl %eax, 16(%esp) -- jb __chk_fail -- jmp __memset_sse2_rep -- cfi_endproc -- .size __memset_chk_sse2_rep, .-__memset_chk_sse2_rep -- -- .type __memset_chk_ia32, @function -- .p2align 4; --__memset_chk_ia32: -- cfi_startproc -- CALL_MCOUNT -- movl 12(%esp), %eax -- cmpl %eax, 16(%esp) -- jb __chk_fail -- jmp __memset_ia32 -- cfi_endproc -- .size __memset_chk_ia32, .-__memset_chk_ia32 --# endif --#endif ---- glibc-2.11-188-g0cbcca8/sysdeps/i386/sysdep.h -+++ glibc-2.11.90-12/sysdeps/i386/sysdep.h -@@ -67,9 +67,6 @@ - ASM_SIZE_DIRECTIVE(name) \ - STABS_FUN_END(name) - --#define ENTRY_CHK(name) ENTRY (name) --#define END_CHK(name) END (name) -- - #ifdef HAVE_CPP_ASM_DEBUGINFO - /* Disable that goop, because we just pass -g through to the assembler - and it generates proper line number information directly. */ ---- glibc-2.11-188-g0cbcca8/sysdeps/ia64/Makefile -+++ glibc-2.11.90-12/sysdeps/ia64/Makefile ++ jae L(128bytesormore_nt) + sfence + L(shared_cache_loop_end): + #if defined DATA_CACHE_SIZE || !defined SHARED +--- glibc-2.11-215-g199428c/sysdeps/ia64/Makefile ++++ glibc-2.11.90-13/sysdeps/ia64/Makefile @@ -12,8 +12,8 @@ elide-routines.os += hp-timing ifeq (yes,$(build-shared)) @@ -7741,8 +1971,8 @@ endif endif ---- glibc-2.11-188-g0cbcca8/sysdeps/ia64/ia64libgcc.S -+++ glibc-2.11.90-12/sysdeps/ia64/ia64libgcc.S +--- glibc-2.11-215-g199428c/sysdeps/ia64/ia64libgcc.S ++++ glibc-2.11.90-13/sysdeps/ia64/ia64libgcc.S @@ -1,350 +0,0 @@ -/* From the Intel IA-64 Optimization Guide, choose the minimum latency - alternative. 
*/ @@ -8094,8 +2324,8 @@ - .symver ___multi3, __multi3@GLIBC_2.2 - -#endif ---- glibc-2.11-188-g0cbcca8/sysdeps/ia64/libgcc-compat.c -+++ glibc-2.11.90-12/sysdeps/ia64/libgcc-compat.c +--- glibc-2.11-215-g199428c/sysdeps/ia64/libgcc-compat.c ++++ glibc-2.11.90-13/sysdeps/ia64/libgcc-compat.c @@ -0,0 +1,84 @@ +/* pre-.hidden libgcc compatibility + Copyright (C) 2002 Free Software Foundation, Inc. @@ -8181,8 +2411,8 @@ +symbol_version (INTUSE (__multi3), __multi3, GLIBC_2.2); + +#endif ---- glibc-2.11-188-g0cbcca8/sysdeps/powerpc/powerpc64/Makefile -+++ glibc-2.11.90-12/sysdeps/powerpc/powerpc64/Makefile +--- glibc-2.11-215-g199428c/sysdeps/powerpc/powerpc64/Makefile ++++ glibc-2.11.90-13/sysdeps/powerpc/powerpc64/Makefile @@ -30,6 +30,7 @@ ifneq ($(elf),no) # we use -fpic instead which is much better. CFLAGS-initfini.s += -fpic -O1 @@ -8191,8 +2421,8 @@ endif ifeq ($(subdir),elf) ---- glibc-2.11-188-g0cbcca8/sysdeps/unix/nice.c -+++ glibc-2.11.90-12/sysdeps/unix/nice.c +--- glibc-2.11-215-g199428c/sysdeps/unix/nice.c ++++ glibc-2.11.90-13/sysdeps/unix/nice.c @@ -42,7 +42,12 @@ nice (int incr) __set_errno (save); } @@ -8207,8 +2437,8 @@ if (result == -1) { if (errno == EACCES) ---- glibc-2.11-188-g0cbcca8/sysdeps/unix/sysv/linux/check_pf.c -+++ glibc-2.11.90-12/sysdeps/unix/sysv/linux/check_pf.c +--- glibc-2.11-215-g199428c/sysdeps/unix/sysv/linux/check_pf.c ++++ glibc-2.11.90-13/sysdeps/unix/sysv/linux/check_pf.c @@ -27,13 +27,10 @@ #include #include @@ -8224,8 +2454,8 @@ #ifndef IFA_F_HOMEADDRESS # define IFA_F_HOMEADDRESS 0 ---- glibc-2.11-188-g0cbcca8/sysdeps/unix/sysv/linux/dl-osinfo.h -+++ glibc-2.11.90-12/sysdeps/unix/sysv/linux/dl-osinfo.h +--- glibc-2.11-215-g199428c/sysdeps/unix/sysv/linux/dl-osinfo.h ++++ glibc-2.11.90-13/sysdeps/unix/sysv/linux/dl-osinfo.h @@ -17,10 +17,13 @@ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. 
*/ @@ -8273,8 +2503,8 @@ } else #endif ---- glibc-2.11-188-g0cbcca8/sysdeps/unix/sysv/linux/futimesat.c -+++ glibc-2.11.90-12/sysdeps/unix/sysv/linux/futimesat.c +--- glibc-2.11-215-g199428c/sysdeps/unix/sysv/linux/futimesat.c ++++ glibc-2.11.90-13/sysdeps/unix/sysv/linux/futimesat.c @@ -37,14 +37,14 @@ futimesat (fd, file, tvp) { int result; @@ -8317,8 +2547,8 @@ { size_t filelen = strlen (file); if (__builtin_expect (filelen == 0, 0)) ---- glibc-2.11-188-g0cbcca8/sysdeps/unix/sysv/linux/i386/dl-cache.h -+++ glibc-2.11.90-12/sysdeps/unix/sysv/linux/i386/dl-cache.h +--- glibc-2.11-215-g199428c/sysdeps/unix/sysv/linux/i386/dl-cache.h ++++ glibc-2.11.90-13/sysdeps/unix/sysv/linux/i386/dl-cache.h @@ -0,0 +1,59 @@ +/* Support for reading /etc/ld.so.cache files written by Linux ldconfig. + Copyright (C) 2004 Free Software Foundation, Inc. @@ -8379,8 +2609,8 @@ + } while (0) + +#include_next ---- glibc-2.11-188-g0cbcca8/sysdeps/unix/sysv/linux/ia64/dl-cache.h -+++ glibc-2.11.90-12/sysdeps/unix/sysv/linux/ia64/dl-cache.h +--- glibc-2.11-215-g199428c/sysdeps/unix/sysv/linux/ia64/dl-cache.h ++++ glibc-2.11.90-13/sysdeps/unix/sysv/linux/ia64/dl-cache.h @@ -22,4 +22,31 @@ #define _dl_cache_check_flags(flags) \ ((flags) == _DL_CACHE_DEFAULT_ID) @@ -8413,29 +2643,29 @@ + } while (0) + #include_next ---- glibc-2.11-188-g0cbcca8/sysdeps/unix/sysv/linux/ia64/dl-procinfo.c -+++ glibc-2.11.90-12/sysdeps/unix/sysv/linux/ia64/dl-procinfo.c +--- glibc-2.11-215-g199428c/sysdeps/unix/sysv/linux/ia64/dl-procinfo.c ++++ glibc-2.11.90-13/sysdeps/unix/sysv/linux/ia64/dl-procinfo.c @@ -0,0 +1,5 @@ +#ifdef IS_IN_ldconfig +#include +#else +#include +#endif ---- glibc-2.11-188-g0cbcca8/sysdeps/unix/sysv/linux/ia64/dl-procinfo.h -+++ glibc-2.11.90-12/sysdeps/unix/sysv/linux/ia64/dl-procinfo.h +--- glibc-2.11-215-g199428c/sysdeps/unix/sysv/linux/ia64/dl-procinfo.h ++++ glibc-2.11.90-13/sysdeps/unix/sysv/linux/ia64/dl-procinfo.h @@ -0,0 +1,5 @@ +#ifdef IS_IN_ldconfig +#include +#else +#include 
+#endif ---- glibc-2.11-188-g0cbcca8/sysdeps/unix/sysv/linux/ia64/ldd-rewrite.sed -+++ glibc-2.11.90-12/sysdeps/unix/sysv/linux/ia64/ldd-rewrite.sed +--- glibc-2.11-215-g199428c/sysdeps/unix/sysv/linux/ia64/ldd-rewrite.sed ++++ glibc-2.11.90-13/sysdeps/unix/sysv/linux/ia64/ldd-rewrite.sed @@ -1 +1 @@ -s_^\(RTLDLIST=\)\([^ ]*\)-ia64\(\.so\.[0-9.]*\)[ ]*$_\1"\2-ia64\3 \2\3"_ +s_^\(RTLDLIST=\)\([^ ]*\)-ia64\(\.so\.[0-9.]*\)[ ]*$_\1"\2-ia64\3 /emul/ia32-linux\2\3"_ ---- glibc-2.11-188-g0cbcca8/sysdeps/unix/sysv/linux/netlinkaccess.h -+++ glibc-2.11.90-12/sysdeps/unix/sysv/linux/netlinkaccess.h +--- glibc-2.11-215-g199428c/sysdeps/unix/sysv/linux/netlinkaccess.h ++++ glibc-2.11.90-13/sysdeps/unix/sysv/linux/netlinkaccess.h @@ -25,6 +25,24 @@ #include @@ -8461,8 +2691,8 @@ struct netlink_res { ---- glibc-2.11-188-g0cbcca8/sysdeps/unix/sysv/linux/paths.h -+++ glibc-2.11.90-12/sysdeps/unix/sysv/linux/paths.h +--- glibc-2.11-215-g199428c/sysdeps/unix/sysv/linux/paths.h ++++ glibc-2.11.90-13/sysdeps/unix/sysv/linux/paths.h @@ -62,7 +62,7 @@ #define _PATH_TTY "/dev/tty" #define _PATH_UNIX "/boot/vmlinux" @@ -8472,13 +2702,13 @@ #define _PATH_WTMP "/var/log/wtmp" /* Provide trailing slash, since mostly used for building pathnames. 
*/ ---- glibc-2.11-188-g0cbcca8/sysdeps/unix/sysv/linux/sparc/sparc64/fxstat.c -+++ glibc-2.11.90-12/sysdeps/unix/sysv/linux/sparc/sparc64/fxstat.c +--- glibc-2.11-215-g199428c/sysdeps/unix/sysv/linux/sparc/sparc64/fxstat.c ++++ glibc-2.11.90-13/sysdeps/unix/sysv/linux/sparc/sparc64/fxstat.c @@ -1 +1 @@ -#include "../../fxstat.c" +#include "../../i386/fxstat.c" ---- glibc-2.11-188-g0cbcca8/sysdeps/unix/sysv/linux/tcsetattr.c -+++ glibc-2.11.90-12/sysdeps/unix/sysv/linux/tcsetattr.c +--- glibc-2.11-215-g199428c/sysdeps/unix/sysv/linux/tcsetattr.c ++++ glibc-2.11.90-13/sysdeps/unix/sysv/linux/tcsetattr.c @@ -49,6 +49,7 @@ tcsetattr (fd, optional_actions, termios_p) { struct __kernel_termios k_termios; @@ -8524,137 +2754,8 @@ + return retval; } libc_hidden_def (tcsetattr) ---- glibc-2.11-188-g0cbcca8/sysdeps/x86_64/cacheinfo.c -+++ glibc-2.11.90-12/sysdeps/x86_64/cacheinfo.c -@@ -74,7 +74,6 @@ static const struct intel_02_cache_info - { 0x0a, 2, 32, M(_SC_LEVEL1_DCACHE_SIZE), 8192 }, - { 0x0c, 4, 32, M(_SC_LEVEL1_DCACHE_SIZE), 16384 }, - { 0x0d, 4, 64, M(_SC_LEVEL1_DCACHE_SIZE), 16384 }, -- { 0x0e, 6, 64, M(_SC_LEVEL1_DCACHE_SIZE), 24576 }, - { 0x21, 8, 64, M(_SC_LEVEL2_CACHE_SIZE), 262144 }, - { 0x22, 4, 64, M(_SC_LEVEL3_CACHE_SIZE), 524288 }, - { 0x23, 8, 64, M(_SC_LEVEL3_CACHE_SIZE), 1048576 }, -@@ -114,7 +113,6 @@ static const struct intel_02_cache_info - { 0x7c, 8, 64, M(_SC_LEVEL2_CACHE_SIZE), 1048576 }, - { 0x7d, 8, 64, M(_SC_LEVEL2_CACHE_SIZE), 2097152 }, - { 0x7f, 2, 64, M(_SC_LEVEL2_CACHE_SIZE), 524288 }, -- { 0x80, 8, 64, M(_SC_LEVEL2_CACHE_SIZE), 524288 }, - { 0x82, 8, 32, M(_SC_LEVEL2_CACHE_SIZE), 262144 }, - { 0x83, 8, 32, M(_SC_LEVEL2_CACHE_SIZE), 524288 }, - { 0x84, 8, 32, M(_SC_LEVEL2_CACHE_SIZE), 1048576 }, -@@ -454,10 +452,9 @@ __cache_sysconf (int name) - } - - --/* Data cache size for use in memory and string routines, typically -+/* Half the data cache size for use in memory and string routines, typically - L1 size. 
*/ - long int __x86_64_data_cache_size_half attribute_hidden = 32 * 1024 / 2; --long int __x86_64_data_cache_size attribute_hidden = 32 * 1024; - /* Shared cache size for use in memory and string routines, typically - L2 or L3 size. */ - long int __x86_64_shared_cache_size_half attribute_hidden = 1024 * 1024 / 2; -@@ -660,10 +657,7 @@ init_cacheinfo (void) - } - - if (data > 0) -- { -- __x86_64_data_cache_size_half = data / 2; -- __x86_64_data_cache_size = data; -- } -+ __x86_64_data_cache_size_half = data / 2; - - if (shared > 0) - { ---- glibc-2.11-188-g0cbcca8/sysdeps/x86_64/multiarch/ifunc-defines.sym -+++ glibc-2.11.90-12/sysdeps/x86_64/multiarch/ifunc-defines.sym -@@ -13,8 +13,5 @@ CPUID_ECX_OFFSET offsetof (struct cpuid_registers, ecx) - CPUID_EDX_OFFSET offsetof (struct cpuid_registers, edx) - FAMILY_OFFSET offsetof (struct cpu_features, family) - MODEL_OFFSET offsetof (struct cpu_features, model) --FEATURE_OFFSET offsetof (struct cpu_features, feature) --FEATURE_SIZE sizeof (unsigned int) - - COMMON_CPUID_INDEX_1 --FEATURE_INDEX_1 ---- glibc-2.11-188-g0cbcca8/sysdeps/x86_64/multiarch/init-arch.c -+++ glibc-2.11.90-12/sysdeps/x86_64/multiarch/init-arch.c -@@ -64,23 +64,7 @@ __init_cpu_features (void) - __cpu_features.model += extended_model; - } - else if (__cpu_features.family == 0x06) -- { -- __cpu_features.model += extended_model; -- switch (__cpu_features.model) -- { -- case 0x1a: -- case 0x1e: -- case 0x1f: -- case 0x25: -- case 0x2e: -- case 0x2f: -- /* Rep string instructions are fast on Intel Core i3, i5 -- and i7. */ -- __cpu_features.feature[index_Fast_Rep_String] -- |= bit_Fast_Rep_String; -- break; -- } -- } -+ __cpu_features.model += extended_model; - } - /* This spells out "AuthenticAMD". 
*/ - else if (ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65) ---- glibc-2.11-188-g0cbcca8/sysdeps/x86_64/multiarch/init-arch.h -+++ glibc-2.11.90-12/sysdeps/x86_64/multiarch/init-arch.h -@@ -16,8 +16,6 @@ - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. */ - --#define bit_Fast_Rep_String (1 << 0) -- - #ifdef __ASSEMBLER__ - - #include -@@ -30,8 +28,6 @@ - #define index_SSSE3 COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET - #define index_SSE4_2 COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET - --#define index_Fast_Rep_String FEATURE_INDEX_1*FEATURE_SIZE -- - #else /* __ASSEMBLER__ */ - - #include -@@ -43,13 +39,6 @@ enum - COMMON_CPUID_INDEX_MAX - }; - --enum -- { -- FEATURE_INDEX_1 = 0, -- /* Keep the following line at the end. */ -- FEATURE_INDEX_MAX -- }; -- - extern struct cpu_features - { - enum -@@ -69,7 +58,6 @@ extern struct cpu_features - } cpuid[COMMON_CPUID_INDEX_MAX]; - unsigned int family; - unsigned int model; -- unsigned int feature[FEATURE_INDEX_MAX]; - } __cpu_features attribute_hidden; - - -@@ -98,6 +86,4 @@ extern const struct cpu_features *__get_cpu_features (void) - #define HAS_SSE4_2 HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, 20) - #define HAS_FMA HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, 12) - --#define index_Fast_Rep_String FEATURE_INDEX_1 -- - #endif /* __ASSEMBLER__ */ ---- glibc-2.11-188-g0cbcca8/timezone/zic.c -+++ glibc-2.11.90-12/timezone/zic.c +--- glibc-2.11-215-g199428c/timezone/zic.c ++++ glibc-2.11.90-13/timezone/zic.c @@ -1921,7 +1921,7 @@ const int zonecount; if (stdrp != NULL && stdrp->r_hiyear == 2037) return; diff --git a/glibc.spec b/glibc.spec index 80909c5..cb03c3c 100644 --- a/glibc.spec +++ b/glibc.spec @@ -1,4 +1,4 @@ -%define glibcsrcdir glibc-2.11-188-g0cbcca8 +%define glibcsrcdir glibc-2.11-215-g199428c %define glibcversion 2.11.90 ### glibc.spec.in follows: %define run_glibc_tests 1 @@ -24,7 +24,7 @@ Summary: The GNU libc libraries Name: glibc Version: 
%{glibcversion} -Release: 12 +Release: 13 # GPLv2+ is used in a bunch of programs, LGPLv2+ is used for libraries. # Things that are linked directly into dynamically linked programs # and shared libraries (e.g. crt files, lib*_nonshared.a) have an additional @@ -950,12 +950,10 @@ rm -f *.filelist* %endif %endif %ifarch s390x -%dir /lib /lib/ld64.so.1 %endif %ifarch ia64 %if "%{_lib}" == "lib64" -%dir /lib /lib/ld-linux-ia64.so.2 %endif %endif @@ -982,7 +980,6 @@ rm -f *.filelist* %ifnarch %{auxarches} %files -f common.filelist common %defattr(-,root,root) -%dir %{_prefix}/lib/locale %attr(0644,root,root) %verify(not md5 size mtime) %{_prefix}/lib/locale/locale-archive.tmpl %attr(0644,root,root) %verify(not md5 size mtime mode) %ghost %config(missingok,noreplace) %{_prefix}/lib/locale/locale-archive %dir %attr(755,root,root) /etc/default @@ -1033,6 +1030,16 @@ rm -f *.filelist* %endif %changelog +* Mon Feb 22 2010 Andreas Schwab - 2.11.90-13 +- Update from master + - Use CPUID_OFFSET instead of FEATURE_OFFSET + - Add 32bit memcmp/strcmp/strncmp optimized for SSSE3/SSE4.2 + - Fix file descriptor leak in nftw with FTW_CHDIR (BZ#11271) + - Add Sparc STT_GNU_IFUNC support + - Add power7-optimized classification functions +- Reapply "Optimize 32bit memset/memcpy with SSE2/SSSE3."
+- Use unsigned comparison in sse memcpy/memset + * Mon Feb 8 2010 Andreas Schwab - 2.11.90-12 - Update from master - Update constants in for current kernels (#11235) diff --git a/import.log b/import.log index a80fa61..927728c 100644 --- a/import.log +++ b/import.log @@ -44,3 +44,4 @@ glibc-2_11_90-9:HEAD:glibc-2.11.90-9.src.rpm:1263553804 glibc-2_11_90-10:HEAD:glibc-2.11.90-10.src.rpm:1264006094 glibc-2_11_90-11:HEAD:glibc-2.11.90-11.src.rpm:1265032871 glibc-2_11_90-12:HEAD:glibc-2.11.90-12.src.rpm:1265642830 +glibc-2_11_90-13:F-13:glibc-2.11.90-13.src.rpm:1266853673 diff --git a/sources b/sources index 1feb156..83cd07d 100644 --- a/sources +++ b/sources @@ -1,2 +1,2 @@ -bd4ead4a2f894434acb52840bed4c17d glibc-2.11-188-g0cbcca8-fedora.tar.bz2 -6f26cac5f9a78260e97a648b4ca46e7e glibc-2.11-188-g0cbcca8.tar.bz2 +872a06858dd16ca5aad8392f33164cc4 glibc-2.11-215-g199428c-fedora.tar.bz2 +39b9d94c614d330bdfa4f16a9b9442df glibc-2.11-215-g199428c.tar.bz2