Fix GCC inline assembly syntax

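The `rep stosw` / `rep stosl` instructions modify the destination pointer
and count registers and write to memory, but the asm statements declared
those operands ("D" and "c") as inputs only and had no "memory" clobber.
The compiler was therefore free to assume the registers and memory were
unchanged, which can cause incorrect compiler transformations when these
functions are inlined cross-module via ThinLTO. The operands are now
read-write outputs ("+D", "+c") and the statements are volatile with a
"memory" clobber.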

Bug: b/135066502
Change-Id: I7031d5df2d9fe0f2712e65b98cfdf5b0990db598
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/32748
Presubmit-Ready: Nicolas Capens <nicolascapens@google.com>
Kokoro-Presubmit: kokoro <noreply+kokoro@google.com>
Tested-by: Nicolas Capens <nicolascapens@google.com>
Reviewed-by: Alexis Hétu <sugoi@google.com>
diff --git a/src/Common/Memory.cpp b/src/Common/Memory.cpp
index 779d068..e7d4ac7 100644
--- a/src/Common/Memory.cpp
+++ b/src/Common/Memory.cpp
@@ -137,7 +137,7 @@
 	#if defined(_MSC_VER) && defined(__x86__) && !defined(MEMORY_SANITIZER)
 		__stosw(memory, element, count);
 	#elif defined(__GNUC__) && defined(__x86__) && !defined(MEMORY_SANITIZER)
-		__asm__("rep stosw" : : "D"(memory), "a"(element), "c"(count));
+		__asm__ __volatile__("rep stosw" : "+D"(memory), "+c"(count) : "a"(element) : "memory");
 	#else
 		for(size_t i = 0; i < count; i++)
 		{
@@ -151,7 +151,7 @@
 	#if defined(_MSC_VER) && defined(__x86__) && !defined(MEMORY_SANITIZER)
 		__stosd((unsigned long*)memory, element, count);
 	#elif defined(__GNUC__) && defined(__x86__) && !defined(MEMORY_SANITIZER)
-		__asm__("rep stosl" : : "D"(memory), "a"(element), "c"(count));
+		__asm__ __volatile__("rep stosl" : "+D"(memory), "+c"(count) : "a"(element) : "memory");
 	#else
 		for(size_t i = 0; i < count; i++)
 		{
@@ -159,4 +159,5 @@
 		}
 	#endif
 }
+
 }
diff --git a/src/System/Memory.cpp b/src/System/Memory.cpp
index ffaab71..663732f 100644
--- a/src/System/Memory.cpp
+++ b/src/System/Memory.cpp
@@ -137,7 +137,7 @@
 	#if defined(_MSC_VER) && defined(__x86__) && !defined(MEMORY_SANITIZER)
 		__stosw(memory, element, count);
 	#elif defined(__GNUC__) && defined(__x86__) && !defined(MEMORY_SANITIZER)
-		__asm__("rep stosw" : : "D"(memory), "a"(element), "c"(count));
+		__asm__ __volatile__("rep stosw" : "+D"(memory), "+c"(count) : "a"(element) : "memory");
 	#else
 		for(size_t i = 0; i < count; i++)
 		{
@@ -151,7 +151,7 @@
 	#if defined(_MSC_VER) && defined(__x86__) && !defined(MEMORY_SANITIZER)
 		__stosd((unsigned long*)memory, element, count);
 	#elif defined(__GNUC__) && defined(__x86__) && !defined(MEMORY_SANITIZER)
-		__asm__("rep stosl" : : "D"(memory), "a"(element), "c"(count));
+		__asm__ __volatile__("rep stosl" : "+D"(memory), "+c"(count) : "a"(element) : "memory");
 	#else
 		for(size_t i = 0; i < count; i++)
 		{