implement several sse2 intrinsics inside vec2d, __m128d in vec2d union

author Jean-Philippe Bruyère <jp_bruyere@hotmail.com>

Thu, 12 Aug 2021 12:21:00 +0000 (14:21 +0200)

committer Jean-Philippe Bruyère <jp_bruyere@hotmail.com>

Thu, 12 Aug 2021 12:21:00 +0000 (14:21 +0200)
author Jean-Philippe Bruyère <jp_bruyere@hotmail.com>
Thu, 12 Aug 2021 12:21:00 +0000 (14:21 +0200)
committer Jean-Philippe Bruyère <jp_bruyere@hotmail.com>
Thu, 12 Aug 2021 12:21:00 +0000 (14:21 +0200)
diff --git a/CMakeLists.txt b/CMakeLists.txt

index 2a66d2b4957cba507c6cefe1b59f91cfefe945f4..12d9aa538d2185456ab22ff05223967e50bbc6fc 100644 (file)
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -44,7 +44,7 @@ ELSE()
         UNSET(ENABLE_DBG_UTILS CACHE)
         UNSET(ENABLE_WIRED_FILL CACHE)
         IF (UNIX)
-           SET(CMAKE_${LANG}_FLAGS "-O3 -march=native -w ")
+           SET(CMAKE_${LANG}_FLAGS "-O3 -march=native -msse2 -w ")
         ELSEIF(MSVC)
             SET(CMAKE_${LANG}_FLAGS "/O2 /TC /W0")
         ENDIF()
diff --git a/src/vectors.h b/src/vectors.h

index e0699e80a3438550ec620abfa7fb9f69db74190b..c4de20604156584b29a70b57665f90c2477db376 100644 (file)
--- a/src/vectors.h
+++ b/src/vectors.h
@@ -26,20 +26,26 @@
  #include <immintrin.h>
  
  typedef union {
-       float v2si __attribute__ ((vector_size (8)));
         struct {
                 float x;
                 float y;
         };
  }vec2;
  
-typedef union {
-       __m128d raw;// __attribute__ ((vector_size (16)));
-       struct {
+#ifdef __SSE2__
+       typedef union {
+               __m128d raw;
+               struct {
+                       double x;
+                       double y;
+               };
+       }vec2d;
+#else
+       typedef struct {
                 double x;
                 double y;
-       };
-}vec2d;
+       }vec2d;
+#endif
  
  typedef struct {
         float x;
@@ -79,6 +85,7 @@ typedef struct {
         int16_t x;
         int16_t y;
  }vec2i16;
+
  // compute length of float vector 2d
  vkvg_inline    float vec2_length(vec2 v){
         return sqrtf (v.x*v.x + v.y*v.y);
@@ -111,27 +118,53 @@ vkvg_inline       vec2 vec2_norm(vec2 a)
         float m = sqrtf (a.x*a.x + a.y*a.y);
         return (vec2){a.x/m, a.y/m};
  }
-// normalize double vector
-vkvg_inline    vec2d vec2d_norm(vec2d a)
-{
-       double m = sqrt (a.x*a.x + a.y*a.y);
-       return (vec2d){a.x/m, a.y/m};
+// divide 2d vector by scalar
+vkvg_inline    vec2 vec2_div(vec2 a, float m){
+       return (vec2){a.x/m,a.y/m};
+}
+// multiply 2d vector by scalar
+vkvg_inline    vec2 vec2_mult(vec2 a, float m){
+       return (vec2){a.x*m,a.y*m};
+}
+// compute sum of two double precision vectors
+vkvg_inline    vec2d vec2d_add (vec2d a, vec2d b){
+#ifdef __SSE2__
+       return (vec2d)_mm_add_pd (a.raw, b.raw);
+#else
+       return (vec2d){a.x + b.x, a.y + b.y};
+#endif
+}
+// compute subtraction of two double precision vectors
+vkvg_inline    vec2d vec2d_sub (vec2d a, vec2d b){
+#ifdef __SSE2__
+       return (vec2d)_mm_sub_pd (a.raw, b.raw);
+#else
+       return (vec2d){a.x - b.x, a.y - b.y};
+#endif
  }
  // multiply 2d vector by scalar
  vkvg_inline    vec2d vec2d_mult(vec2d a, double m){
+#ifdef __SSE2__
+       return (vec2d)_mm_mul_pd (a.raw, _mm_set_pd1 (m));
+#else
         return (vec2d){a.x*m,a.y*m};
-}
-// devide 2d vector by scalar
-vkvg_inline    vec2 vec2_div(vec2 a, float m){
-       return (vec2){a.x/m,a.y/m};
+#endif
+
  }
  vkvg_inline    vec2d vec2d_div(vec2d a, double m){
+#ifdef __SSE2__
         return (vec2d)_mm_div_pd (a.raw, _mm_set_pd1 (m));
-       //return (vec2d){a.x/m,a.y/m};
+#else
+       return (vec2d){a.x/m,a.y/m};
+#endif
  }
-// multiply 2d vector by scalar
-vkvg_inline    vec2 vec2_mult(vec2 a, float m){
-       return (vec2){a.x*m,a.y*m};
+
+// normalize double vector
+vkvg_inline    vec2d vec2d_norm(vec2d a)
+{
+       double m = sqrt (a.x*a.x + a.y*a.y);
+       return (vec2d)vec2d_div (a, m);
+       //return (vec2d){a.x/m, a.y/m};
  }
  // compute perpendicular vector
  vkvg_inline    vec2d vec2d_perp (vec2d a){
@@ -149,20 +182,10 @@ vkvg_inline       vec2 vec2d_to_vec2(vec2d vd){
  vkvg_inline    vec2 vec2_add (vec2 a, vec2 b){
         return (vec2){a.x + b.x, a.y + b.y};
  }
-// compute sum of two double precision vectors
-vkvg_inline    vec2d vec2d_add (vec2d a, vec2d b){
-       return (vec2d)_mm_add_pd (a.raw, b.raw);
-       //return (vec2d){a.x + b.x, a.y + b.y};
-}
  // compute subbstraction of two single precision vectors
  vkvg_inline    vec2 vec2_sub (vec2 a, vec2 b){
         return (vec2){a.x - b.x, a.y - b.y};
  }
-// compute subbstraction of two double precision vectors
-vkvg_inline    vec2d vec2d_sub (vec2d a, vec2d b){
-       return (vec2d)_mm_sub_pd (a.raw, b.raw);
-       //return (vec2d){a.x - b.x, a.y - b.y};
-}
  // test equality of two single precision vectors
  vkvg_inline    bool vec2_equ (vec2 a, vec2 b){
         return (EQUF(a.x,b.x)&EQUF(a.y,b.y));
diff --git a/src/vkvg_context_internal.c b/src/vkvg_context_internal.c

index 694b95205fe6553c805cdb3e060ba77e256de796..03d9ec00d87b1624bf61754d242b5a8339e4f462 100644 (file)
--- a/src/vkvg_context_internal.c
+++ b/src/vkvg_context_internal.c
@@ -87,11 +87,11 @@ bool _check_point_array (VkvgContext ctx){
         if (ctx->sizePoints - ctx->pointCount > VKVG_ARRAY_THRESHOLD)
                 return false;
         ctx->sizePoints += VKVG_PTS_SIZE;
-       vec2* tmp = (vec2*) realloc (ctx->points, (size_t)ctx->sizePoints * sizeof(vec2));
+       vec2d* tmp = (vec2d*) realloc (ctx->points, (size_t)ctx->sizePoints * sizeof(vec2d));
         LOG(VKVG_LOG_DBG_ARRAYS, "resize Points: new size(point): %u Ptr: %p -> %p\n", ctx->sizePoints, ctx->points, tmp);
         if (tmp == NULL){
                 ctx->status = VKVG_STATUS_NO_MEMORY;
-               LOG(VKVG_LOG_ERR, "resize PATH failed: new size(byte): %zu\n", ctx->sizePoints * sizeof(vec2));
+               LOG(VKVG_LOG_ERR, "resize PATH failed: new size(byte): %zu\n", ctx->sizePoints * sizeof(vec2d));
                 _clear_path (ctx);
                 return true;
         }
@@ -733,7 +733,7 @@ void _init_descriptor_sets (VkvgContext ctx){
         VK_CHECK_RESULT(vkAllocateDescriptorSets(dev->vkDev, &descriptorSetAllocateInfo, &ctx->dsGrad));
  }
  //populate vertice buff for stroke
-float _build_vb_step (vkvg_context* ctx, double hw, vec2d pL, vec2d p0, vec2d pR, bool isCurve){
+float _build_vb_step (vkvg_context* restrict ctx, double hw, vec2d pL, vec2d p0, vec2d pR, bool isCurve){
         Vertex v = {{0},ctx->curColor, {0,0,-1}};
  
         vec2d v0 = vec2d_sub(p0, pL);
@@ -868,7 +868,7 @@ bool ptInTriangle(vec2d p, vec2d p0, vec2d p1, vec2d p2) {
         return (s>=0) && (t>=0) && (s+t<=D);
  }
  
-void _free_ctx_save (vkvg_context_save_t* sav){
+void _free_ctx_save (vkvg_context_save_t* restrict sav){
         if (sav->dashCount > 0)
                 free (sav->dashes);
         free(sav->selectedFontName);
@@ -885,7 +885,7 @@ void _free_ctx_save (vkvg_context_save_t* sav){
  #define CURVE_ANGLE_TOLERANCE_EPSILON 0.001
  //no floating point arithmetic operation allowed in macro.
  #pragma warning(disable:4127)
-void _recursive_bezier (VkvgContext ctx,
+void _recursive_bezier (VkvgContext restrict ctx,
                                                 double x1, double y1, double x2, double y2,
                                                 double x3, double y3, double x4, double y4,
                                                 unsigned level) {
diff --git a/src/vkvg_fonts.h b/src/vkvg_fonts.h

index ce06786ab4b0959f46b4b759f35aa000da30ff3c..2d4c669695c0ecfda5676f80e4c55c2b8db8c167 100644 (file)
--- a/src/vkvg_fonts.h
+++ b/src/vkvg_fonts.h
@@ -86,7 +86,7 @@ typedef struct {
         uint32_t                        fcNamesCount;   /* Count of resolved names by fontConfig */
         char*                           fontFile;               /* Font file full path*/
         uint32_t                        sizeCount;              /* available font size loaded */
-       _vkvg_font_t*           sizes                   /* loaded font size array */
+       _vkvg_font_t*           sizes;                  /* loaded font size array */
  }_vkvg_font_identity_t;
  
  // Font cache global structure, entry point for all font related operations.
diff --git a/src/vkvg_internal.h b/src/vkvg_internal.h

index 8621663eb969e4d8a39f5a23ef15f61de7594ac2..09ded36b9e21c2b694d6b8e92f4ca6b57c4c1cb9 100644 (file)
--- a/src/vkvg_internal.h
+++ b/src/vkvg_internal.h
@@ -48,7 +48,7 @@
         #define M_2_SQRTPI      1.12837916709551257390  /* 2/sqrt(pi) */
         #define M_SQRT2         1.41421356237309504880  /* sqrt(2) */
         #define M_SQRT1_2       0.70710678118654752440  /* 1/sqrt(2) */
-#endif*/
+#endif
  
  #ifdef DEBUG
  #define LOG(level,...) (vkvg_log_level & level) ? fprintf (stdout, __VA_ARGS__):true;
@@ -56,10 +56,12 @@
  #define LOG
  #endif
  
+
+
  #define PATH_CLOSED_BIT     0x80000000              /* most significant bit of path elmts is closed/open path state */
  #define PATH_HAS_CURVES_BIT 0x40000000              /* 2rd most significant bit of path elmts is curved status
-                                                     * for main path, this indicate that curve datas are present.
-                                                     * For segments, this indicate that the segment is curved or not */
+                                                                                                        * for main path, this indicate that curve datas are present.
+                                                                                                        * For segments, this indicate that the segment is curved or not */
  #define PATH_ELT_MASK       0x3FFFFFFF              /* Bit mask for fetching path element value */
  
  #define ROUNDF(f, c) (((float)((int)((f) * (c))) / (c)))
@@ -68,7 +70,6 @@
  #define EQUF(a, b) (fabsf(a-b)<=FLT_EPSILON)
  #define EQU(a, b) (fabs(a-b)<=DBL_EPSILON)
  
-
  #include "cross_os.h"
  #include "vectors.h"
  #include "cross_mutex.h"
@@ -78,6 +79,6 @@
  //used to store clipping bit on context saving. 8 bit stencil will allow 6 save/restore layer
  #define FB_COLOR_FORMAT VK_FORMAT_B8G8R8A8_UNORM
  #define VKVG_SURFACE_IMGS_REQUIREMENTS VK_IMAGE_USAGE_SAMPLED_BIT|VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT|\
-    VK_IMAGE_USAGE_TRANSFER_DST_BIT|VK_IMAGE_USAGE_TRANSFER_SRC_BIT|VK_FORMAT_FEATURE_BLIT_SRC_BIT
+       VK_IMAGE_USAGE_TRANSFER_DST_BIT|VK_IMAGE_USAGE_TRANSFER_SRC_BIT|VK_FORMAT_FEATURE_BLIT_SRC_BIT
  #define VKVG_FENCE_TIMEOUT UINT64_MAX
  #endif
author	Jean-Philippe Bruyère <jp_bruyere@hotmail.com>
	Thu, 12 Aug 2021 12:21:00 +0000 (14:21 +0200)
committer	Jean-Philippe Bruyère <jp_bruyere@hotmail.com>
	Thu, 12 Aug 2021 12:21:00 +0000 (14:21 +0200)
CMakeLists.txt		patch \| blob \| history
src/vectors.h		patch \| blob \| history
src/vkvg_context_internal.c		patch \| blob \| history
src/vkvg_fonts.h		patch \| blob \| history
src/vkvg_internal.h		patch \| blob \| history