From 8d315a14d503549a6c5baf18f37f0e90df1635fb Mon Sep 17 00:00:00 2001 From: =?utf8?q?Jean-Philippe=20Bruy=C3=A8re?= Date: Thu, 12 Aug 2021 14:21:00 +0200 Subject: [PATCH] implement several sse2 intrinsics inside vec2d, __m128d in vec2d union --- CMakeLists.txt | 2 +- src/vectors.h | 81 ++++++++++++++++++++++++------------- src/vkvg_context_internal.c | 10 ++--- src/vkvg_fonts.h | 2 +- src/vkvg_internal.h | 11 ++--- 5 files changed, 65 insertions(+), 41 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 2a66d2b..12d9aa5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -44,7 +44,7 @@ ELSE() UNSET(ENABLE_DBG_UTILS CACHE) UNSET(ENABLE_WIRED_FILL CACHE) IF (UNIX) - SET(CMAKE_${LANG}_FLAGS "-O3 -march=native -w ") + SET(CMAKE_${LANG}_FLAGS "-O3 -march=native -msse2 -w ") ELSEIF(MSVC) SET(CMAKE_${LANG}_FLAGS "/O2 /TC /W0") ENDIF() diff --git a/src/vectors.h b/src/vectors.h index e0699e8..c4de206 100644 --- a/src/vectors.h +++ b/src/vectors.h @@ -26,20 +26,26 @@ #include typedef union { - float v2si __attribute__ ((vector_size (8))); struct { float x; float y; }; }vec2; -typedef union { - __m128d raw;// __attribute__ ((vector_size (16))); - struct { +#ifdef __SSE2__ + typedef union { + __m128d raw; + struct { + double x; + double y; + }; + }vec2d; +#else + typedef struct { double x; double y; - }; -}vec2d; + }vec2d; +#endif typedef struct { float x; @@ -79,6 +85,7 @@ typedef struct { int16_t x; int16_t y; }vec2i16; + // compute length of float vector 2d vkvg_inline float vec2_length(vec2 v){ return sqrtf (v.x*v.x + v.y*v.y); @@ -111,27 +118,53 @@ vkvg_inline vec2 vec2_norm(vec2 a) float m = sqrtf (a.x*a.x + a.y*a.y); return (vec2){a.x/m, a.y/m}; } -// normalize double vector -vkvg_inline vec2d vec2d_norm(vec2d a) -{ - double m = sqrt (a.x*a.x + a.y*a.y); - return (vec2d){a.x/m, a.y/m}; +// devide 2d vector by scalar +vkvg_inline vec2 vec2_div(vec2 a, float m){ + return (vec2){a.x/m,a.y/m}; +} +// multiply 2d vector by scalar +vkvg_inline vec2 vec2_mult(vec2 a, float m){ + return (vec2){a.x*m,a.y*m}; +} +// compute sum of two double precision vectors +vkvg_inline vec2d vec2d_add (vec2d a, vec2d b){ +#ifdef __SSE2__ + return (vec2d)_mm_add_pd (a.raw, b.raw); +#else + return (vec2d){a.x + b.x, a.y + b.y}; +#endif +} +// compute subbstraction of two double precision vectors +vkvg_inline vec2d vec2d_sub (vec2d a, vec2d b){ +#ifdef __SSE2__ + return (vec2d)_mm_sub_pd (a.raw, b.raw); +#else + return (vec2d){a.x - b.x, a.y - b.y}; +#endif } // multiply 2d vector by scalar vkvg_inline vec2d vec2d_mult(vec2d a, double m){ +#ifdef __SSE2__ + return (vec2d)_mm_mul_pd (a.raw, _mm_set_pd1 (m)); +#else return (vec2d){a.x*m,a.y*m}; -} -// devide 2d vector by scalar -vkvg_inline vec2 vec2_div(vec2 a, float m){ - return (vec2){a.x/m,a.y/m}; +#endif + } vkvg_inline vec2d vec2d_div(vec2d a, double m){ +#ifdef __SSE2__ return (vec2d)_mm_div_pd (a.raw, _mm_set_pd1 (m)); - //return (vec2d){a.x/m,a.y/m}; +#else + return (vec2d){a.x/m,a.y/m}; +#endif } -// multiply 2d vector by scalar -vkvg_inline vec2 vec2_mult(vec2 a, float m){ - return (vec2){a.x*m,a.y*m}; + +// normalize double vector +vkvg_inline vec2d vec2d_norm(vec2d a) +{ + double m = sqrt (a.x*a.x + a.y*a.y); + return (vec2d)vec2d_div (a, m); + //return (vec2d){a.x/m, a.y/m}; } // compute perpendicular vector vkvg_inline vec2d vec2d_perp (vec2d a){ @@ -149,20 +182,10 @@ vkvg_inline vec2 vec2d_to_vec2(vec2d vd){ vkvg_inline vec2 vec2_add (vec2 a, vec2 b){ return (vec2){a.x + b.x, a.y + b.y}; } -// compute sum of two double precision vectors -vkvg_inline vec2d vec2d_add (vec2d a, vec2d b){ - return (vec2d)_mm_add_pd (a.raw, b.raw); - //return (vec2d){a.x + b.x, a.y + b.y}; -} // compute subbstraction of two single precision vectors vkvg_inline vec2 vec2_sub (vec2 a, vec2 b){ return (vec2){a.x - b.x, a.y - b.y}; } -// compute subbstraction of two double precision vectors -vkvg_inline vec2d vec2d_sub (vec2d a, vec2d b){ - return (vec2d)_mm_sub_pd (a.raw, b.raw); - //return (vec2d){a.x - b.x, a.y - b.y}; -} // test equality of two single precision vectors vkvg_inline bool vec2_equ (vec2 a, vec2 b){ return (EQUF(a.x,b.x)&EQUF(a.y,b.y)); diff --git a/src/vkvg_context_internal.c b/src/vkvg_context_internal.c index 694b952..03d9ec0 100644 --- a/src/vkvg_context_internal.c +++ b/src/vkvg_context_internal.c @@ -87,11 +87,11 @@ bool _check_point_array (VkvgContext ctx){ if (ctx->sizePoints - ctx->pointCount > VKVG_ARRAY_THRESHOLD) return false; ctx->sizePoints += VKVG_PTS_SIZE; - vec2* tmp = (vec2*) realloc (ctx->points, (size_t)ctx->sizePoints * sizeof(vec2)); + vec2d* tmp = (vec2d*) realloc (ctx->points, (size_t)ctx->sizePoints * sizeof(vec2d)); LOG(VKVG_LOG_DBG_ARRAYS, "resize Points: new size(point): %u Ptr: %p -> %p\n", ctx->sizePoints, ctx->points, tmp); if (tmp == NULL){ ctx->status = VKVG_STATUS_NO_MEMORY; - LOG(VKVG_LOG_ERR, "resize PATH failed: new size(byte): %zu\n", ctx->sizePoints * sizeof(vec2)); + LOG(VKVG_LOG_ERR, "resize PATH failed: new size(byte): %zu\n", ctx->sizePoints * sizeof(vec2d)); _clear_path (ctx); return true; } @@ -733,7 +733,7 @@ void _init_descriptor_sets (VkvgContext ctx){ VK_CHECK_RESULT(vkAllocateDescriptorSets(dev->vkDev, &descriptorSetAllocateInfo, &ctx->dsGrad)); } //populate vertice buff for stroke -float _build_vb_step (vkvg_context* ctx, double hw, vec2d pL, vec2d p0, vec2d pR, bool isCurve){ +float _build_vb_step (vkvg_context* restrict ctx, double hw, vec2d pL, vec2d p0, vec2d pR, bool isCurve){ Vertex v = {{0},ctx->curColor, {0,0,-1}}; vec2d v0 = vec2d_sub(p0, pL); @@ -868,7 +868,7 @@ bool ptInTriangle(vec2d p, vec2d p0, vec2d p1, vec2d p2) { return (s>=0) && (t>=0) && (s+t<=D); } -void _free_ctx_save (vkvg_context_save_t* sav){ +void _free_ctx_save (vkvg_context_save_t* restrict sav){ if (sav->dashCount > 0) free (sav->dashes); free(sav->selectedFontName); @@ -885,7 +885,7 @@ void _free_ctx_save (vkvg_context_save_t* sav){ #define CURVE_ANGLE_TOLERANCE_EPSILON 0.001 //no floating point arithmetic operation allowed in macro. #pragma warning(disable:4127) -void _recursive_bezier (VkvgContext ctx, +void _recursive_bezier (VkvgContext restrict ctx, double x1, double y1, double x2, double y2, double x3, double y3, double x4, double y4, unsigned level) { diff --git a/src/vkvg_fonts.h b/src/vkvg_fonts.h index ce06786..2d4c669 100644 --- a/src/vkvg_fonts.h +++ b/src/vkvg_fonts.h @@ -86,7 +86,7 @@ typedef struct { uint32_t fcNamesCount; /* Count of resolved names by fontConfig */ char* fontFile; /* Font file full path*/ uint32_t sizeCount; /* available font size loaded */ - _vkvg_font_t* sizes /* loaded font size array */ + _vkvg_font_t* sizes; /* loaded font size array */ }_vkvg_font_identity_t; // Font cache global structure, entry point for all font related operations. diff --git a/src/vkvg_internal.h b/src/vkvg_internal.h index 8621663..09ded36 100644 --- a/src/vkvg_internal.h +++ b/src/vkvg_internal.h @@ -48,7 +48,7 @@ #define M_2_SQRTPI 1.12837916709551257390 /* 2/sqrt(pi) */ #define M_SQRT2 1.41421356237309504880 /* sqrt(2) */ #define M_SQRT1_2 0.70710678118654752440 /* 1/sqrt(2) */ -#endif*/ +#endif #ifdef DEBUG #define LOG(level,...) (vkvg_log_level & level) ? fprintf (stdout, __VA_ARGS__):true; @@ -56,10 +56,12 @@ #define LOG #endif + + #define PATH_CLOSED_BIT 0x80000000 /* most significant bit of path elmts is closed/open path state */ #define PATH_HAS_CURVES_BIT 0x40000000 /* 2rd most significant bit of path elmts is curved status - * for main path, this indicate that curve datas are present. - * For segments, this indicate that the segment is curved or not */ + * for main path, this indicate that curve datas are present. + * For segments, this indicate that the segment is curved or not */ #define PATH_ELT_MASK 0x3FFFFFFF /* Bit mask for fetching path element value */ #define ROUNDF(f, c) (((float)((int)((f) * (c))) / (c))) @@ -68,7 +70,6 @@ #define EQUF(a, b) (fabsf(a-b)<=FLT_EPSILON) #define EQU(a, b) (fabs(a-b)<=DBL_EPSILON) - #include "cross_os.h" #include "vectors.h" #include "cross_mutex.h" @@ -78,6 +79,6 @@ //used to store clipping bit on context saving. 8 bit stencil will allow 6 save/restore layer #define FB_COLOR_FORMAT VK_FORMAT_B8G8R8A8_UNORM #define VKVG_SURFACE_IMGS_REQUIREMENTS VK_IMAGE_USAGE_SAMPLED_BIT|VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT|\ - VK_IMAGE_USAGE_TRANSFER_DST_BIT|VK_IMAGE_USAGE_TRANSFER_SRC_BIT|VK_FORMAT_FEATURE_BLIT_SRC_BIT + VK_IMAGE_USAGE_TRANSFER_DST_BIT|VK_IMAGE_USAGE_TRANSFER_SRC_BIT|VK_FORMAT_FEATURE_BLIT_SRC_BIT #define VKVG_FENCE_TIMEOUT UINT64_MAX #endif -- 2.47.3