From 8388454f20eeb10f0609c736b716aabdfd24d323 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Jean-Philippe=20Bruy=C3=A8re?= Date: Thu, 12 Aug 2021 16:00:02 +0200 Subject: [PATCH] take vkimg stride into account when saving surface to png or memory, spaces to tabs --- include/vkvg.h | 36 +- src/cross_mutex.c | 1 - src/nanosvg.h | 8 +- src/stb_image.h | 7104 +++++++++++++++++------------------ src/stb_image_write.h | 1312 +++---- src/vkvg_buff.c | 1 - src/vkvg_buff.h | 16 +- src/vkvg_context.c | 108 +- src/vkvg_context_internal.c | 90 +- src/vkvg_context_internal.h | 186 +- src/vkvg_device.c | 64 +- src/vkvg_device_internal.c | 80 +- src/vkvg_device_internal.h | 102 +- src/vkvg_fonts.c | 87 +- src/vkvg_fonts.h | 86 +- src/vkvg_internal.h | 18 +- src/vkvg_matrix.c | 10 +- src/vkvg_pattern.c | 1 - src/vkvg_pattern.h | 18 +- src/vkvg_surface.c | 51 +- src/vkvg_surface_internal.c | 12 +- src/vkvg_surface_internal.h | 6 +- vkh | 2 +- 23 files changed, 4702 insertions(+), 4697 deletions(-) diff --git a/include/vkvg.h b/include/vkvg.h index a97f2d1..fbd78f5 100644 --- a/include/vkvg.h +++ b/include/vkvg.h @@ -40,26 +40,26 @@ extern "C" { */ /*! @file vkvg.h - * @brief The header of the VKVG library. + * @brief The header of the VKVG library. * - * This is the header file of the VKVG library. It defines all its types and - * declares all its functions. + * This is the header file of the VKVG library. It defines all its types and + * declares all its functions. * - * For more information about how to use this file, see @ref build_include. + * For more information about how to use this file, see @ref build_include. */ /*! @defgroup surface Surface - * @brief Functions and types related to VKVG surface. + * @brief Functions and types related to VKVG surface. * - * This is the reference documentation for creating, using and destroying VKVG - * Surfaces used as backend for drawing operations. + * This is the reference documentation for creating, using and destroying VKVG + * Surfaces used as backend for drawing operations. */ /*! @defgroup context Context - * @brief Functions and types related to VKVG contexts. + * @brief Functions and types related to VKVG contexts. * - * This is the reference documentation for VKVG contexts used to draw on @ref surface. + * This is the reference documentation for VKVG contexts used to draw on @ref surface. */ /*! @defgroup path Path creation and manipulation reference. - * @brief Functions and types related to path edition. + * @brief Functions and types related to path edition. */ #include @@ -253,7 +253,7 @@ typedef struct _vkvg_context_t* VkvgContext; * * A #VkvgSurface represents an image, either as the destination * of a drawing operation or as source when drawing onto another - * surface. To draw to a #VkvgSurface, create a vkvg context + * surface. To draw to a #VkvgSurface, create a vkvg context * with the surface as the target, using #vkvg_create(). * hidden internals. * @@ -267,7 +267,7 @@ typedef struct _vkvg_surface_t* VkvgSurface; * * A #VkvgDevice is required for creating new surfaces. */ -typedef struct _vkvg_device_t* VkvgDevice; +typedef struct _vkvg_device_t* VkvgDevice; /** * @brief Opaque pointer on a Vkvg pattern structure. * @ingroup pattern @@ -284,12 +284,12 @@ typedef struct _vkvg_pattern_t* VkvgPattern; * @ingroup device */ typedef struct { - uint32_t sizePoints; /**< maximum point array size */ - uint32_t sizePathes; /**< maximum path array size */ - uint32_t sizeVertices; /**< maximum size of host vertice cache */ - uint32_t sizeIndices; /**< maximum size of host index cache */ - uint32_t sizeVBO; /**< maximum size of vulkan vertex buffer */ - uint32_t sizeIBO; /**< maximum size of vulkan index buffer */ + uint32_t sizePoints; /**< maximum point array size */ + uint32_t sizePathes; /**< maximum path array size */ + uint32_t sizeVertices; /**< maximum size of host vertice cache */ + uint32_t sizeIndices; /**< maximum size of host index cache */ + uint32_t sizeVBO; /**< maximum size of vulkan vertex buffer */ + uint32_t sizeIBO; /**< maximum size of vulkan index buffer */ } vkvg_debug_stats_t; vkvg_debug_stats_t vkvg_device_get_stats (VkvgDevice dev); diff --git a/src/cross_mutex.c b/src/cross_mutex.c index 2066f28..57de5dc 100644 --- a/src/cross_mutex.c +++ b/src/cross_mutex.c @@ -69,4 +69,3 @@ int MUTEX_DESTROY(MUTEX *mutex) return -1; #endif } - diff --git a/src/nanosvg.h b/src/nanosvg.h index fb9df4b..809bc87 100644 --- a/src/nanosvg.h +++ b/src/nanosvg.h @@ -2729,19 +2729,19 @@ static void nsvg__startElement(void* ud, const char* el, const char** attr) nsvg__pushAttr(p); nsvg__parseEllipse(p, attr); nsvg__popAttr(p); - } else if (strcmp(el, "line") == 0) { + } else if (strcmp(el, "line") == 0) { nsvg__pushAttr(p); nsvg__parseLine(p, attr); nsvg__popAttr(p); - } else if (strcmp(el, "polyline") == 0) { + } else if (strcmp(el, "polyline") == 0) { nsvg__pushAttr(p); nsvg__parsePoly(p, attr, 0); nsvg__popAttr(p); - } else if (strcmp(el, "polygon") == 0) { + } else if (strcmp(el, "polygon") == 0) { nsvg__pushAttr(p); nsvg__parsePoly(p, attr, 1); nsvg__popAttr(p); - } else if (strcmp(el, "linearGradient") == 0) { + } else if (strcmp(el, "linearGradient") == 0) { nsvg__parseGradient(p, attr, NSVG_PAINT_LINEAR_GRADIENT); } else if (strcmp(el, "radialGradient") == 0) { nsvg__parseGradient(p, attr, NSVG_PAINT_RADIAL_GRADIENT); diff --git a/src/stb_image.h b/src/stb_image.h index d9c21bc..8b4af4d 100644 --- a/src/stb_image.h +++ b/src/stb_image.h @@ -1,8 +1,8 @@ /* stb_image - v2.19 - public domain image loader - http://nothings.org/stb - no warranty implied; use at your own risk + no warranty implied; use at your own risk Do this: - #define STB_IMAGE_IMPLEMENTATION + #define STB_IMAGE_IMPLEMENTATION before you include this file in *one* C or C++ file to create the implementation. // i.e. it should look like this: @@ -17,27 +17,27 @@ QUICK NOTES: - Primarily of interest to game developers and other people who can - avoid problematic images and only need the trivial interface + Primarily of interest to game developers and other people who can + avoid problematic images and only need the trivial interface - JPEG baseline & progressive (12 bpc/arithmetic not supported, same as stock IJG lib) - PNG 1/2/4/8/16-bit-per-channel + JPEG baseline & progressive (12 bpc/arithmetic not supported, same as stock IJG lib) + PNG 1/2/4/8/16-bit-per-channel - TGA (not sure what subset, if a subset) - BMP non-1bpp, non-RLE - PSD (composited view only, no extra channels, 8/16 bit-per-channel) + TGA (not sure what subset, if a subset) + BMP non-1bpp, non-RLE + PSD (composited view only, no extra channels, 8/16 bit-per-channel) - GIF (*comp always reports as 4-channel) - HDR (radiance rgbE format) - PIC (Softimage PIC) - PNM (PPM and PGM binary only) + GIF (*comp always reports as 4-channel) + HDR (radiance rgbE format) + PIC (Softimage PIC) + PNM (PPM and PGM binary only) - Animated GIF still needs a proper API, but here's one way to do it: - http://gist.github.com/urraka/685d9a6340b26b830d49 + Animated GIF still needs a proper API, but here's one way to do it: + http://gist.github.com/urraka/685d9a6340b26b830d49 - - decode from memory or through FILE (define STBI_NO_STDIO to remove code) - - decode from arbitrary I/O callbacks - - SIMD acceleration on x86/x64 (SSE2) and ARM (NEON) + - decode from memory or through FILE (define STBI_NO_STDIO to remove code) + - decode from arbitrary I/O callbacks + - SIMD acceleration on x86/x64 (SSE2) and ARM (NEON) Full documentation under "DOCUMENTATION" below. @@ -48,59 +48,59 @@ LICENSE RECENT REVISION HISTORY: - 2.19 (2018-02-11) fix warning - 2.18 (2018-01-30) fix warnings - 2.17 (2018-01-29) bugfix, 1-bit BMP, 16-bitness query, fix warnings - 2.16 (2017-07-23) all functions have 16-bit variants; optimizations; bugfixes - 2.15 (2017-03-18) fix png-1,2,4; all Imagenet JPGs; no runtime SSE detection on GCC - 2.14 (2017-03-03) remove deprecated STBI_JPEG_OLD; fixes for Imagenet JPGs - 2.13 (2016-12-04) experimental 16-bit API, only for PNG so far; fixes - 2.12 (2016-04-02) fix typo in 2.11 PSD fix that caused crashes - 2.11 (2016-04-02) 16-bit PNGS; enable SSE2 in non-gcc x64 - RGB-format JPEG; remove white matting in PSD; - allocate large structures on the stack; - correct channel count for PNG & BMP - 2.10 (2016-01-22) avoid warning introduced in 2.09 - 2.09 (2016-01-16) 16-bit TGA; comments in PNM files; STBI_REALLOC_SIZED + 2.19 (2018-02-11) fix warning + 2.18 (2018-01-30) fix warnings + 2.17 (2018-01-29) bugfix, 1-bit BMP, 16-bitness query, fix warnings + 2.16 (2017-07-23) all functions have 16-bit variants; optimizations; bugfixes + 2.15 (2017-03-18) fix png-1,2,4; all Imagenet JPGs; no runtime SSE detection on GCC + 2.14 (2017-03-03) remove deprecated STBI_JPEG_OLD; fixes for Imagenet JPGs + 2.13 (2016-12-04) experimental 16-bit API, only for PNG so far; fixes + 2.12 (2016-04-02) fix typo in 2.11 PSD fix that caused crashes + 2.11 (2016-04-02) 16-bit PNGS; enable SSE2 in non-gcc x64 + RGB-format JPEG; remove white matting in PSD; + allocate large structures on the stack; + correct channel count for PNG & BMP + 2.10 (2016-01-22) avoid warning introduced in 2.09 + 2.09 (2016-01-16) 16-bit TGA; comments in PNM files; STBI_REALLOC_SIZED See end of file for full revision history. - ============================ Contributors ========================= - - Image formats Extensions, features - Sean Barrett (jpeg, png, bmp) Jetro Lauha (stbi_info) - Nicolas Schulz (hdr, psd) Martin "SpartanJ" Golini (stbi_info) - Jonathan Dummer (tga) James "moose2000" Brown (iPhone PNG) - Jean-Marc Lienher (gif) Ben "Disch" Wenger (io callbacks) - Tom Seddon (pic) Omar Cornut (1/2/4-bit PNG) - Thatcher Ulrich (psd) Nicolas Guillemot (vertical flip) - Ken Miller (pgm, ppm) Richard Mitton (16-bit PSD) - github:urraka (animated gif) Junggon Kim (PNM comments) - Christopher Forseth (animated gif) Daniel Gibson (16-bit TGA) - socks-the-fox (16-bit PNG) - Jeremy Sawicki (handle all ImageNet JPGs) - Optimizations & bugfixes Mikhail Morozov (1-bit BMP) - Fabian "ryg" Giesen Anael Seghezzi (is-16-bit query) - Arseny Kapoulkine - John-Mark Allen + ============================ Contributors ========================= + + Image formats Extensions, features + Sean Barrett (jpeg, png, bmp) Jetro Lauha (stbi_info) + Nicolas Schulz (hdr, psd) Martin "SpartanJ" Golini (stbi_info) + Jonathan Dummer (tga) James "moose2000" Brown (iPhone PNG) + Jean-Marc Lienher (gif) Ben "Disch" Wenger (io callbacks) + Tom Seddon (pic) Omar Cornut (1/2/4-bit PNG) + Thatcher Ulrich (psd) Nicolas Guillemot (vertical flip) + Ken Miller (pgm, ppm) Richard Mitton (16-bit PSD) + github:urraka (animated gif) Junggon Kim (PNM comments) + Christopher Forseth (animated gif) Daniel Gibson (16-bit TGA) + socks-the-fox (16-bit PNG) + Jeremy Sawicki (handle all ImageNet JPGs) + Optimizations & bugfixes Mikhail Morozov (1-bit BMP) + Fabian "ryg" Giesen Anael Seghezzi (is-16-bit query) + Arseny Kapoulkine + John-Mark Allen Bug & warning fixes - Marc LeBlanc David Woo Guillaume George Martins Mozeiko - Christpher Lloyd Jerry Jansson Joseph Thomson Phil Jordan - Dave Moore Roy Eltham Hayaki Saito Nathan Reed - Won Chun Luke Graham Johan Duparc Nick Verigakis - the Horde3D community Thomas Ruf Ronny Chevalier github:rlyeh - Janez Zemva John Bartholomew Michal Cichon github:romigrou - Jonathan Blow Ken Hamada Tero Hanninen github:svdijk - Laurent Gomila Cort Stratton Sergio Gonzalez github:snagar - Aruelien Pocheville Thibault Reuille Cass Everitt github:Zelex - Ryamond Barbiero Paul Du Bois Engin Manap github:grim210 - Aldo Culquicondor Philipp Wiesemann Dale Weiler github:sammyhw - Oriol Ferrer Mesia Josh Tobin Matthew Gregan github:phprus - Julian Raschke Gregory Mullen Baldur Karlsson github:poppolopoppo - Christian Floisand Kevin Schmidt github:darealshinji - Blazej Dariusz Roszkowski github:Michaelangel007 + Marc LeBlanc David Woo Guillaume George Martins Mozeiko + Christpher Lloyd Jerry Jansson Joseph Thomson Phil Jordan + Dave Moore Roy Eltham Hayaki Saito Nathan Reed + Won Chun Luke Graham Johan Duparc Nick Verigakis + the Horde3D community Thomas Ruf Ronny Chevalier github:rlyeh + Janez Zemva John Bartholomew Michal Cichon github:romigrou + Jonathan Blow Ken Hamada Tero Hanninen github:svdijk + Laurent Gomila Cort Stratton Sergio Gonzalez github:snagar + Aruelien Pocheville Thibault Reuille Cass Everitt github:Zelex + Ryamond Barbiero Paul Du Bois Engin Manap github:grim210 + Aldo Culquicondor Philipp Wiesemann Dale Weiler github:sammyhw + Oriol Ferrer Mesia Josh Tobin Matthew Gregan github:phprus + Julian Raschke Gregory Mullen Baldur Karlsson github:poppolopoppo + Christian Floisand Kevin Schmidt github:darealshinji + Blazej Dariusz Roszkowski github:Michaelangel007 */ #ifndef STBI_INCLUDE_STB_IMAGE_H @@ -109,24 +109,24 @@ RECENT REVISION HISTORY: // DOCUMENTATION // // Limitations: -// - no 12-bit-per-channel JPEG -// - no JPEGs with arithmetic coding -// - GIF always returns *comp=4 +// - no 12-bit-per-channel JPEG +// - no JPEGs with arithmetic coding +// - GIF always returns *comp=4 // // Basic usage (see HDR discussion below for HDR usage): -// int x,y,n; -// unsigned char *data = stbi_load(filename, &x, &y, &n, 0); -// // ... process data if not NULL ... -// // ... x = width, y = height, n = # 8-bit components per pixel ... -// // ... replace '0' with '1'..'4' to force that many components per pixel -// // ... but 'n' will always be the number that it would have been if you said 0 -// stbi_image_free(data) +// int x,y,n; +// unsigned char *data = stbi_load(filename, &x, &y, &n, 0); +// // ... process data if not NULL ... +// // ... x = width, y = height, n = # 8-bit components per pixel ... +// // ... replace '0' with '1'..'4' to force that many components per pixel +// // ... but 'n' will always be the number that it would have been if you said 0 +// stbi_image_free(data) // // Standard parameters: -// int *x -- outputs image width in pixels -// int *y -- outputs image height in pixels -// int *channels_in_file -- outputs # of image components in image file -// int desired_channels -- if non-zero, # of image components requested in result +// int *x -- outputs image width in pixels +// int *y -- outputs image height in pixels +// int *channels_in_file -- outputs # of image components in image file +// int desired_channels -- if non-zero, # of image components requested in result // // The return value from an image loader is an 'unsigned char *' which points // to the pixel data, or NULL on an allocation failure or if the image is @@ -144,11 +144,11 @@ RECENT REVISION HISTORY: // An output image with N components has the following components interleaved // in this order in each pixel: // -// N=#comp components -// 1 grey -// 2 grey, alpha -// 3 red, green, blue -// 4 red, green, blue, alpha +// N=#comp components +// 1 grey +// 2 grey, alpha +// 3 red, green, blue +// 4 red, green, blue, alpha // // If image loading fails for any reason, the return value will be NULL, // and *x, *y, *channels_in_file will be unchanged. The function @@ -165,9 +165,9 @@ RECENT REVISION HISTORY: // // stb libraries are designed with the following priorities: // -// 1. easy to use -// 2. easy to maintain -// 3. good performance +// 1. easy to use +// 2. easy to maintain +// 3. good performance // // Sometimes I let "good performance" creep up in priority over "easy to maintain", // and for best performance I may provide less-easy-to-use APIs that give higher @@ -178,9 +178,9 @@ RECENT REVISION HISTORY: // Some secondary priorities arise directly from the first two, some of which // make more explicit reasons why performance can't be emphasized. // -// - Portable ("ease of use") -// - Small source code footprint ("easy to maintain") -// - No dependencies ("ease of use") +// - Portable ("ease of use") +// - Small source code footprint ("easy to maintain") +// - No dependencies ("ease of use") // // =========================================================================== // @@ -226,8 +226,8 @@ RECENT REVISION HISTORY: // LDR, assuming gamma 2.2 and an arbitrary scale factor defaulting to 1; // both of these constants can be reconfigured through this interface: // -// stbi_hdr_to_ldr_gamma(2.2f); -// stbi_hdr_to_ldr_scale(1.0f); +// stbi_hdr_to_ldr_gamma(2.2f); +// stbi_hdr_to_ldr_scale(1.0f); // // (note, do not use _inverse_ constants; stbi_image will invert them // appropriately). @@ -235,21 +235,21 @@ RECENT REVISION HISTORY: // Additionally, there is a new, parallel interface for loading files as // (linear) floats to preserve the full dynamic range: // -// float *data = stbi_loadf(filename, &x, &y, &n, 0); +// float *data = stbi_loadf(filename, &x, &y, &n, 0); // // If you load LDR images through this interface, those images will // be promoted to floating point values, run through the inverse of // constants corresponding to the above: // -// stbi_ldr_to_hdr_scale(1.0f); -// stbi_ldr_to_hdr_gamma(2.2f); +// stbi_ldr_to_hdr_scale(1.0f); +// stbi_ldr_to_hdr_gamma(2.2f); // // Finally, given a filename (or an open file or memory block--see header // file for details) containing image data, you can query for the "most // appropriate" interface to use (that is, whether the image is HDR or // not), using: // -// stbi_is_hdr(char *filename); +// stbi_is_hdr(char *filename); // // =========================================================================== // @@ -270,36 +270,36 @@ RECENT REVISION HISTORY: // // ADDITIONAL CONFIGURATION // -// - You can suppress implementation of any of the decoders to reduce -// your code footprint by #defining one or more of the following -// symbols before creating the implementation. +// - You can suppress implementation of any of the decoders to reduce +// your code footprint by #defining one or more of the following +// symbols before creating the implementation. // -// STBI_NO_JPEG -// STBI_NO_PNG -// STBI_NO_BMP -// STBI_NO_PSD -// STBI_NO_TGA -// STBI_NO_GIF -// STBI_NO_HDR -// STBI_NO_PIC -// STBI_NO_PNM (.ppm and .pgm) +// STBI_NO_JPEG +// STBI_NO_PNG +// STBI_NO_BMP +// STBI_NO_PSD +// STBI_NO_TGA +// STBI_NO_GIF +// STBI_NO_HDR +// STBI_NO_PIC +// STBI_NO_PNM (.ppm and .pgm) // -// - You can request *only* certain decoders and suppress all other ones -// (this will be more forward-compatible, as addition of new decoders -// doesn't require you to disable them explicitly): +// - You can request *only* certain decoders and suppress all other ones +// (this will be more forward-compatible, as addition of new decoders +// doesn't require you to disable them explicitly): // -// STBI_ONLY_JPEG -// STBI_ONLY_PNG -// STBI_ONLY_BMP -// STBI_ONLY_PSD -// STBI_ONLY_TGA -// STBI_ONLY_GIF -// STBI_ONLY_HDR -// STBI_ONLY_PIC -// STBI_ONLY_PNM (.ppm and .pgm) +// STBI_ONLY_JPEG +// STBI_ONLY_PNG +// STBI_ONLY_BMP +// STBI_ONLY_PSD +// STBI_ONLY_TGA +// STBI_ONLY_GIF +// STBI_ONLY_HDR +// STBI_ONLY_PIC +// STBI_ONLY_PNM (.ppm and .pgm) // -// - If you use STBI_NO_PNG (or _ONLY_ without PNG), and you still -// want the zlib decoder to be available, #define STBI_SUPPORT_ZLIB +// - If you use STBI_NO_PNG (or _ONLY_ without PNG), and you still +// want the zlib decoder to be available, #define STBI_SUPPORT_ZLIB // @@ -313,9 +313,9 @@ enum { STBI_default = 0, // only used for desired_channels - STBI_grey = 1, + STBI_grey = 1, STBI_grey_alpha = 2, - STBI_rgb = 3, + STBI_rgb = 3, STBI_rgb_alpha = 4 }; @@ -343,9 +343,9 @@ extern "C" { typedef struct { - int (*read) (void *user,char *data,int size); // fill 'data' with 'size' bytes. return number of bytes actually read - void (*skip) (void *user,int n); // skip the next 'n' bytes, or 'unget' the last -n bytes if negative - int (*eof) (void *user); // returns nonzero if we are at end of file/data + int (*read) (void *user,char *data,int size); // fill 'data' with 'size' bytes. return number of bytes actually read + void (*skip) (void *user,int n); // skip the next 'n' bytes, or 'unget' the last -n bytes if negative + int (*eof) (void *user); // returns nonzero if we are at end of file/data } stbi_io_callbacks; //////////////////////////////////// @@ -353,15 +353,15 @@ typedef struct // 8-bits-per-channel interface // -STBIDEF stbi_uc *stbi_load_from_memory (stbi_uc const *buffer, int len , int *x, int *y, int *channels_in_file, int desired_channels); -STBIDEF stbi_uc *stbi_load_from_callbacks(stbi_io_callbacks const *clbk , void *user, int *x, int *y, int *channels_in_file, int desired_channels); +STBIDEF stbi_uc *stbi_load_from_memory (stbi_uc const *buffer, int len , int *x, int *y, int *channels_in_file, int desired_channels); +STBIDEF stbi_uc *stbi_load_from_callbacks(stbi_io_callbacks const *clbk , void *user, int *x, int *y, int *channels_in_file, int desired_channels); #ifndef STBI_NO_GIF STBIDEF stbi_uc *stbi_load_gif_from_memory(stbi_uc const *buffer, int len, int **delays, int *x, int *y, int *z, int *comp, int req_comp); #endif #ifndef STBI_NO_STDIO -STBIDEF stbi_uc *stbi_load (char const *filename, int *x, int *y, int *channels_in_file, int desired_channels); +STBIDEF stbi_uc *stbi_load (char const *filename, int *x, int *y, int *channels_in_file, int desired_channels); STBIDEF stbi_uc *stbi_load_from_file (FILE *f, int *x, int *y, int *channels_in_file, int desired_channels); // for stbi_load_from_file, file pointer is left pointing immediately after image #endif @@ -371,11 +371,11 @@ STBIDEF stbi_uc *stbi_load_from_file (FILE *f, int *x, int *y, int *channels_in // 16-bits-per-channel interface // -STBIDEF stbi_us *stbi_load_16_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *channels_in_file, int desired_channels); +STBIDEF stbi_us *stbi_load_16_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *channels_in_file, int desired_channels); STBIDEF stbi_us *stbi_load_16_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *channels_in_file, int desired_channels); #ifndef STBI_NO_STDIO -STBIDEF stbi_us *stbi_load_16 (char const *filename, int *x, int *y, int *channels_in_file, int desired_channels); +STBIDEF stbi_us *stbi_load_16 (char const *filename, int *x, int *y, int *channels_in_file, int desired_channels); STBIDEF stbi_us *stbi_load_from_file_16(FILE *f, int *x, int *y, int *channels_in_file, int desired_channels); #endif @@ -384,52 +384,52 @@ STBIDEF stbi_us *stbi_load_from_file_16(FILE *f, int *x, int *y, int *channels_i // float-per-channel interface // #ifndef STBI_NO_LINEAR - STBIDEF float *stbi_loadf_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *channels_in_file, int desired_channels); - STBIDEF float *stbi_loadf_from_callbacks (stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *channels_in_file, int desired_channels); + STBIDEF float *stbi_loadf_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *channels_in_file, int desired_channels); + STBIDEF float *stbi_loadf_from_callbacks (stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *channels_in_file, int desired_channels); #ifndef STBI_NO_STDIO - STBIDEF float *stbi_loadf (char const *filename, int *x, int *y, int *channels_in_file, int desired_channels); - STBIDEF float *stbi_loadf_from_file (FILE *f, int *x, int *y, int *channels_in_file, int desired_channels); + STBIDEF float *stbi_loadf (char const *filename, int *x, int *y, int *channels_in_file, int desired_channels); + STBIDEF float *stbi_loadf_from_file (FILE *f, int *x, int *y, int *channels_in_file, int desired_channels); #endif #endif #ifndef STBI_NO_HDR - STBIDEF void stbi_hdr_to_ldr_gamma(float gamma); - STBIDEF void stbi_hdr_to_ldr_scale(float scale); + STBIDEF void stbi_hdr_to_ldr_gamma(float gamma); + STBIDEF void stbi_hdr_to_ldr_scale(float scale); #endif // STBI_NO_HDR #ifndef STBI_NO_LINEAR - STBIDEF void stbi_ldr_to_hdr_gamma(float gamma); - STBIDEF void stbi_ldr_to_hdr_scale(float scale); + STBIDEF void stbi_ldr_to_hdr_gamma(float gamma); + STBIDEF void stbi_ldr_to_hdr_scale(float scale); #endif // STBI_NO_LINEAR // stbi_is_hdr is always defined, but always returns false if STBI_NO_HDR -STBIDEF int stbi_is_hdr_from_callbacks(stbi_io_callbacks const *clbk, void *user); -STBIDEF int stbi_is_hdr_from_memory(stbi_uc const *buffer, int len); +STBIDEF int stbi_is_hdr_from_callbacks(stbi_io_callbacks const *clbk, void *user); +STBIDEF int stbi_is_hdr_from_memory(stbi_uc const *buffer, int len); #ifndef STBI_NO_STDIO -STBIDEF int stbi_is_hdr (char const *filename); -STBIDEF int stbi_is_hdr_from_file(FILE *f); +STBIDEF int stbi_is_hdr (char const *filename); +STBIDEF int stbi_is_hdr_from_file(FILE *f); #endif // STBI_NO_STDIO // get a VERY brief reason for failure // NOT THREADSAFE -STBIDEF const char *stbi_failure_reason (void); +STBIDEF const char *stbi_failure_reason (void); // free the loaded image -- this is just free() -STBIDEF void stbi_image_free (void *retval_from_stbi_load); +STBIDEF void stbi_image_free (void *retval_from_stbi_load); // get image dimensions & components without fully decoding -STBIDEF int stbi_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp); -STBIDEF int stbi_info_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp); -STBIDEF int stbi_is_16_bit_from_memory(stbi_uc const *buffer, int len); -STBIDEF int stbi_is_16_bit_from_callbacks(stbi_io_callbacks const *clbk, void *user); +STBIDEF int stbi_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp); +STBIDEF int stbi_info_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp); +STBIDEF int stbi_is_16_bit_from_memory(stbi_uc const *buffer, int len); +STBIDEF int stbi_is_16_bit_from_callbacks(stbi_io_callbacks const *clbk, void *user); #ifndef STBI_NO_STDIO -STBIDEF int stbi_info (char const *filename, int *x, int *y, int *comp); -STBIDEF int stbi_info_from_file (FILE *f, int *x, int *y, int *comp); -STBIDEF int stbi_is_16_bit (char const *filename); -STBIDEF int stbi_is_16_bit_from_file(FILE *f); +STBIDEF int stbi_info (char const *filename, int *x, int *y, int *comp); +STBIDEF int stbi_info_from_file (FILE *f, int *x, int *y, int *comp); +STBIDEF int stbi_is_16_bit (char const *filename); +STBIDEF int stbi_is_16_bit_from_file(FILE *f); #endif @@ -451,10 +451,10 @@ STBIDEF void stbi_set_flip_vertically_on_load(int flag_true_if_should_flip); STBIDEF char *stbi_zlib_decode_malloc_guesssize(const char *buffer, int len, int initial_size, int *outlen); STBIDEF char *stbi_zlib_decode_malloc_guesssize_headerflag(const char *buffer, int len, int initial_size, int *outlen, int parse_header); STBIDEF char *stbi_zlib_decode_malloc(const char *buffer, int len, int *outlen); -STBIDEF int stbi_zlib_decode_buffer(char *obuffer, int olen, const char *ibuffer, int ilen); +STBIDEF int stbi_zlib_decode_buffer(char *obuffer, int olen, const char *ibuffer, int ilen); STBIDEF char *stbi_zlib_decode_noheader_malloc(const char *buffer, int len, int *outlen); -STBIDEF int stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const char *ibuffer, int ilen); +STBIDEF int stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const char *ibuffer, int ilen); #ifdef __cplusplus @@ -463,7 +463,7 @@ STBIDEF int stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const ch // // -//// end header file ///////////////////////////////////////////////////// +//// end header file ///////////////////////////////////////////////////// #endif // STBI_INCLUDE_STB_IMAGE_H #ifdef STB_IMAGE_IMPLEMENTATION @@ -539,24 +539,24 @@ STBIDEF int stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const ch #ifdef _MSC_VER typedef unsigned short stbi__uint16; -typedef signed short stbi__int16; +typedef signed short stbi__int16; typedef unsigned int stbi__uint32; -typedef signed int stbi__int32; +typedef signed int stbi__int32; #else #include typedef uint16_t stbi__uint16; -typedef int16_t stbi__int16; +typedef int16_t stbi__int16; typedef uint32_t stbi__uint32; -typedef int32_t stbi__int32; +typedef int32_t stbi__int32; #endif // should produce compiler error if size is wrong typedef unsigned char validate_uint32[sizeof(stbi__uint32)==4 ? 1 : -1]; #ifdef _MSC_VER -#define STBI_NOTUSED(v) (void)(v) +#define STBI_NOTUSED(v) (void)(v) #else -#define STBI_NOTUSED(v) (void)sizeof(v) +#define STBI_NOTUSED(v) (void)sizeof(v) #endif #ifdef _MSC_VER @@ -578,9 +578,9 @@ typedef unsigned char validate_uint32[sizeof(stbi__uint32)==4 ? 1 : -1]; #endif #ifndef STBI_MALLOC -#define STBI_MALLOC(sz) malloc(sz) -#define STBI_REALLOC(p,newsz) realloc(p,newsz) -#define STBI_FREE(p) free(p) +#define STBI_MALLOC(sz) malloc(sz) +#define STBI_REALLOC(p,newsz) realloc(p,newsz) +#define STBI_FREE(p) free(p) #endif #ifndef STBI_REALLOC_SIZED @@ -639,9 +639,9 @@ static int stbi__cpuid3(void) { int res; __asm { - mov eax,1 - cpuid - mov res,edx + mov eax,1 + cpuid + mov res,edx } return res; } @@ -684,7 +684,7 @@ static int stbi__sse2_available(void) /////////////////////////////////////////////// // -// stbi__context struct and start_xxx functions +// stbi__context struct and start_xxx functions // stbi__context structure is our basic context used by all images, so it // contains all the IO context, plus some basic image information @@ -784,60 +784,60 @@ typedef struct } stbi__result_info; #ifndef STBI_NO_JPEG -static int stbi__jpeg_test(stbi__context *s); -static void *stbi__jpeg_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); -static int stbi__jpeg_info(stbi__context *s, int *x, int *y, int *comp); +static int stbi__jpeg_test(stbi__context *s); +static void *stbi__jpeg_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); +static int stbi__jpeg_info(stbi__context *s, int *x, int *y, int *comp); #endif #ifndef STBI_NO_PNG -static int stbi__png_test(stbi__context *s); -static void *stbi__png_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); -static int stbi__png_info(stbi__context *s, int *x, int *y, int *comp); -static int stbi__png_is16(stbi__context *s); +static int stbi__png_test(stbi__context *s); +static void *stbi__png_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); +static int stbi__png_info(stbi__context *s, int *x, int *y, int *comp); +static int stbi__png_is16(stbi__context *s); #endif #ifndef STBI_NO_BMP -static int stbi__bmp_test(stbi__context *s); -static void *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); -static int stbi__bmp_info(stbi__context *s, int *x, int *y, int *comp); +static int stbi__bmp_test(stbi__context *s); +static void *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); +static int stbi__bmp_info(stbi__context *s, int *x, int *y, int *comp); #endif #ifndef STBI_NO_TGA -static int stbi__tga_test(stbi__context *s); -static void *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); -static int stbi__tga_info(stbi__context *s, int *x, int *y, int *comp); +static int stbi__tga_test(stbi__context *s); +static void *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); +static int stbi__tga_info(stbi__context *s, int *x, int *y, int *comp); #endif #ifndef STBI_NO_PSD -static int stbi__psd_test(stbi__context *s); -static void *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc); -static int stbi__psd_info(stbi__context *s, int *x, int *y, int *comp); -static int stbi__psd_is16(stbi__context *s); +static int stbi__psd_test(stbi__context *s); +static void *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc); +static int stbi__psd_info(stbi__context *s, int *x, int *y, int *comp); +static int stbi__psd_is16(stbi__context *s); #endif #ifndef STBI_NO_HDR -static int stbi__hdr_test(stbi__context *s); +static int stbi__hdr_test(stbi__context *s); static float *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); -static int stbi__hdr_info(stbi__context *s, int *x, int *y, int *comp); +static int stbi__hdr_info(stbi__context *s, int *x, int *y, int *comp); #endif #ifndef STBI_NO_PIC -static int stbi__pic_test(stbi__context *s); -static void *stbi__pic_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); -static int stbi__pic_info(stbi__context *s, int *x, int *y, int *comp); +static int stbi__pic_test(stbi__context *s); +static void *stbi__pic_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); +static int stbi__pic_info(stbi__context *s, int *x, int *y, int *comp); #endif #ifndef STBI_NO_GIF -static int stbi__gif_test(stbi__context *s); -static void *stbi__gif_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); -static void *stbi__load_gif_main(stbi__context *s, int **delays, int *x, int *y, int *z, int *comp, int req_comp); -static int stbi__gif_info(stbi__context *s, int *x, int *y, int *comp); +static int stbi__gif_test(stbi__context *s); +static void *stbi__gif_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); +static void *stbi__load_gif_main(stbi__context *s, int **delays, int *x, int *y, int *z, int *comp, int req_comp); +static int stbi__gif_info(stbi__context *s, int *x, int *y, int *comp); #endif #ifndef STBI_NO_PNM -static int stbi__pnm_test(stbi__context *s); -static void *stbi__pnm_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); -static int stbi__pnm_info(stbi__context *s, int *x, int *y, int *comp); +static int stbi__pnm_test(stbi__context *s); +static void *stbi__pnm_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); +static int stbi__pnm_info(stbi__context *s, int *x, int *y, int *comp); #endif // this is not threadsafe @@ -856,7 +856,7 @@ static int stbi__err(const char *str) static void *stbi__malloc(size_t size) { - return STBI_MALLOC(size); + return STBI_MALLOC(size); } // stb_image uses ints pervasively, including for offset calculations. @@ -901,7 +901,7 @@ static int stbi__mad2sizes_valid(int a, int b, int add) static int stbi__mad3sizes_valid(int a, int b, int c, int add) { return stbi__mul2sizes_valid(a, b) && stbi__mul2sizes_valid(a*b, c) && - stbi__addsizes_valid(a*b*c, add); + stbi__addsizes_valid(a*b*c, add); } // returns 1 if "a*b*c*d + add" has no negative terms/factors and doesn't overflow @@ -909,7 +909,7 @@ static int stbi__mad3sizes_valid(int a, int b, int c, int add) static int stbi__mad4sizes_valid(int a, int b, int c, int d, int add) { return stbi__mul2sizes_valid(a, b) && stbi__mul2sizes_valid(a*b, c) && - stbi__mul2sizes_valid(a*b*c, d) && stbi__addsizes_valid(a*b*c*d, add); + stbi__mul2sizes_valid(a*b*c, d) && stbi__addsizes_valid(a*b*c*d, add); } #endif @@ -959,14 +959,14 @@ static float *stbi__ldr_to_hdr(stbi_uc *data, int x, int y, int comp); #endif #ifndef STBI_NO_HDR -static stbi_uc *stbi__hdr_to_ldr(float *data, int x, int y, int comp); +static stbi_uc *stbi__hdr_to_ldr(float *data, int x, int y, int comp); #endif static int stbi__vertically_flip_on_load = 0; STBIDEF void stbi_set_flip_vertically_on_load(int flag_true_if_should_flip) { - stbi__vertically_flip_on_load = flag_true_if_should_flip; + stbi__vertically_flip_on_load = flag_true_if_should_flip; } static void *stbi__load_main(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc) @@ -1000,15 +1000,15 @@ static void *stbi__load_main(stbi__context *s, int *x, int *y, int *comp, int re #ifndef STBI_NO_HDR if (stbi__hdr_test(s)) { - float *hdr = stbi__hdr_load(s, x,y,comp,req_comp, ri); - return stbi__hdr_to_ldr(hdr, *x, *y, req_comp ? req_comp : *comp); + float *hdr = stbi__hdr_load(s, x,y,comp,req_comp, ri); + return stbi__hdr_to_ldr(hdr, *x, *y, req_comp ? req_comp : *comp); } #endif #ifndef STBI_NO_TGA // test tga last because it's a crappy test! if (stbi__tga_test(s)) - return stbi__tga_load(s,x,y,comp,req_comp, ri); + return stbi__tga_load(s,x,y,comp,req_comp, ri); #endif return stbi__errpuc("unknown image type", "Image not of any known type, or corrupt"); @@ -1024,7 +1024,7 @@ static stbi_uc *stbi__convert_16_to_8(stbi__uint16 *orig, int w, int h, int chan if (reduced == NULL) return stbi__errpuc("outofmem", "Out of memory"); for (i = 0; i < img_len; ++i) - reduced[i] = (stbi_uc)((orig[i] >> 8) & 0xFF); // top half of each byte is sufficient approx of 16->8 bit scaling + reduced[i] = (stbi_uc)((orig[i] >> 8) & 0xFF); // top half of each byte is sufficient approx of 16->8 bit scaling STBI_FREE(orig); return reduced; @@ -1040,7 +1040,7 @@ static stbi__uint16 *stbi__convert_8_to_16(stbi_uc *orig, int w, int h, int chan if (enlarged == NULL) return (stbi__uint16 *) stbi__errpuc("outofmem", "Out of memory"); for (i = 0; i < img_len; ++i) - enlarged[i] = (stbi__uint16)((orig[i] << 8) + orig[i]); // replicate to high and low byte, maps 0->0, 255->0xffff + enlarged[i] = (stbi__uint16)((orig[i] << 8) + orig[i]); // replicate to high and low byte, maps 0->0, 255->0xffff STBI_FREE(orig); return enlarged; @@ -1054,19 +1054,19 @@ static void stbi__vertical_flip(void *image, int w, int h, int bytes_per_pixel) stbi_uc *bytes = (stbi_uc *)image; for (row = 0; row < (h>>1); row++) { - stbi_uc *row0 = bytes + row*bytes_per_row; - stbi_uc *row1 = bytes + (h - row - 1)*bytes_per_row; - // swap row0 with row1 - size_t bytes_left = bytes_per_row; - while (bytes_left) { - size_t bytes_copy = (bytes_left < sizeof(temp)) ? bytes_left : sizeof(temp); - memcpy(temp, row0, bytes_copy); - memcpy(row0, row1, bytes_copy); - memcpy(row1, temp, bytes_copy); - row0 += bytes_copy; - row1 += bytes_copy; - bytes_left -= bytes_copy; - } + stbi_uc *row0 = bytes + row*bytes_per_row; + stbi_uc *row1 = bytes + (h - row - 1)*bytes_per_row; + // swap row0 with row1 + size_t bytes_left = bytes_per_row; + while (bytes_left) { + size_t bytes_copy = (bytes_left < sizeof(temp)) ? bytes_left : sizeof(temp); + memcpy(temp, row0, bytes_copy); + memcpy(row0, row1, bytes_copy); + memcpy(row1, temp, bytes_copy); + row0 += bytes_copy; + row1 += bytes_copy; + bytes_left -= bytes_copy; + } } } @@ -1077,8 +1077,8 @@ static void stbi__vertical_flip_slices(void *image, int w, int h, int z, int byt stbi_uc *bytes = (stbi_uc *)image; for (slice = 0; slice < z; ++slice) { - stbi__vertical_flip(bytes, w, h, bytes_per_pixel); - bytes += slice_size; + stbi__vertical_flip(bytes, w, h, bytes_per_pixel); + bytes += slice_size; } } @@ -1088,19 +1088,19 @@ static unsigned char *stbi__load_and_postprocess_8bit(stbi__context *s, int *x, void *result = stbi__load_main(s, x, y, comp, req_comp, &ri, 8); if (result == NULL) - return NULL; + return NULL; if (ri.bits_per_channel != 8) { - STBI_ASSERT(ri.bits_per_channel == 16); - result = stbi__convert_16_to_8((stbi__uint16 *) result, *x, *y, req_comp == 0 ? *comp : req_comp); - ri.bits_per_channel = 8; + STBI_ASSERT(ri.bits_per_channel == 16); + result = stbi__convert_16_to_8((stbi__uint16 *) result, *x, *y, req_comp == 0 ? *comp : req_comp); + ri.bits_per_channel = 8; } // @TODO: move stbi__convert_format to here if (stbi__vertically_flip_on_load) { - int channels = req_comp ? req_comp : *comp; - stbi__vertical_flip(result, *x, *y, channels * sizeof(stbi_uc)); + int channels = req_comp ? req_comp : *comp; + stbi__vertical_flip(result, *x, *y, channels * sizeof(stbi_uc)); } return (unsigned char *) result; @@ -1112,20 +1112,20 @@ static stbi__uint16 *stbi__load_and_postprocess_16bit(stbi__context *s, int *x, void *result = stbi__load_main(s, x, y, comp, req_comp, &ri, 16); if (result == NULL) - return NULL; + return NULL; if (ri.bits_per_channel != 16) { - STBI_ASSERT(ri.bits_per_channel == 8); - result = stbi__convert_8_to_16((stbi_uc *) result, *x, *y, req_comp == 0 ? *comp : req_comp); - ri.bits_per_channel = 16; + STBI_ASSERT(ri.bits_per_channel == 8); + result = stbi__convert_8_to_16((stbi_uc *) result, *x, *y, req_comp == 0 ? *comp : req_comp); + ri.bits_per_channel = 16; } // @TODO: move stbi__convert_format16 to here // @TODO: special case RGB-to-Y (and RGBA-to-YA) for 8-bit-to-16-bit case to keep more precision if (stbi__vertically_flip_on_load) { - int channels = req_comp ? req_comp : *comp; - stbi__vertical_flip(result, *x, *y, channels * sizeof(stbi__uint16)); + int channels = req_comp ? req_comp : *comp; + stbi__vertical_flip(result, *x, *y, channels * sizeof(stbi__uint16)); } return (stbi__uint16 *) result; @@ -1135,8 +1135,8 @@ static stbi__uint16 *stbi__load_and_postprocess_16bit(stbi__context *s, int *x, static void stbi__float_postprocess(float *result, int *x, int *y, int *comp, int req_comp) { if (stbi__vertically_flip_on_load && result != NULL) { - int channels = req_comp ? req_comp : *comp; - stbi__vertical_flip(result, *x, *y, channels * sizeof(float)); + int channels = req_comp ? req_comp : *comp; + stbi__vertical_flip(result, *x, *y, channels * sizeof(float)); } } #endif @@ -1148,7 +1148,7 @@ static FILE *stbi__fopen(char const *filename, char const *mode) FILE *f; #if defined(_MSC_VER) && _MSC_VER >= 1400 if (0 != fopen_s(&f, filename, mode)) - f=0; + f=0; #else f = fopen(filename, mode); #endif @@ -1173,8 +1173,8 @@ STBIDEF stbi_uc *stbi_load_from_file(FILE *f, int *x, int *y, int *comp, int req stbi__start_file(&s,f); result = stbi__load_and_postprocess_8bit(&s,x,y,comp,req_comp); if (result) { - // need to 'unget' all the characters in the IO buffer - fseek(f, - (int) (s.img_buffer_end - s.img_buffer), SEEK_CUR); + // need to 'unget' all the characters in the IO buffer + fseek(f, - (int) (s.img_buffer_end - s.img_buffer), SEEK_CUR); } return result; } @@ -1186,8 +1186,8 @@ STBIDEF stbi__uint16 *stbi_load_from_file_16(FILE *f, int *x, int *y, int *comp, stbi__start_file(&s,f); result = stbi__load_and_postprocess_16bit(&s,x,y,comp,req_comp); if (result) { - // need to 'unget' all the characters in the IO buffer - fseek(f, - (int) (s.img_buffer_end - s.img_buffer), SEEK_CUR); + // need to 'unget' all the characters in the IO buffer + fseek(f, - (int) (s.img_buffer_end - s.img_buffer), SEEK_CUR); } return result; } @@ -1242,7 +1242,7 @@ STBIDEF stbi_uc *stbi_load_gif_from_memory(stbi_uc const *buffer, int len, int * result = (unsigned char*) stbi__load_gif_main(&s, delays, x, y, z, comp, req_comp); if (stbi__vertically_flip_on_load) { - stbi__vertical_flip_slices( result, *x, *y, *z, *comp ); + stbi__vertical_flip_slices( result, *x, *y, *z, *comp ); } return result; @@ -1255,16 +1255,16 @@ static float *stbi__loadf_main(stbi__context *s, int *x, int *y, int *comp, int unsigned char *data; #ifndef STBI_NO_HDR if (stbi__hdr_test(s)) { - stbi__result_info ri; - float *hdr_data = stbi__hdr_load(s,x,y,comp,req_comp, &ri); - if (hdr_data) - stbi__float_postprocess(hdr_data,x,y,comp,req_comp); - return hdr_data; + stbi__result_info ri; + float *hdr_data = stbi__hdr_load(s,x,y,comp,req_comp, &ri); + if (hdr_data) + stbi__float_postprocess(hdr_data,x,y,comp,req_comp); + return hdr_data; } #endif data = stbi__load_and_postprocess_8bit(s, x, y, comp, req_comp); if (data) - return stbi__ldr_to_hdr(data, *x, *y, req_comp ? req_comp : *comp); + return stbi__ldr_to_hdr(data, *x, *y, req_comp ? req_comp : *comp); return stbi__errpf("unknown image type", "Image not of any known type, or corrupt"); } @@ -1321,13 +1321,13 @@ STBIDEF int stbi_is_hdr_from_memory(stbi_uc const *buffer, int len) } #ifndef STBI_NO_STDIO -STBIDEF int stbi_is_hdr (char const *filename) +STBIDEF int stbi_is_hdr (char const *filename) { FILE *f = stbi__fopen(filename, "rb"); int result=0; if (f) { - result = stbi_is_hdr_from_file(f); - fclose(f); + result = stbi_is_hdr_from_file(f); + fclose(f); } return result; } @@ -1349,7 +1349,7 @@ STBIDEF int stbi_is_hdr_from_file(FILE *f) } #endif // !STBI_NO_STDIO -STBIDEF int stbi_is_hdr_from_callbacks(stbi_io_callbacks const *clbk, void *user) +STBIDEF int stbi_is_hdr_from_callbacks(stbi_io_callbacks const *clbk, void *user) { #ifndef STBI_NO_HDR stbi__context s; @@ -1391,25 +1391,25 @@ static void stbi__refill_buffer(stbi__context *s) { int n = (s->io.read)(s->io_user_data,(char*)s->buffer_start,s->buflen); if (n == 0) { - // at end of file, treat same as if from memory, but need to handle case - // where s->img_buffer isn't pointing to safe memory, e.g. 0-byte file - s->read_from_callbacks = 0; - s->img_buffer = s->buffer_start; - s->img_buffer_end = s->buffer_start+1; - *s->img_buffer = 0; + // at end of file, treat same as if from memory, but need to handle case + // where s->img_buffer isn't pointing to safe memory, e.g. 0-byte file + s->read_from_callbacks = 0; + s->img_buffer = s->buffer_start; + s->img_buffer_end = s->buffer_start+1; + *s->img_buffer = 0; } else { - s->img_buffer = s->buffer_start; - s->img_buffer_end = s->buffer_start + n; + s->img_buffer = s->buffer_start; + s->img_buffer_end = s->buffer_start + n; } } stbi_inline static stbi_uc stbi__get8(stbi__context *s) { if (s->img_buffer < s->img_buffer_end) - return *s->img_buffer++; + return *s->img_buffer++; if (s->read_from_callbacks) { - stbi__refill_buffer(s); - return *s->img_buffer++; + stbi__refill_buffer(s); + return *s->img_buffer++; } return 0; } @@ -1417,10 +1417,10 @@ stbi_inline static stbi_uc stbi__get8(stbi__context *s) stbi_inline static int stbi__at_eof(stbi__context *s) { if (s->io.read) { - if (!(s->io.eof)(s->io_user_data)) return 0; - // if feof() is true, check if buffer = end - // special case: we've only got the special 0 character at the end - if (s->read_from_callbacks == 0) return 1; + if (!(s->io.eof)(s->io_user_data)) return 0; + // if feof() is true, check if buffer = end + // special case: we've only got the special 0 character at the end + if (s->read_from_callbacks == 0) return 1; } return s->img_buffer >= s->img_buffer_end; @@ -1429,16 +1429,16 @@ stbi_inline static int stbi__at_eof(stbi__context *s) static void stbi__skip(stbi__context *s, int n) { if (n < 0) { - s->img_buffer = s->img_buffer_end; - return; + s->img_buffer = s->img_buffer_end; + return; } if (s->io.read) { - int blen = (int) (s->img_buffer_end - s->img_buffer); - if (blen < n) { - s->img_buffer = s->img_buffer_end; - (s->io.skip)(s->io_user_data, n - blen); - return; - } + int blen = (int) (s->img_buffer_end - s->img_buffer); + if (blen < n) { + s->img_buffer = s->img_buffer_end; + (s->io.skip)(s->io_user_data, n - blen); + return; + } } s->img_buffer += n; } @@ -1446,25 +1446,25 @@ static void stbi__skip(stbi__context *s, int n) static int stbi__getn(stbi__context *s, stbi_uc *buffer, int n) { if (s->io.read) { - int blen = (int) (s->img_buffer_end - s->img_buffer); - if (blen < n) { - int res, count; + int blen = (int) (s->img_buffer_end - s->img_buffer); + if (blen < n) { + int res, count; - memcpy(buffer, s->img_buffer, blen); + memcpy(buffer, s->img_buffer, blen); - count = (s->io.read)(s->io_user_data, (char*) buffer + blen, n - blen); - res = (count == (n-blen)); - s->img_buffer = s->img_buffer_end; - return res; - } + count = (s->io.read)(s->io_user_data, (char*) buffer + blen, n - blen); + res = (count == (n-blen)); + s->img_buffer = s->img_buffer_end; + return res; + } } if (s->img_buffer+n <= s->img_buffer_end) { - memcpy(buffer, s->img_buffer, n); - s->img_buffer += n; - return 1; + memcpy(buffer, s->img_buffer, n); + s->img_buffer += n; + return 1; } else - return 0; + return 0; } static int stbi__get16be(stbi__context *s) @@ -1497,19 +1497,19 @@ static stbi__uint32 stbi__get32le(stbi__context *s) } #endif -#define STBI__BYTECAST(x) ((stbi_uc) ((x) & 255)) // truncate int to byte without warnings +#define STBI__BYTECAST(x) ((stbi_uc) ((x) & 255)) // truncate int to byte without warnings ////////////////////////////////////////////////////////////////////////////// // -// generic converter from built-in img_n to req_comp -// individual types do this automatically as much as possible (e.g. jpeg -// does all cases internally since it needs to colorspace convert anyway, -// and it never has alpha, so very few cases ). png can automatically -// interleave an alpha=255 channel, but falls back to this for other cases +// generic converter from built-in img_n to req_comp +// individual types do this automatically as much as possible (e.g. jpeg +// does all cases internally since it needs to colorspace convert anyway, +// and it never has alpha, so very few cases ). png can automatically +// interleave an alpha=255 channel, but falls back to this for other cases // -// assume data buffer is malloced, so malloc a new one and free that one -// only failure mode is malloc failing +// assume data buffer is malloced, so malloc a new one and free that one +// only failure mode is malloc failing static stbi_uc stbi__compute_y(int r, int g, int b) { @@ -1526,34 +1526,34 @@ static unsigned char *stbi__convert_format(unsigned char *data, int img_n, int r good = (unsigned char *) stbi__malloc_mad3(req_comp, x, y, 0); if (good == NULL) { - STBI_FREE(data); - return stbi__errpuc("outofmem", "Out of memory"); + STBI_FREE(data); + return stbi__errpuc("outofmem", "Out of memory"); } for (j=0; j < (int) y; ++j) { - unsigned char *src = data + j * x * img_n ; - unsigned char *dest = good + j * x * req_comp; - - #define STBI__COMBO(a,b) ((a)*8+(b)) - #define STBI__CASE(a,b) case STBI__COMBO(a,b): for(i=x-1; i >= 0; --i, src += a, dest += b) - // convert source image with img_n components to one with req_comp components; - // avoid switch per pixel, so use switch per scanline and massive macros - switch (STBI__COMBO(img_n, req_comp)) { - STBI__CASE(1,2) { dest[0]=src[0], dest[1]=255; } break; - STBI__CASE(1,3) { dest[0]=dest[1]=dest[2]=src[0]; } break; - STBI__CASE(1,4) { dest[0]=dest[1]=dest[2]=src[0], dest[3]=255; } break; - STBI__CASE(2,1) { dest[0]=src[0]; } break; - STBI__CASE(2,3) { dest[0]=dest[1]=dest[2]=src[0]; } break; - STBI__CASE(2,4) { dest[0]=dest[1]=dest[2]=src[0], dest[3]=src[1]; } break; - STBI__CASE(3,4) { dest[0]=src[0],dest[1]=src[1],dest[2]=src[2],dest[3]=255; } break; - STBI__CASE(3,1) { dest[0]=stbi__compute_y(src[0],src[1],src[2]); } break; - STBI__CASE(3,2) { dest[0]=stbi__compute_y(src[0],src[1],src[2]), dest[1] = 255; } break; - STBI__CASE(4,1) { dest[0]=stbi__compute_y(src[0],src[1],src[2]); } break; - STBI__CASE(4,2) { dest[0]=stbi__compute_y(src[0],src[1],src[2]), dest[1] = src[3]; } break; - STBI__CASE(4,3) { dest[0]=src[0],dest[1]=src[1],dest[2]=src[2]; } break; - default: STBI_ASSERT(0); - } - #undef STBI__CASE + unsigned char *src = data + j * x * img_n ; + unsigned char *dest = good + j * x * req_comp; + + #define STBI__COMBO(a,b) ((a)*8+(b)) + #define STBI__CASE(a,b) case STBI__COMBO(a,b): for(i=x-1; i >= 0; --i, src += a, dest += b) + // convert source image with img_n components to one with req_comp components; + // avoid switch per pixel, so use switch per scanline and massive macros + switch (STBI__COMBO(img_n, req_comp)) { + STBI__CASE(1,2) { dest[0]=src[0], dest[1]=255; } break; + STBI__CASE(1,3) { dest[0]=dest[1]=dest[2]=src[0]; } break; + STBI__CASE(1,4) { dest[0]=dest[1]=dest[2]=src[0], dest[3]=255; } break; + STBI__CASE(2,1) { dest[0]=src[0]; } break; + STBI__CASE(2,3) { dest[0]=dest[1]=dest[2]=src[0]; } break; + STBI__CASE(2,4) { dest[0]=dest[1]=dest[2]=src[0], dest[3]=src[1]; } break; + STBI__CASE(3,4) { dest[0]=src[0],dest[1]=src[1],dest[2]=src[2],dest[3]=255; } break; + STBI__CASE(3,1) { dest[0]=stbi__compute_y(src[0],src[1],src[2]); } break; + STBI__CASE(3,2) { dest[0]=stbi__compute_y(src[0],src[1],src[2]), dest[1] = 255; } break; + STBI__CASE(4,1) { dest[0]=stbi__compute_y(src[0],src[1],src[2]); } break; + STBI__CASE(4,2) { dest[0]=stbi__compute_y(src[0],src[1],src[2]), dest[1] = src[3]; } break; + STBI__CASE(4,3) { dest[0]=src[0],dest[1]=src[1],dest[2]=src[2]; } break; + default: STBI_ASSERT(0); + } + #undef STBI__CASE } STBI_FREE(data); @@ -1575,34 +1575,34 @@ static stbi__uint16 *stbi__convert_format16(stbi__uint16 *data, int img_n, int r good = (stbi__uint16 *) stbi__malloc(req_comp * x * y * 2); if (good == NULL) { - STBI_FREE(data); - return (stbi__uint16 *) stbi__errpuc("outofmem", "Out of memory"); + STBI_FREE(data); + return (stbi__uint16 *) stbi__errpuc("outofmem", "Out of memory"); } for (j=0; j < (int) y; ++j) { - stbi__uint16 *src = data + j * x * img_n ; - stbi__uint16 *dest = good + j * x * req_comp; - - #define STBI__COMBO(a,b) ((a)*8+(b)) - #define STBI__CASE(a,b) case STBI__COMBO(a,b): for(i=x-1; i >= 0; --i, src += a, dest += b) - // convert source image with img_n components to one with req_comp components; - // avoid switch per pixel, so use switch per scanline and massive macros - switch (STBI__COMBO(img_n, req_comp)) { - STBI__CASE(1,2) { dest[0]=src[0], dest[1]=0xffff; } break; - STBI__CASE(1,3) { dest[0]=dest[1]=dest[2]=src[0]; } break; - STBI__CASE(1,4) { dest[0]=dest[1]=dest[2]=src[0], dest[3]=0xffff; } break; - STBI__CASE(2,1) { dest[0]=src[0]; } break; - STBI__CASE(2,3) { dest[0]=dest[1]=dest[2]=src[0]; } break; - STBI__CASE(2,4) { dest[0]=dest[1]=dest[2]=src[0], dest[3]=src[1]; } break; - STBI__CASE(3,4) { dest[0]=src[0],dest[1]=src[1],dest[2]=src[2],dest[3]=0xffff; } break; - STBI__CASE(3,1) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]); } break; - STBI__CASE(3,2) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]), dest[1] = 0xffff; } break; - STBI__CASE(4,1) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]); } break; - STBI__CASE(4,2) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]), dest[1] = src[3]; } break; - STBI__CASE(4,3) { dest[0]=src[0],dest[1]=src[1],dest[2]=src[2]; } break; - default: STBI_ASSERT(0); - } - #undef STBI__CASE + stbi__uint16 *src = data + j * x * img_n ; + stbi__uint16 *dest = good + j * x * req_comp; + + #define STBI__COMBO(a,b) ((a)*8+(b)) + #define STBI__CASE(a,b) case STBI__COMBO(a,b): for(i=x-1; i >= 0; --i, src += a, dest += b) + // convert source image with img_n components to one with req_comp components; + // avoid switch per pixel, so use switch per scanline and massive macros + switch (STBI__COMBO(img_n, req_comp)) { + STBI__CASE(1,2) { dest[0]=src[0], dest[1]=0xffff; } break; + STBI__CASE(1,3) { dest[0]=dest[1]=dest[2]=src[0]; } break; + STBI__CASE(1,4) { dest[0]=dest[1]=dest[2]=src[0], dest[3]=0xffff; } break; + STBI__CASE(2,1) { dest[0]=src[0]; } break; + STBI__CASE(2,3) { dest[0]=dest[1]=dest[2]=src[0]; } break; + STBI__CASE(2,4) { dest[0]=dest[1]=dest[2]=src[0], dest[3]=src[1]; } break; + STBI__CASE(3,4) { dest[0]=src[0],dest[1]=src[1],dest[2]=src[2],dest[3]=0xffff; } break; + STBI__CASE(3,1) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]); } break; + STBI__CASE(3,2) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]), dest[1] = 0xffff; } break; + STBI__CASE(4,1) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]); } break; + STBI__CASE(4,2) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]), dest[1] = src[3]; } break; + STBI__CASE(4,3) { dest[0]=src[0],dest[1]=src[1],dest[2]=src[2]; } break; + default: STBI_ASSERT(0); + } + #undef STBI__CASE } STBI_FREE(data); @@ -1620,10 +1620,10 @@ static float *stbi__ldr_to_hdr(stbi_uc *data, int x, int y, int comp) // compute number of non-alpha components if (comp & 1) n = comp; else n = comp-1; for (i=0; i < x*y; ++i) { - for (k=0; k < n; ++k) { - output[i*comp + k] = (float) (pow(data[i*comp+k]/255.0f, stbi__l2h_gamma) * stbi__l2h_scale); - } - if (k < comp) output[i*comp + k] = data[i*comp+k]/255.0f; + for (k=0; k < n; ++k) { + output[i*comp + k] = (float) (pow(data[i*comp+k]/255.0f, stbi__l2h_gamma) * stbi__l2h_scale); + } + if (k < comp) output[i*comp + k] = data[i*comp+k]/255.0f; } STBI_FREE(data); return output; @@ -1631,8 +1631,8 @@ static float *stbi__ldr_to_hdr(stbi_uc *data, int x, int y, int comp) #endif #ifndef STBI_NO_HDR -#define stbi__float2int(x) ((int) (x)) -static stbi_uc *stbi__hdr_to_ldr(float *data, int x, int y, int comp) +#define stbi__float2int(x) ((int) (x)) +static stbi_uc *stbi__hdr_to_ldr(float *data, int x, int y, int comp) { int i,k,n; stbi_uc *output; @@ -1642,18 +1642,18 @@ static stbi_uc *stbi__hdr_to_ldr(float *data, int x, int y, int comp) // compute number of non-alpha components if (comp & 1) n = comp; else n = comp-1; for (i=0; i < x*y; ++i) { - for (k=0; k < n; ++k) { - float z = (float) pow(data[i*comp+k]*stbi__h2l_scale_i, stbi__h2l_gamma_i) * 255 + 0.5f; - if (z < 0) z = 0; - if (z > 255) z = 255; - output[i*comp + k] = (stbi_uc) stbi__float2int(z); - } - if (k < comp) { - float z = data[i*comp+k] * 255 + 0.5f; - if (z < 0) z = 0; - if (z > 255) z = 255; - output[i*comp + k] = (stbi_uc) stbi__float2int(z); - } + for (k=0; k < n; ++k) { + float z = (float) pow(data[i*comp+k]*stbi__h2l_scale_i, stbi__h2l_gamma_i) * 255 + 0.5f; + if (z < 0) z = 0; + if (z > 255) z = 255; + output[i*comp + k] = (stbi_uc) stbi__float2int(z); + } + if (k < comp) { + float z = data[i*comp+k] * 255 + 0.5f; + if (z < 0) z = 0; + if (z > 255) z = 255; + output[i*comp + k] = (stbi_uc) stbi__float2int(z); + } } STBI_FREE(data); return output; @@ -1662,39 +1662,39 @@ static stbi_uc *stbi__hdr_to_ldr(float *data, int x, int y, int comp) ////////////////////////////////////////////////////////////////////////////// // -// "baseline" JPEG/JFIF decoder +// "baseline" JPEG/JFIF decoder // -// simple implementation -// - doesn't support delayed output of y-dimension -// - simple interface (only one output format: 8-bit interleaved RGB) -// - doesn't try to recover corrupt jpegs -// - doesn't allow partial loading, loading multiple at once -// - still fast on x86 (copying globals into locals doesn't help x86) -// - allocates lots of intermediate memory (full size of all components) -// - non-interleaved case requires this anyway -// - allows good upsampling (see next) -// high-quality -// - upsampled channels are bilinearly interpolated, even across blocks -// - quality integer IDCT derived from IJG's 'slow' -// performance -// - fast huffman; reasonable integer IDCT -// - some SIMD kernels for common paths on targets with SSE2/NEON -// - uses a lot of intermediate memory, could cache poorly +// simple implementation +// - doesn't support delayed output of y-dimension +// - simple interface (only one output format: 8-bit interleaved RGB) +// - doesn't try to recover corrupt jpegs +// - doesn't allow partial loading, loading multiple at once +// - still fast on x86 (copying globals into locals doesn't help x86) +// - allocates lots of intermediate memory (full size of all components) +// - non-interleaved case requires this anyway +// - allows good upsampling (see next) +// high-quality +// - upsampled channels are bilinearly interpolated, even across blocks +// - quality integer IDCT derived from IJG's 'slow' +// performance +// - fast huffman; reasonable integer IDCT +// - some SIMD kernels for common paths on targets with SSE2/NEON +// - uses a lot of intermediate memory, could cache poorly #ifndef STBI_NO_JPEG // huffman decoding acceleration -#define FAST_BITS 9 // larger handles more cases; smaller stomps less cache +#define FAST_BITS 9 // larger handles more cases; smaller stomps less cache typedef struct { - stbi_uc fast[1 << FAST_BITS]; + stbi_uc fast[1 << FAST_BITS]; // weirdly, repacking this into AoS is a 10% speed loss, instead of a win stbi__uint16 code[256]; - stbi_uc values[256]; - stbi_uc size[257]; + stbi_uc values[256]; + stbi_uc size[257]; unsigned int maxcode[18]; - int delta[17]; // old 'firstsymbol' - old 'firstcode' + int delta[17]; // old 'firstsymbol' - old 'firstcode' } stbi__huffman; typedef struct @@ -1713,34 +1713,34 @@ typedef struct // definition of jpeg image component struct { - int id; - int h,v; - int tq; - int hd,ha; - int dc_pred; - - int x,y,w2,h2; - stbi_uc *data; - void *raw_data, *raw_coeff; - stbi_uc *linebuf; - short *coeff; // progressive only - int coeff_w, coeff_h; // number of 8x8 coefficient blocks + int id; + int h,v; + int tq; + int hd,ha; + int dc_pred; + + int x,y,w2,h2; + stbi_uc *data; + void *raw_data, *raw_coeff; + stbi_uc *linebuf; + short *coeff; // progressive only + int coeff_w, coeff_h; // number of 8x8 coefficient blocks } img_comp[4]; - stbi__uint32 code_buffer; // jpeg entropy-coded buffer - int code_bits; // number of valid bits - unsigned char marker; // marker seen while filling entropy buffer - int nomore; // flag if we saw a marker so must stop - - int progressive; - int spec_start; - int spec_end; - int succ_high; - int succ_low; - int eob_run; - int jfif; - int app14_color_transform; // Adobe APP14 tag - int rgb; + stbi__uint32 code_buffer; // jpeg entropy-coded buffer + int code_bits; // number of valid bits + unsigned char marker; // marker seen while filling entropy buffer + int nomore; // flag if we saw a marker so must stop + + int progressive; + int spec_start; + int spec_end; + int succ_high; + int succ_low; + int eob_run; + int jfif; + int app14_color_transform; // Adobe APP14 tag + int rgb; int scan_n, order[4]; int restart_interval, todo; @@ -1757,38 +1757,38 @@ static int stbi__build_huffman(stbi__huffman *h, int *count) unsigned int code; // build size list for each symbol (from JPEG spec) for (i=0; i < 16; ++i) - for (j=0; j < count[i]; ++j) - h->size[k++] = (stbi_uc) (i+1); + for (j=0; j < count[i]; ++j) + h->size[k++] = (stbi_uc) (i+1); h->size[k] = 0; // compute actual symbols (from jpeg spec) code = 0; k = 0; for(j=1; j <= 16; ++j) { - // compute delta to add to code to compute symbol id - h->delta[j] = k - code; - if (h->size[k] == j) { - while (h->size[k] == j) - h->code[k++] = (stbi__uint16) (code++); - if (code-1 >= (1u << j)) return stbi__err("bad code lengths","Corrupt JPEG"); - } - // compute largest code + 1 for this size, preshifted as needed later - h->maxcode[j] = code << (16-j); - code <<= 1; + // compute delta to add to code to compute symbol id + h->delta[j] = k - code; + if (h->size[k] == j) { + while (h->size[k] == j) + h->code[k++] = (stbi__uint16) (code++); + if (code-1 >= (1u << j)) return stbi__err("bad code lengths","Corrupt JPEG"); + } + // compute largest code + 1 for this size, preshifted as needed later + h->maxcode[j] = code << (16-j); + code <<= 1; } h->maxcode[j] = 0xffffffff; // build non-spec acceleration table; 255 is flag for not-accelerated memset(h->fast, 255, 1 << FAST_BITS); for (i=0; i < k; ++i) { - int s = h->size[i]; - if (s <= FAST_BITS) { - int c = h->code[i] << (FAST_BITS-s); - int m = 1 << (FAST_BITS-s); - for (j=0; j < m; ++j) { - h->fast[c+j] = (stbi_uc) i; - } - } + int s = h->size[i]; + if (s <= FAST_BITS) { + int c = h->code[i] << (FAST_BITS-s); + int m = 1 << (FAST_BITS-s); + for (j=0; j < m; ++j) { + h->fast[c+j] = (stbi_uc) i; + } + } } return 1; } @@ -1799,42 +1799,42 @@ static void stbi__build_fast_ac(stbi__int16 *fast_ac, stbi__huffman *h) { int i; for (i=0; i < (1 << FAST_BITS); ++i) { - stbi_uc fast = h->fast[i]; - fast_ac[i] = 0; - if (fast < 255) { - int rs = h->values[fast]; - int run = (rs >> 4) & 15; - int magbits = rs & 15; - int len = h->size[fast]; - - if (magbits && len + magbits <= FAST_BITS) { - // magnitude code followed by receive_extend code - int k = ((i << len) & ((1 << FAST_BITS) - 1)) >> (FAST_BITS - magbits); - int m = 1 << (magbits - 1); - if (k < m) k += (~0U << magbits) + 1; - // if the result is small enough, we can fit it in fast_ac table - if (k >= -128 && k <= 127) - fast_ac[i] = (stbi__int16) ((k * 256) + (run * 16) + (len + magbits)); - } - } + stbi_uc fast = h->fast[i]; + fast_ac[i] = 0; + if (fast < 255) { + int rs = h->values[fast]; + int run = (rs >> 4) & 15; + int magbits = rs & 15; + int len = h->size[fast]; + + if (magbits && len + magbits <= FAST_BITS) { + // magnitude code followed by receive_extend code + int k = ((i << len) & ((1 << FAST_BITS) - 1)) >> (FAST_BITS - magbits); + int m = 1 << (magbits - 1); + if (k < m) k += (~0U << magbits) + 1; + // if the result is small enough, we can fit it in fast_ac table + if (k >= -128 && k <= 127) + fast_ac[i] = (stbi__int16) ((k * 256) + (run * 16) + (len + magbits)); + } + } } } static void stbi__grow_buffer_unsafe(stbi__jpeg *j) { do { - unsigned int b = j->nomore ? 0 : stbi__get8(j->s); - if (b == 0xff) { - int c = stbi__get8(j->s); - while (c == 0xff) c = stbi__get8(j->s); // consume fill bytes - if (c != 0) { - j->marker = (unsigned char) c; - j->nomore = 1; - return; - } - } - j->code_buffer |= b << (24 - j->code_bits); - j->code_bits += 8; + unsigned int b = j->nomore ? 0 : stbi__get8(j->s); + if (b == 0xff) { + int c = stbi__get8(j->s); + while (c == 0xff) c = stbi__get8(j->s); // consume fill bytes + if (c != 0) { + j->marker = (unsigned char) c; + j->nomore = 1; + return; + } + } + j->code_buffer |= b << (24 - j->code_bits); + j->code_bits += 8; } while (j->code_bits <= 24); } @@ -1854,12 +1854,12 @@ stbi_inline static int stbi__jpeg_huff_decode(stbi__jpeg *j, stbi__huffman *h) c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1); k = h->fast[c]; if (k < 255) { - int s = h->size[k]; - if (s > j->code_bits) - return -1; - j->code_buffer <<= s; - j->code_bits -= s; - return h->values[k]; + int s = h->size[k]; + if (s > j->code_bits) + return -1; + j->code_buffer <<= s; + j->code_bits -= s; + return h->values[k]; } // naive test is to shift the code_buffer down so k bits are @@ -1870,16 +1870,16 @@ stbi_inline static int stbi__jpeg_huff_decode(stbi__jpeg *j, stbi__huffman *h) // that way we don't need to shift inside the loop. temp = j->code_buffer >> 16; for (k=FAST_BITS+1 ; ; ++k) - if (temp < h->maxcode[k]) - break; + if (temp < h->maxcode[k]) + break; if (k == 17) { - // error! code not found - j->code_bits -= 16; - return -1; + // error! code not found + j->code_bits -= 16; + return -1; } if (k > j->code_bits) - return -1; + return -1; // convert the huffman code to the symbol id c = ((j->code_buffer >> (32 - k)) & stbi__bmask[k]) + h->delta[k]; @@ -1937,10 +1937,10 @@ stbi_inline static int stbi__jpeg_get_bit(stbi__jpeg *j) // where does it appear in the 8x8 matrix coded as row-major? static const stbi_uc stbi__jpeg_dezigzag[64+15] = { - 0, 1, 8, 16, 9, 2, 3, 10, - 17, 24, 32, 25, 18, 11, 4, 5, + 0, 1, 8, 16, 9, 2, 3, 10, + 17, 24, 32, 25, 18, 11, 4, 5, 12, 19, 26, 33, 40, 48, 41, 34, - 27, 20, 13, 6, 7, 14, 21, 28, + 27, 20, 13, 6, 7, 14, 21, 28, 35, 42, 49, 56, 57, 50, 43, 36, 29, 22, 15, 23, 30, 37, 44, 51, 58, 59, 52, 45, 38, 31, 39, 46, @@ -1971,34 +1971,34 @@ static int stbi__jpeg_decode_block(stbi__jpeg *j, short data[64], stbi__huffman // decode AC components, see JPEG spec k = 1; do { - unsigned int zig; - int c,r,s; - if (j->code_bits < 16) stbi__grow_buffer_unsafe(j); - c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1); - r = fac[c]; - if (r) { // fast-AC path - k += (r >> 4) & 15; // run - s = r & 15; // combined length - j->code_buffer <<= s; - j->code_bits -= s; - // decode into unzigzag'd location - zig = stbi__jpeg_dezigzag[k++]; - data[zig] = (short) ((r >> 8) * dequant[zig]); - } else { - int rs = stbi__jpeg_huff_decode(j, hac); - if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG"); - s = rs & 15; - r = rs >> 4; - if (s == 0) { - if (rs != 0xf0) break; // end block - k += 16; - } else { - k += r; - // decode into unzigzag'd location - zig = stbi__jpeg_dezigzag[k++]; - data[zig] = (short) (stbi__extend_receive(j,s) * dequant[zig]); - } - } + unsigned int zig; + int c,r,s; + if (j->code_bits < 16) stbi__grow_buffer_unsafe(j); + c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1); + r = fac[c]; + if (r) { // fast-AC path + k += (r >> 4) & 15; // run + s = r & 15; // combined length + j->code_buffer <<= s; + j->code_bits -= s; + // decode into unzigzag'd location + zig = stbi__jpeg_dezigzag[k++]; + data[zig] = (short) ((r >> 8) * dequant[zig]); + } else { + int rs = stbi__jpeg_huff_decode(j, hac); + if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG"); + s = rs & 15; + r = rs >> 4; + if (s == 0) { + if (rs != 0xf0) break; // end block + k += 16; + } else { + k += r; + // decode into unzigzag'd location + zig = stbi__jpeg_dezigzag[k++]; + data[zig] = (short) (stbi__extend_receive(j,s) * dequant[zig]); + } + } } while (k < 64); return 1; } @@ -2012,18 +2012,18 @@ static int stbi__jpeg_decode_block_prog_dc(stbi__jpeg *j, short data[64], stbi__ if (j->code_bits < 16) stbi__grow_buffer_unsafe(j); if (j->succ_high == 0) { - // first scan for DC coefficient, must be first - memset(data,0,64*sizeof(data[0])); // 0 all the ac values now - t = stbi__jpeg_huff_decode(j, hdc); - diff = t ? stbi__extend_receive(j, t) : 0; - - dc = j->img_comp[b].dc_pred + diff; - j->img_comp[b].dc_pred = dc; - data[0] = (short) (dc << j->succ_low); + // first scan for DC coefficient, must be first + memset(data,0,64*sizeof(data[0])); // 0 all the ac values now + t = stbi__jpeg_huff_decode(j, hdc); + diff = t ? stbi__extend_receive(j, t) : 0; + + dc = j->img_comp[b].dc_pred + diff; + j->img_comp[b].dc_pred = dc; + data[0] = (short) (dc << j->succ_low); } else { - // refinement scan for DC coefficient - if (stbi__jpeg_get_bit(j)) - data[0] += (short) (1 << j->succ_low); + // refinement scan for DC coefficient + if (stbi__jpeg_get_bit(j)) + data[0] += (short) (1 << j->succ_low); } return 1; } @@ -2036,115 +2036,115 @@ static int stbi__jpeg_decode_block_prog_ac(stbi__jpeg *j, short data[64], stbi__ if (j->spec_start == 0) return stbi__err("can't merge dc and ac", "Corrupt JPEG"); if (j->succ_high == 0) { - int shift = j->succ_low; - - if (j->eob_run) { - --j->eob_run; - return 1; - } - - k = j->spec_start; - do { - unsigned int zig; - int c,r,s; - if (j->code_bits < 16) stbi__grow_buffer_unsafe(j); - c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1); - r = fac[c]; - if (r) { // fast-AC path - k += (r >> 4) & 15; // run - s = r & 15; // combined length - j->code_buffer <<= s; - j->code_bits -= s; - zig = stbi__jpeg_dezigzag[k++]; - data[zig] = (short) ((r >> 8) << shift); - } else { - int rs = stbi__jpeg_huff_decode(j, hac); - if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG"); - s = rs & 15; - r = rs >> 4; - if (s == 0) { - if (r < 15) { - j->eob_run = (1 << r); - if (r) - j->eob_run += stbi__jpeg_get_bits(j, r); - --j->eob_run; - break; - } - k += 16; - } else { - k += r; - zig = stbi__jpeg_dezigzag[k++]; - data[zig] = (short) (stbi__extend_receive(j,s) << shift); - } - } - } while (k <= j->spec_end); + int shift = j->succ_low; + + if (j->eob_run) { + --j->eob_run; + return 1; + } + + k = j->spec_start; + do { + unsigned int zig; + int c,r,s; + if (j->code_bits < 16) stbi__grow_buffer_unsafe(j); + c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1); + r = fac[c]; + if (r) { // fast-AC path + k += (r >> 4) & 15; // run + s = r & 15; // combined length + j->code_buffer <<= s; + j->code_bits -= s; + zig = stbi__jpeg_dezigzag[k++]; + data[zig] = (short) ((r >> 8) << shift); + } else { + int rs = stbi__jpeg_huff_decode(j, hac); + if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG"); + s = rs & 15; + r = rs >> 4; + if (s == 0) { + if (r < 15) { + j->eob_run = (1 << r); + if (r) + j->eob_run += stbi__jpeg_get_bits(j, r); + --j->eob_run; + break; + } + k += 16; + } else { + k += r; + zig = stbi__jpeg_dezigzag[k++]; + data[zig] = (short) (stbi__extend_receive(j,s) << shift); + } + } + } while (k <= j->spec_end); } else { - // refinement scan for these AC coefficients - - short bit = (short) (1 << j->succ_low); - - if (j->eob_run) { - --j->eob_run; - for (k = j->spec_start; k <= j->spec_end; ++k) { - short *p = &data[stbi__jpeg_dezigzag[k]]; - if (*p != 0) - if (stbi__jpeg_get_bit(j)) - if ((*p & bit)==0) { - if (*p > 0) - *p += bit; - else - *p -= bit; - } - } - } else { - k = j->spec_start; - do { - int r,s; - int rs = stbi__jpeg_huff_decode(j, hac); // @OPTIMIZE see if we can use the fast path here, advance-by-r is so slow, eh - if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG"); - s = rs & 15; - r = rs >> 4; - if (s == 0) { - if (r < 15) { - j->eob_run = (1 << r) - 1; - if (r) - j->eob_run += stbi__jpeg_get_bits(j, r); - r = 64; // force end of block - } else { - // r=15 s=0 should write 16 0s, so we just do - // a run of 15 0s and then write s (which is 0), - // so we don't have to do anything special here - } - } else { - if (s != 1) return stbi__err("bad huffman code", "Corrupt JPEG"); - // sign bit - if (stbi__jpeg_get_bit(j)) - s = bit; - else - s = -bit; - } - - // advance by r - while (k <= j->spec_end) { - short *p = &data[stbi__jpeg_dezigzag[k++]]; - if (*p != 0) { - if (stbi__jpeg_get_bit(j)) - if ((*p & bit)==0) { - if (*p > 0) - *p += bit; - else - *p -= bit; - } - } else { - if (r == 0) { - *p = (short) s; - break; - } - --r; - } - } - } while (k <= j->spec_end); - } + // refinement scan for these AC coefficients + + short bit = (short) (1 << j->succ_low); + + if (j->eob_run) { + --j->eob_run; + for (k = j->spec_start; k <= j->spec_end; ++k) { + short *p = &data[stbi__jpeg_dezigzag[k]]; + if (*p != 0) + if (stbi__jpeg_get_bit(j)) + if ((*p & bit)==0) { + if (*p > 0) + *p += bit; + else + *p -= bit; + } + } + } else { + k = j->spec_start; + do { + int r,s; + int rs = stbi__jpeg_huff_decode(j, hac); // @OPTIMIZE see if we can use the fast path here, advance-by-r is so slow, eh + if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG"); + s = rs & 15; + r = rs >> 4; + if (s == 0) { + if (r < 15) { + j->eob_run = (1 << r) - 1; + if (r) + j->eob_run += stbi__jpeg_get_bits(j, r); + r = 64; // force end of block + } else { + // r=15 s=0 should write 16 0s, so we just do + // a run of 15 0s and then write s (which is 0), + // so we don't have to do anything special here + } + } else { + if (s != 1) return stbi__err("bad huffman code", "Corrupt JPEG"); + // sign bit + if (stbi__jpeg_get_bit(j)) + s = bit; + else + s = -bit; + } + + // advance by r + while (k <= j->spec_end) { + short *p = &data[stbi__jpeg_dezigzag[k++]]; + if (*p != 0) { + if (stbi__jpeg_get_bit(j)) + if ((*p & bit)==0) { + if (*p > 0) + *p += bit; + else + *p -= bit; + } + } else { + if (r == 0) { + *p = (short) s; + break; + } + --r; + } + } + } while (k <= j->spec_end); + } } return 1; } @@ -2154,8 +2154,8 @@ stbi_inline static stbi_uc stbi__clamp(int x) { // trick to use a single test to catch both cases if ((unsigned int) x > 255) { - if (x < 0) return 0; - if (x > 255) return 255; + if (x < 0) return 0; + if (x > 255) return 255; } return (stbi_uc) x; } @@ -2166,39 +2166,39 @@ stbi_inline static stbi_uc stbi__clamp(int x) // derived from jidctint -- DCT_ISLOW #define STBI__IDCT_1D(s0,s1,s2,s3,s4,s5,s6,s7) \ int t0,t1,t2,t3,p1,p2,p3,p4,p5,x0,x1,x2,x3; \ - p2 = s2; \ - p3 = s6; \ - p1 = (p2+p3) * stbi__f2f(0.5411961f); \ - t2 = p1 + p3*stbi__f2f(-1.847759065f); \ - t3 = p1 + p2*stbi__f2f( 0.765366865f); \ - p2 = s0; \ - p3 = s4; \ - t0 = stbi__fsh(p2+p3); \ - t1 = stbi__fsh(p2-p3); \ - x0 = t0+t3; \ - x3 = t0-t3; \ - x1 = t1+t2; \ - x2 = t1-t2; \ - t0 = s7; \ - t1 = s5; \ - t2 = s3; \ - t3 = s1; \ - p3 = t0+t2; \ - p4 = t1+t3; \ - p1 = t0+t3; \ - p2 = t1+t2; \ - p5 = (p3+p4)*stbi__f2f( 1.175875602f); \ - t0 = t0*stbi__f2f( 0.298631336f); \ - t1 = t1*stbi__f2f( 2.053119869f); \ - t2 = t2*stbi__f2f( 3.072711026f); \ - t3 = t3*stbi__f2f( 1.501321110f); \ - p1 = p5 + p1*stbi__f2f(-0.899976223f); \ - p2 = p5 + p2*stbi__f2f(-2.562915447f); \ - p3 = p3*stbi__f2f(-1.961570560f); \ - p4 = p4*stbi__f2f(-0.390180644f); \ - t3 += p1+p4; \ - t2 += p2+p3; \ - t1 += p2+p4; \ + p2 = s2; \ + p3 = s6; \ + p1 = (p2+p3) * stbi__f2f(0.5411961f); \ + t2 = p1 + p3*stbi__f2f(-1.847759065f); \ + t3 = p1 + p2*stbi__f2f( 0.765366865f); \ + p2 = s0; \ + p3 = s4; \ + t0 = stbi__fsh(p2+p3); \ + t1 = stbi__fsh(p2-p3); \ + x0 = t0+t3; \ + x3 = t0-t3; \ + x1 = t1+t2; \ + x2 = t1-t2; \ + t0 = s7; \ + t1 = s5; \ + t2 = s3; \ + t3 = s1; \ + p3 = t0+t2; \ + p4 = t1+t3; \ + p1 = t0+t3; \ + p2 = t1+t2; \ + p5 = (p3+p4)*stbi__f2f( 1.175875602f); \ + t0 = t0*stbi__f2f( 0.298631336f); \ + t1 = t1*stbi__f2f( 2.053119869f); \ + t2 = t2*stbi__f2f( 3.072711026f); \ + t3 = t3*stbi__f2f( 1.501321110f); \ + p1 = p5 + p1*stbi__f2f(-0.899976223f); \ + p2 = p5 + p2*stbi__f2f(-2.562915447f); \ + p3 = p3*stbi__f2f(-1.961570560f); \ + p4 = p4*stbi__f2f(-0.390180644f); \ + t3 += p1+p4; \ + t2 += p2+p3; \ + t1 += p2+p4; \ t0 += p1+p3; static void stbi__idct_block(stbi_uc *out, int out_stride, short data[64]) @@ -2209,54 +2209,54 @@ static void stbi__idct_block(stbi_uc *out, int out_stride, short data[64]) // columns for (i=0; i < 8; ++i,++d, ++v) { - // if all zeroes, shortcut -- this avoids dequantizing 0s and IDCTing - if (d[ 8]==0 && d[16]==0 && d[24]==0 && d[32]==0 - && d[40]==0 && d[48]==0 && d[56]==0) { - // no shortcut 0 seconds - // (1|2|3|4|5|6|7)==0 0 seconds - // all separate -0.047 seconds - // 1 && 2|3 && 4|5 && 6|7: -0.047 seconds - int dcterm = d[0]*4; - v[0] = v[8] = v[16] = v[24] = v[32] = v[40] = v[48] = v[56] = dcterm; - } else { - STBI__IDCT_1D(d[ 0],d[ 8],d[16],d[24],d[32],d[40],d[48],d[56]) - // constants scaled things up by 1<<12; let's bring them back - // down, but keep 2 extra bits of precision - x0 += 512; x1 += 512; x2 += 512; x3 += 512; - v[ 0] = (x0+t3) >> 10; - v[56] = (x0-t3) >> 10; - v[ 8] = (x1+t2) >> 10; - v[48] = (x1-t2) >> 10; - v[16] = (x2+t1) >> 10; - v[40] = (x2-t1) >> 10; - v[24] = (x3+t0) >> 10; - v[32] = (x3-t0) >> 10; - } + // if all zeroes, shortcut -- this avoids dequantizing 0s and IDCTing + if (d[ 8]==0 && d[16]==0 && d[24]==0 && d[32]==0 + && d[40]==0 && d[48]==0 && d[56]==0) { + // no shortcut 0 seconds + // (1|2|3|4|5|6|7)==0 0 seconds + // all separate -0.047 seconds + // 1 && 2|3 && 4|5 && 6|7: -0.047 seconds + int dcterm = d[0]*4; + v[0] = v[8] = v[16] = v[24] = v[32] = v[40] = v[48] = v[56] = dcterm; + } else { + STBI__IDCT_1D(d[ 0],d[ 8],d[16],d[24],d[32],d[40],d[48],d[56]) + // constants scaled things up by 1<<12; let's bring them back + // down, but keep 2 extra bits of precision + x0 += 512; x1 += 512; x2 += 512; x3 += 512; + v[ 0] = (x0+t3) >> 10; + v[56] = (x0-t3) >> 10; + v[ 8] = (x1+t2) >> 10; + v[48] = (x1-t2) >> 10; + v[16] = (x2+t1) >> 10; + v[40] = (x2-t1) >> 10; + v[24] = (x3+t0) >> 10; + v[32] = (x3-t0) >> 10; + } } for (i=0, v=val, o=out; i < 8; ++i,v+=8,o+=out_stride) { - // no fast case since the first 1D IDCT spread components out - STBI__IDCT_1D(v[0],v[1],v[2],v[3],v[4],v[5],v[6],v[7]) - // constants scaled things up by 1<<12, plus we had 1<<2 from first - // loop, plus horizontal and vertical each scale by sqrt(8) so together - // we've got an extra 1<<3, so 1<<17 total we need to remove. - // so we want to round that, which means adding 0.5 * 1<<17, - // aka 65536. Also, we'll end up with -128 to 127 that we want - // to encode as 0..255 by adding 128, so we'll add that before the shift - x0 += 65536 + (128<<17); - x1 += 65536 + (128<<17); - x2 += 65536 + (128<<17); - x3 += 65536 + (128<<17); - // tried computing the shifts into temps, or'ing the temps to see - // if any were out of range, but that was slower - o[0] = stbi__clamp((x0+t3) >> 17); - o[7] = stbi__clamp((x0-t3) >> 17); - o[1] = stbi__clamp((x1+t2) >> 17); - o[6] = stbi__clamp((x1-t2) >> 17); - o[2] = stbi__clamp((x2+t1) >> 17); - o[5] = stbi__clamp((x2-t1) >> 17); - o[3] = stbi__clamp((x3+t0) >> 17); - o[4] = stbi__clamp((x3-t0) >> 17); + // no fast case since the first 1D IDCT spread components out + STBI__IDCT_1D(v[0],v[1],v[2],v[3],v[4],v[5],v[6],v[7]) + // constants scaled things up by 1<<12, plus we had 1<<2 from first + // loop, plus horizontal and vertical each scale by sqrt(8) so together + // we've got an extra 1<<3, so 1<<17 total we need to remove. + // so we want to round that, which means adding 0.5 * 1<<17, + // aka 65536. Also, we'll end up with -128 to 127 that we want + // to encode as 0..255 by adding 128, so we'll add that before the shift + x0 += 65536 + (128<<17); + x1 += 65536 + (128<<17); + x2 += 65536 + (128<<17); + x3 += 65536 + (128<<17); + // tried computing the shifts into temps, or'ing the temps to see + // if any were out of range, but that was slower + o[0] = stbi__clamp((x0+t3) >> 17); + o[7] = stbi__clamp((x0-t3) >> 17); + o[1] = stbi__clamp((x1+t2) >> 17); + o[6] = stbi__clamp((x1-t2) >> 17); + o[2] = stbi__clamp((x2+t1) >> 17); + o[5] = stbi__clamp((x2-t1) >> 17); + o[3] = stbi__clamp((x3+t0) >> 17); + o[4] = stbi__clamp((x3-t0) >> 17); } } @@ -2273,81 +2273,81 @@ static void stbi__idct_simd(stbi_uc *out, int out_stride, short data[64]) // dot product constant: even elems=x, odd elems=y #define dct_const(x,y) _mm_setr_epi16((x),(y),(x),(y),(x),(y),(x),(y)) - // out(0) = c0[even]*x + c0[odd]*y (c0, x, y 16-bit, out 32-bit) + // out(0) = c0[even]*x + c0[odd]*y (c0, x, y 16-bit, out 32-bit) // out(1) = c1[even]*x + c1[odd]*y #define dct_rot(out0,out1, x,y,c0,c1) \ - __m128i c0##lo = _mm_unpacklo_epi16((x),(y)); \ - __m128i c0##hi = _mm_unpackhi_epi16((x),(y)); \ - __m128i out0##_l = _mm_madd_epi16(c0##lo, c0); \ - __m128i out0##_h = _mm_madd_epi16(c0##hi, c0); \ - __m128i out1##_l = _mm_madd_epi16(c0##lo, c1); \ - __m128i out1##_h = _mm_madd_epi16(c0##hi, c1) + __m128i c0##lo = _mm_unpacklo_epi16((x),(y)); \ + __m128i c0##hi = _mm_unpackhi_epi16((x),(y)); \ + __m128i out0##_l = _mm_madd_epi16(c0##lo, c0); \ + __m128i out0##_h = _mm_madd_epi16(c0##hi, c0); \ + __m128i out1##_l = _mm_madd_epi16(c0##lo, c1); \ + __m128i out1##_h = _mm_madd_epi16(c0##hi, c1) // out = in << 12 (in 16-bit, out 32-bit) #define dct_widen(out, in) \ - __m128i out##_l = _mm_srai_epi32(_mm_unpacklo_epi16(_mm_setzero_si128(), (in)), 4); \ - __m128i out##_h = _mm_srai_epi32(_mm_unpackhi_epi16(_mm_setzero_si128(), (in)), 4) + __m128i out##_l = _mm_srai_epi32(_mm_unpacklo_epi16(_mm_setzero_si128(), (in)), 4); \ + __m128i out##_h = _mm_srai_epi32(_mm_unpackhi_epi16(_mm_setzero_si128(), (in)), 4) // wide add #define dct_wadd(out, a, b) \ - __m128i out##_l = _mm_add_epi32(a##_l, b##_l); \ - __m128i out##_h = _mm_add_epi32(a##_h, b##_h) + __m128i out##_l = _mm_add_epi32(a##_l, b##_l); \ + __m128i out##_h = _mm_add_epi32(a##_h, b##_h) // wide sub #define dct_wsub(out, a, b) \ - __m128i out##_l = _mm_sub_epi32(a##_l, b##_l); \ - __m128i out##_h = _mm_sub_epi32(a##_h, b##_h) + __m128i out##_l = _mm_sub_epi32(a##_l, b##_l); \ + __m128i out##_h = _mm_sub_epi32(a##_h, b##_h) // butterfly a/b, add bias, then shift by "s" and pack #define dct_bfly32o(out0, out1, a,b,bias,s) \ - { \ - __m128i abiased_l = _mm_add_epi32(a##_l, bias); \ - __m128i abiased_h = _mm_add_epi32(a##_h, bias); \ - dct_wadd(sum, abiased, b); \ - dct_wsub(dif, abiased, b); \ - out0 = _mm_packs_epi32(_mm_srai_epi32(sum_l, s), _mm_srai_epi32(sum_h, s)); \ - out1 = _mm_packs_epi32(_mm_srai_epi32(dif_l, s), _mm_srai_epi32(dif_h, s)); \ - } + { \ + __m128i abiased_l = _mm_add_epi32(a##_l, bias); \ + __m128i abiased_h = _mm_add_epi32(a##_h, bias); \ + dct_wadd(sum, abiased, b); \ + dct_wsub(dif, abiased, b); \ + out0 = _mm_packs_epi32(_mm_srai_epi32(sum_l, s), _mm_srai_epi32(sum_h, s)); \ + out1 = _mm_packs_epi32(_mm_srai_epi32(dif_l, s), _mm_srai_epi32(dif_h, s)); \ + } // 8-bit interleave step (for transposes) #define dct_interleave8(a, b) \ - tmp = a; \ - a = _mm_unpacklo_epi8(a, b); \ - b = _mm_unpackhi_epi8(tmp, b) + tmp = a; \ + a = _mm_unpacklo_epi8(a, b); \ + b = _mm_unpackhi_epi8(tmp, b) // 16-bit interleave step (for transposes) #define dct_interleave16(a, b) \ - tmp = a; \ - a = _mm_unpacklo_epi16(a, b); \ - b = _mm_unpackhi_epi16(tmp, b) + tmp = a; \ + a = _mm_unpacklo_epi16(a, b); \ + b = _mm_unpackhi_epi16(tmp, b) #define dct_pass(bias,shift) \ - { \ - /* even part */ \ - dct_rot(t2e,t3e, row2,row6, rot0_0,rot0_1); \ - __m128i sum04 = _mm_add_epi16(row0, row4); \ - __m128i dif04 = _mm_sub_epi16(row0, row4); \ - dct_widen(t0e, sum04); \ - dct_widen(t1e, dif04); \ - dct_wadd(x0, t0e, t3e); \ - dct_wsub(x3, t0e, t3e); \ - dct_wadd(x1, t1e, t2e); \ - dct_wsub(x2, t1e, t2e); \ - /* odd part */ \ - dct_rot(y0o,y2o, row7,row3, rot2_0,rot2_1); \ - dct_rot(y1o,y3o, row5,row1, rot3_0,rot3_1); \ - __m128i sum17 = _mm_add_epi16(row1, row7); \ - __m128i sum35 = _mm_add_epi16(row3, row5); \ - dct_rot(y4o,y5o, sum17,sum35, rot1_0,rot1_1); \ - dct_wadd(x4, y0o, y4o); \ - dct_wadd(x5, y1o, y5o); \ - dct_wadd(x6, y2o, y5o); \ - dct_wadd(x7, y3o, y4o); \ - dct_bfly32o(row0,row7, x0,x7,bias,shift); \ - dct_bfly32o(row1,row6, x1,x6,bias,shift); \ - dct_bfly32o(row2,row5, x2,x5,bias,shift); \ - dct_bfly32o(row3,row4, x3,x4,bias,shift); \ - } + { \ + /* even part */ \ + dct_rot(t2e,t3e, row2,row6, rot0_0,rot0_1); \ + __m128i sum04 = _mm_add_epi16(row0, row4); \ + __m128i dif04 = _mm_sub_epi16(row0, row4); \ + dct_widen(t0e, sum04); \ + dct_widen(t1e, dif04); \ + dct_wadd(x0, t0e, t3e); \ + dct_wsub(x3, t0e, t3e); \ + dct_wadd(x1, t1e, t2e); \ + dct_wsub(x2, t1e, t2e); \ + /* odd part */ \ + dct_rot(y0o,y2o, row7,row3, rot2_0,rot2_1); \ + dct_rot(y1o,y3o, row5,row1, rot3_0,rot3_1); \ + __m128i sum17 = _mm_add_epi16(row1, row7); \ + __m128i sum35 = _mm_add_epi16(row3, row5); \ + dct_rot(y4o,y5o, sum17,sum35, rot1_0,rot1_1); \ + dct_wadd(x4, y0o, y4o); \ + dct_wadd(x5, y1o, y5o); \ + dct_wadd(x6, y2o, y5o); \ + dct_wadd(x7, y3o, y4o); \ + dct_bfly32o(row0,row7, x0,x7,bias,shift); \ + dct_bfly32o(row1,row6, x1,x6,bias,shift); \ + dct_bfly32o(row2,row5, x2,x5,bias,shift); \ + dct_bfly32o(row3,row4, x3,x4,bias,shift); \ + } __m128i rot0_0 = dct_const(stbi__f2f(0.5411961f), stbi__f2f(0.5411961f) + stbi__f2f(-1.847759065f)); __m128i rot0_1 = dct_const(stbi__f2f(0.5411961f) + stbi__f2f( 0.765366865f), stbi__f2f(0.5411961f)); @@ -2376,56 +2376,56 @@ static void stbi__idct_simd(stbi_uc *out, int out_stride, short data[64]) dct_pass(bias_0, 10); { - // 16bit 8x8 transpose pass 1 - dct_interleave16(row0, row4); - dct_interleave16(row1, row5); - dct_interleave16(row2, row6); - dct_interleave16(row3, row7); + // 16bit 8x8 transpose pass 1 + dct_interleave16(row0, row4); + dct_interleave16(row1, row5); + dct_interleave16(row2, row6); + dct_interleave16(row3, row7); - // transpose pass 2 - dct_interleave16(row0, row2); - dct_interleave16(row1, row3); - dct_interleave16(row4, row6); - dct_interleave16(row5, row7); + // transpose pass 2 + dct_interleave16(row0, row2); + dct_interleave16(row1, row3); + dct_interleave16(row4, row6); + dct_interleave16(row5, row7); - // transpose pass 3 - dct_interleave16(row0, row1); - dct_interleave16(row2, row3); - dct_interleave16(row4, row5); - dct_interleave16(row6, row7); + // transpose pass 3 + dct_interleave16(row0, row1); + dct_interleave16(row2, row3); + dct_interleave16(row4, row5); + dct_interleave16(row6, row7); } // row pass dct_pass(bias_1, 17); { - // pack - __m128i p0 = _mm_packus_epi16(row0, row1); // a0a1a2a3...a7b0b1b2b3...b7 - __m128i p1 = _mm_packus_epi16(row2, row3); - __m128i p2 = _mm_packus_epi16(row4, row5); - __m128i p3 = _mm_packus_epi16(row6, row7); - - // 8bit 8x8 transpose pass 1 - dct_interleave8(p0, p2); // a0e0a1e1... - dct_interleave8(p1, p3); // c0g0c1g1... - - // transpose pass 2 - dct_interleave8(p0, p1); // a0c0e0g0... - dct_interleave8(p2, p3); // b0d0f0h0... - - // transpose pass 3 - dct_interleave8(p0, p2); // a0b0c0d0... - dct_interleave8(p1, p3); // a4b4c4d4... - - // store - _mm_storel_epi64((__m128i *) out, p0); out += out_stride; - _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p0, 0x4e)); out += out_stride; - _mm_storel_epi64((__m128i *) out, p2); out += out_stride; - _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p2, 0x4e)); out += out_stride; - _mm_storel_epi64((__m128i *) out, p1); out += out_stride; - _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p1, 0x4e)); out += out_stride; - _mm_storel_epi64((__m128i *) out, p3); out += out_stride; - _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p3, 0x4e)); + // pack + __m128i p0 = _mm_packus_epi16(row0, row1); // a0a1a2a3...a7b0b1b2b3...b7 + __m128i p1 = _mm_packus_epi16(row2, row3); + __m128i p2 = _mm_packus_epi16(row4, row5); + __m128i p3 = _mm_packus_epi16(row6, row7); + + // 8bit 8x8 transpose pass 1 + dct_interleave8(p0, p2); // a0e0a1e1... + dct_interleave8(p1, p3); // c0g0c1g1... + + // transpose pass 2 + dct_interleave8(p0, p1); // a0c0e0g0... + dct_interleave8(p2, p3); // b0d0f0h0... + + // transpose pass 3 + dct_interleave8(p0, p2); // a0b0c0d0... + dct_interleave8(p1, p3); // a4b4c4d4... + + // store + _mm_storel_epi64((__m128i *) out, p0); out += out_stride; + _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p0, 0x4e)); out += out_stride; + _mm_storel_epi64((__m128i *) out, p2); out += out_stride; + _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p2, 0x4e)); out += out_stride; + _mm_storel_epi64((__m128i *) out, p1); out += out_stride; + _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p1, 0x4e)); out += out_stride; + _mm_storel_epi64((__m128i *) out, p3); out += out_stride; + _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p3, 0x4e)); } #undef dct_const @@ -2487,50 +2487,50 @@ static void stbi__idct_simd(stbi_uc *out, int out_stride, short data[64]) // butterfly a/b, then shift using "shiftop" by "s" and pack #define dct_bfly32o(out0,out1, a,b,shiftop,s) \ { \ - dct_wadd(sum, a, b); \ - dct_wsub(dif, a, b); \ - out0 = vcombine_s16(shiftop(sum_l, s), shiftop(sum_h, s)); \ - out1 = vcombine_s16(shiftop(dif_l, s), shiftop(dif_h, s)); \ + dct_wadd(sum, a, b); \ + dct_wsub(dif, a, b); \ + out0 = vcombine_s16(shiftop(sum_l, s), shiftop(sum_h, s)); \ + out1 = vcombine_s16(shiftop(dif_l, s), shiftop(dif_h, s)); \ } #define dct_pass(shiftop, shift) \ { \ - /* even part */ \ - int16x8_t sum26 = vaddq_s16(row2, row6); \ - dct_long_mul(p1e, sum26, rot0_0); \ - dct_long_mac(t2e, p1e, row6, rot0_1); \ - dct_long_mac(t3e, p1e, row2, rot0_2); \ - int16x8_t sum04 = vaddq_s16(row0, row4); \ - int16x8_t dif04 = vsubq_s16(row0, row4); \ - dct_widen(t0e, sum04); \ - dct_widen(t1e, dif04); \ - dct_wadd(x0, t0e, t3e); \ - dct_wsub(x3, t0e, t3e); \ - dct_wadd(x1, t1e, t2e); \ - dct_wsub(x2, t1e, t2e); \ - /* odd part */ \ - int16x8_t sum15 = vaddq_s16(row1, row5); \ - int16x8_t sum17 = vaddq_s16(row1, row7); \ - int16x8_t sum35 = vaddq_s16(row3, row5); \ - int16x8_t sum37 = vaddq_s16(row3, row7); \ - int16x8_t sumodd = vaddq_s16(sum17, sum35); \ - dct_long_mul(p5o, sumodd, rot1_0); \ - dct_long_mac(p1o, p5o, sum17, rot1_1); \ - dct_long_mac(p2o, p5o, sum35, rot1_2); \ - dct_long_mul(p3o, sum37, rot2_0); \ - dct_long_mul(p4o, sum15, rot2_1); \ - dct_wadd(sump13o, p1o, p3o); \ - dct_wadd(sump24o, p2o, p4o); \ - dct_wadd(sump23o, p2o, p3o); \ - dct_wadd(sump14o, p1o, p4o); \ - dct_long_mac(x4, sump13o, row7, rot3_0); \ - dct_long_mac(x5, sump24o, row5, rot3_1); \ - dct_long_mac(x6, sump23o, row3, rot3_2); \ - dct_long_mac(x7, sump14o, row1, rot3_3); \ - dct_bfly32o(row0,row7, x0,x7,shiftop,shift); \ - dct_bfly32o(row1,row6, x1,x6,shiftop,shift); \ - dct_bfly32o(row2,row5, x2,x5,shiftop,shift); \ - dct_bfly32o(row3,row4, x3,x4,shiftop,shift); \ + /* even part */ \ + int16x8_t sum26 = vaddq_s16(row2, row6); \ + dct_long_mul(p1e, sum26, rot0_0); \ + dct_long_mac(t2e, p1e, row6, rot0_1); \ + dct_long_mac(t3e, p1e, row2, rot0_2); \ + int16x8_t sum04 = vaddq_s16(row0, row4); \ + int16x8_t dif04 = vsubq_s16(row0, row4); \ + dct_widen(t0e, sum04); \ + dct_widen(t1e, dif04); \ + dct_wadd(x0, t0e, t3e); \ + dct_wsub(x3, t0e, t3e); \ + dct_wadd(x1, t1e, t2e); \ + dct_wsub(x2, t1e, t2e); \ + /* odd part */ \ + int16x8_t sum15 = vaddq_s16(row1, row5); \ + int16x8_t sum17 = vaddq_s16(row1, row7); \ + int16x8_t sum35 = vaddq_s16(row3, row5); \ + int16x8_t sum37 = vaddq_s16(row3, row7); \ + int16x8_t sumodd = vaddq_s16(sum17, sum35); \ + dct_long_mul(p5o, sumodd, rot1_0); \ + dct_long_mac(p1o, p5o, sum17, rot1_1); \ + dct_long_mac(p2o, p5o, sum35, rot1_2); \ + dct_long_mul(p3o, sum37, rot2_0); \ + dct_long_mul(p4o, sum15, rot2_1); \ + dct_wadd(sump13o, p1o, p3o); \ + dct_wadd(sump24o, p2o, p4o); \ + dct_wadd(sump23o, p2o, p3o); \ + dct_wadd(sump14o, p1o, p4o); \ + dct_long_mac(x4, sump13o, row7, rot3_0); \ + dct_long_mac(x5, sump24o, row5, rot3_1); \ + dct_long_mac(x6, sump23o, row3, rot3_2); \ + dct_long_mac(x7, sump14o, row1, rot3_3); \ + dct_bfly32o(row0,row7, x0,x7,shiftop,shift); \ + dct_bfly32o(row1,row6, x1,x6,shiftop,shift); \ + dct_bfly32o(row2,row5, x2,x5,shiftop,shift); \ + dct_bfly32o(row3,row4, x3,x4,shiftop,shift); \ } // load @@ -2557,23 +2557,23 @@ static void stbi__idct_simd(stbi_uc *out, int out_stride, short data[64]) #define dct_trn32(x, y) { int32x4x2_t t = vtrnq_s32(vreinterpretq_s32_s16(x), vreinterpretq_s32_s16(y)); x = vreinterpretq_s16_s32(t.val[0]); y = vreinterpretq_s16_s32(t.val[1]); } #define dct_trn64(x, y) { int16x8_t x0 = x; int16x8_t y0 = y; x = vcombine_s16(vget_low_s16(x0), vget_low_s16(y0)); y = vcombine_s16(vget_high_s16(x0), vget_high_s16(y0)); } - // pass 1 - dct_trn16(row0, row1); // a0b0a2b2a4b4a6b6 - dct_trn16(row2, row3); - dct_trn16(row4, row5); - dct_trn16(row6, row7); + // pass 1 + dct_trn16(row0, row1); // a0b0a2b2a4b4a6b6 + dct_trn16(row2, row3); + dct_trn16(row4, row5); + dct_trn16(row6, row7); - // pass 2 - dct_trn32(row0, row2); // a0b0c0d0a4b4c4d4 - dct_trn32(row1, row3); - dct_trn32(row4, row6); - dct_trn32(row5, row7); + // pass 2 + dct_trn32(row0, row2); // a0b0c0d0a4b4c4d4 + dct_trn32(row1, row3); + dct_trn32(row4, row6); + dct_trn32(row5, row7); - // pass 3 - dct_trn64(row0, row4); // a0b0c0d0e0f0g0h0 - dct_trn64(row1, row5); - dct_trn64(row2, row6); - dct_trn64(row3, row7); + // pass 3 + dct_trn64(row0, row4); // a0b0c0d0e0f0g0h0 + dct_trn64(row1, row5); + dct_trn64(row2, row6); + dct_trn64(row3, row7); #undef dct_trn16 #undef dct_trn32 @@ -2587,51 +2587,51 @@ static void stbi__idct_simd(stbi_uc *out, int out_stride, short data[64]) dct_pass(vshrn_n_s32, 16); { - // pack and round - uint8x8_t p0 = vqrshrun_n_s16(row0, 1); - uint8x8_t p1 = vqrshrun_n_s16(row1, 1); - uint8x8_t p2 = vqrshrun_n_s16(row2, 1); - uint8x8_t p3 = vqrshrun_n_s16(row3, 1); - uint8x8_t p4 = vqrshrun_n_s16(row4, 1); - uint8x8_t p5 = vqrshrun_n_s16(row5, 1); - uint8x8_t p6 = vqrshrun_n_s16(row6, 1); - uint8x8_t p7 = vqrshrun_n_s16(row7, 1); - - // again, these can translate into one instruction, but often don't. + // pack and round + uint8x8_t p0 = vqrshrun_n_s16(row0, 1); + uint8x8_t p1 = vqrshrun_n_s16(row1, 1); + uint8x8_t p2 = vqrshrun_n_s16(row2, 1); + uint8x8_t p3 = vqrshrun_n_s16(row3, 1); + uint8x8_t p4 = vqrshrun_n_s16(row4, 1); + uint8x8_t p5 = vqrshrun_n_s16(row5, 1); + uint8x8_t p6 = vqrshrun_n_s16(row6, 1); + uint8x8_t p7 = vqrshrun_n_s16(row7, 1); + + // again, these can translate into one instruction, but often don't. #define dct_trn8_8(x, y) { uint8x8x2_t t = vtrn_u8(x, y); x = t.val[0]; y = t.val[1]; } #define dct_trn8_16(x, y) { uint16x4x2_t t = vtrn_u16(vreinterpret_u16_u8(x), vreinterpret_u16_u8(y)); x = vreinterpret_u8_u16(t.val[0]); y = vreinterpret_u8_u16(t.val[1]); } #define dct_trn8_32(x, y) { uint32x2x2_t t = vtrn_u32(vreinterpret_u32_u8(x), vreinterpret_u32_u8(y)); x = vreinterpret_u8_u32(t.val[0]); y = vreinterpret_u8_u32(t.val[1]); } - // sadly can't use interleaved stores here since we only write - // 8 bytes to each scan line! - - // 8x8 8-bit transpose pass 1 - dct_trn8_8(p0, p1); - dct_trn8_8(p2, p3); - dct_trn8_8(p4, p5); - dct_trn8_8(p6, p7); - - // pass 2 - dct_trn8_16(p0, p2); - dct_trn8_16(p1, p3); - dct_trn8_16(p4, p6); - dct_trn8_16(p5, p7); - - // pass 3 - dct_trn8_32(p0, p4); - dct_trn8_32(p1, p5); - dct_trn8_32(p2, p6); - dct_trn8_32(p3, p7); - - // store - vst1_u8(out, p0); out += out_stride; - vst1_u8(out, p1); out += out_stride; - vst1_u8(out, p2); out += out_stride; - vst1_u8(out, p3); out += out_stride; - vst1_u8(out, p4); out += out_stride; - vst1_u8(out, p5); out += out_stride; - vst1_u8(out, p6); out += out_stride; - vst1_u8(out, p7); + // sadly can't use interleaved stores here since we only write + // 8 bytes to each scan line! + + // 8x8 8-bit transpose pass 1 + dct_trn8_8(p0, p1); + dct_trn8_8(p2, p3); + dct_trn8_8(p4, p5); + dct_trn8_8(p6, p7); + + // pass 2 + dct_trn8_16(p0, p2); + dct_trn8_16(p1, p3); + dct_trn8_16(p4, p6); + dct_trn8_16(p5, p7); + + // pass 3 + dct_trn8_32(p0, p4); + dct_trn8_32(p1, p5); + dct_trn8_32(p2, p6); + dct_trn8_32(p3, p7); + + // store + vst1_u8(out, p0); out += out_stride; + vst1_u8(out, p1); out += out_stride; + vst1_u8(out, p2); out += out_stride; + vst1_u8(out, p3); out += out_stride; + vst1_u8(out, p4); out += out_stride; + vst1_u8(out, p5); out += out_stride; + vst1_u8(out, p6); out += out_stride; + vst1_u8(out, p7); #undef dct_trn8_8 #undef dct_trn8_16 @@ -2660,13 +2660,13 @@ static stbi_uc stbi__get_marker(stbi__jpeg *j) x = stbi__get8(j->s); if (x != 0xff) return STBI__MARKER_none; while (x == 0xff) - x = stbi__get8(j->s); // consume repeated 0xff fill bytes + x = stbi__get8(j->s); // consume repeated 0xff fill bytes return x; } // in each scan, we'll have scan_n components, and the order // of the components is specified by order[] -#define STBI__RESTART(x) ((x) >= 0xd0 && (x) <= 0xd7) +#define STBI__RESTART(x) ((x) >= 0xd0 && (x) <= 0xd7) // after a restart interval, stbi__jpeg_reset the entropy decoder and // the dc prediction @@ -2687,123 +2687,123 @@ static int stbi__parse_entropy_coded_data(stbi__jpeg *z) { stbi__jpeg_reset(z); if (!z->progressive) { - if (z->scan_n == 1) { - int i,j; - STBI_SIMD_ALIGN(short, data[64]); - int n = z->order[0]; - // non-interleaved data, we just need to process one block at a time, - // in trivial scanline order - // number of blocks to do just depends on how many actual "pixels" this - // component has, independent of interleaved MCU blocking and such - int w = (z->img_comp[n].x+7) >> 3; - int h = (z->img_comp[n].y+7) >> 3; - for (j=0; j < h; ++j) { - for (i=0; i < w; ++i) { - int ha = z->img_comp[n].ha; - if (!stbi__jpeg_decode_block(z, data, z->huff_dc+z->img_comp[n].hd, z->huff_ac+ha, z->fast_ac[ha], n, z->dequant[z->img_comp[n].tq])) return 0; - z->idct_block_kernel(z->img_comp[n].data+z->img_comp[n].w2*j*8+i*8, z->img_comp[n].w2, data); - // every data block is an MCU, so countdown the restart interval - if (--z->todo <= 0) { - if (z->code_bits < 24) stbi__grow_buffer_unsafe(z); - // if it's NOT a restart, then just bail, so we get corrupt data - // rather than no data - if (!STBI__RESTART(z->marker)) return 1; - stbi__jpeg_reset(z); - } - } - } - return 1; - } else { // interleaved - int i,j,k,x,y; - STBI_SIMD_ALIGN(short, data[64]); - for (j=0; j < z->img_mcu_y; ++j) { - for (i=0; i < z->img_mcu_x; ++i) { - // scan an interleaved mcu... process scan_n components in order - for (k=0; k < z->scan_n; ++k) { - int n = z->order[k]; - // scan out an mcu's worth of this component; that's just determined - // by the basic H and V specified for the component - for (y=0; y < z->img_comp[n].v; ++y) { - for (x=0; x < z->img_comp[n].h; ++x) { - int x2 = (i*z->img_comp[n].h + x)*8; - int y2 = (j*z->img_comp[n].v + y)*8; - int ha = z->img_comp[n].ha; - if (!stbi__jpeg_decode_block(z, data, z->huff_dc+z->img_comp[n].hd, z->huff_ac+ha, z->fast_ac[ha], n, z->dequant[z->img_comp[n].tq])) return 0; - z->idct_block_kernel(z->img_comp[n].data+z->img_comp[n].w2*y2+x2, z->img_comp[n].w2, data); - } - } - } - // after all interleaved components, that's an interleaved MCU, - // so now count down the restart interval - if (--z->todo <= 0) { - if (z->code_bits < 24) stbi__grow_buffer_unsafe(z); - if (!STBI__RESTART(z->marker)) return 1; - stbi__jpeg_reset(z); - } - } - } - return 1; - } + if (z->scan_n == 1) { + int i,j; + STBI_SIMD_ALIGN(short, data[64]); + int n = z->order[0]; + // non-interleaved data, we just need to process one block at a time, + // in trivial scanline order + // number of blocks to do just depends on how many actual "pixels" this + // component has, independent of interleaved MCU blocking and such + int w = (z->img_comp[n].x+7) >> 3; + int h = (z->img_comp[n].y+7) >> 3; + for (j=0; j < h; ++j) { + for (i=0; i < w; ++i) { + int ha = z->img_comp[n].ha; + if (!stbi__jpeg_decode_block(z, data, z->huff_dc+z->img_comp[n].hd, z->huff_ac+ha, z->fast_ac[ha], n, z->dequant[z->img_comp[n].tq])) return 0; + z->idct_block_kernel(z->img_comp[n].data+z->img_comp[n].w2*j*8+i*8, z->img_comp[n].w2, data); + // every data block is an MCU, so countdown the restart interval + if (--z->todo <= 0) { + if (z->code_bits < 24) stbi__grow_buffer_unsafe(z); + // if it's NOT a restart, then just bail, so we get corrupt data + // rather than no data + if (!STBI__RESTART(z->marker)) return 1; + stbi__jpeg_reset(z); + } + } + } + return 1; + } else { // interleaved + int i,j,k,x,y; + STBI_SIMD_ALIGN(short, data[64]); + for (j=0; j < z->img_mcu_y; ++j) { + for (i=0; i < z->img_mcu_x; ++i) { + // scan an interleaved mcu... process scan_n components in order + for (k=0; k < z->scan_n; ++k) { + int n = z->order[k]; + // scan out an mcu's worth of this component; that's just determined + // by the basic H and V specified for the component + for (y=0; y < z->img_comp[n].v; ++y) { + for (x=0; x < z->img_comp[n].h; ++x) { + int x2 = (i*z->img_comp[n].h + x)*8; + int y2 = (j*z->img_comp[n].v + y)*8; + int ha = z->img_comp[n].ha; + if (!stbi__jpeg_decode_block(z, data, z->huff_dc+z->img_comp[n].hd, z->huff_ac+ha, z->fast_ac[ha], n, z->dequant[z->img_comp[n].tq])) return 0; + z->idct_block_kernel(z->img_comp[n].data+z->img_comp[n].w2*y2+x2, z->img_comp[n].w2, data); + } + } + } + // after all interleaved components, that's an interleaved MCU, + // so now count down the restart interval + if (--z->todo <= 0) { + if (z->code_bits < 24) stbi__grow_buffer_unsafe(z); + if (!STBI__RESTART(z->marker)) return 1; + stbi__jpeg_reset(z); + } + } + } + return 1; + } } else { - if (z->scan_n == 1) { - int i,j; - int n = z->order[0]; - // non-interleaved data, we just need to process one block at a time, - // in trivial scanline order - // number of blocks to do just depends on how many actual "pixels" this - // component has, independent of interleaved MCU blocking and such - int w = (z->img_comp[n].x+7) >> 3; - int h = (z->img_comp[n].y+7) >> 3; - for (j=0; j < h; ++j) { - for (i=0; i < w; ++i) { - short *data = z->img_comp[n].coeff + 64 * (i + j * z->img_comp[n].coeff_w); - if (z->spec_start == 0) { - if (!stbi__jpeg_decode_block_prog_dc(z, data, &z->huff_dc[z->img_comp[n].hd], n)) - return 0; - } else { - int ha = z->img_comp[n].ha; - if (!stbi__jpeg_decode_block_prog_ac(z, data, &z->huff_ac[ha], z->fast_ac[ha])) - return 0; - } - // every data block is an MCU, so countdown the restart interval - if (--z->todo <= 0) { - if (z->code_bits < 24) stbi__grow_buffer_unsafe(z); - if (!STBI__RESTART(z->marker)) return 1; - stbi__jpeg_reset(z); - } - } - } - return 1; - } else { // interleaved - int i,j,k,x,y; - for (j=0; j < z->img_mcu_y; ++j) { - for (i=0; i < z->img_mcu_x; ++i) { - // scan an interleaved mcu... process scan_n components in order - for (k=0; k < z->scan_n; ++k) { - int n = z->order[k]; - // scan out an mcu's worth of this component; that's just determined - // by the basic H and V specified for the component - for (y=0; y < z->img_comp[n].v; ++y) { - for (x=0; x < z->img_comp[n].h; ++x) { - int x2 = (i*z->img_comp[n].h + x); - int y2 = (j*z->img_comp[n].v + y); - short *data = z->img_comp[n].coeff + 64 * (x2 + y2 * z->img_comp[n].coeff_w); - if (!stbi__jpeg_decode_block_prog_dc(z, data, &z->huff_dc[z->img_comp[n].hd], n)) - return 0; - } - } - } - // after all interleaved components, that's an interleaved MCU, - // so now count down the restart interval - if (--z->todo <= 0) { - if (z->code_bits < 24) stbi__grow_buffer_unsafe(z); - if (!STBI__RESTART(z->marker)) return 1; - stbi__jpeg_reset(z); - } - } - } - return 1; - } + if (z->scan_n == 1) { + int i,j; + int n = z->order[0]; + // non-interleaved data, we just need to process one block at a time, + // in trivial scanline order + // number of blocks to do just depends on how many actual "pixels" this + // component has, independent of interleaved MCU blocking and such + int w = (z->img_comp[n].x+7) >> 3; + int h = (z->img_comp[n].y+7) >> 3; + for (j=0; j < h; ++j) { + for (i=0; i < w; ++i) { + short *data = z->img_comp[n].coeff + 64 * (i + j * z->img_comp[n].coeff_w); + if (z->spec_start == 0) { + if (!stbi__jpeg_decode_block_prog_dc(z, data, &z->huff_dc[z->img_comp[n].hd], n)) + return 0; + } else { + int ha = z->img_comp[n].ha; + if (!stbi__jpeg_decode_block_prog_ac(z, data, &z->huff_ac[ha], z->fast_ac[ha])) + return 0; + } + // every data block is an MCU, so countdown the restart interval + if (--z->todo <= 0) { + if (z->code_bits < 24) stbi__grow_buffer_unsafe(z); + if (!STBI__RESTART(z->marker)) return 1; + stbi__jpeg_reset(z); + } + } + } + return 1; + } else { // interleaved + int i,j,k,x,y; + for (j=0; j < z->img_mcu_y; ++j) { + for (i=0; i < z->img_mcu_x; ++i) { + // scan an interleaved mcu... process scan_n components in order + for (k=0; k < z->scan_n; ++k) { + int n = z->order[k]; + // scan out an mcu's worth of this component; that's just determined + // by the basic H and V specified for the component + for (y=0; y < z->img_comp[n].v; ++y) { + for (x=0; x < z->img_comp[n].h; ++x) { + int x2 = (i*z->img_comp[n].h + x); + int y2 = (j*z->img_comp[n].v + y); + short *data = z->img_comp[n].coeff + 64 * (x2 + y2 * z->img_comp[n].coeff_w); + if (!stbi__jpeg_decode_block_prog_dc(z, data, &z->huff_dc[z->img_comp[n].hd], n)) + return 0; + } + } + } + // after all interleaved components, that's an interleaved MCU, + // so now count down the restart interval + if (--z->todo <= 0) { + if (z->code_bits < 24) stbi__grow_buffer_unsafe(z); + if (!STBI__RESTART(z->marker)) return 1; + stbi__jpeg_reset(z); + } + } + } + return 1; + } } } @@ -2811,25 +2811,25 @@ static void stbi__jpeg_dequantize(short *data, stbi__uint16 *dequant) { int i; for (i=0; i < 64; ++i) - data[i] *= dequant[i]; + data[i] *= dequant[i]; } static void stbi__jpeg_finish(stbi__jpeg *z) { if (z->progressive) { - // dequantize and idct the data - int i,j,n; - for (n=0; n < z->s->img_n; ++n) { - int w = (z->img_comp[n].x+7) >> 3; - int h = (z->img_comp[n].y+7) >> 3; - for (j=0; j < h; ++j) { - for (i=0; i < w; ++i) { - short *data = z->img_comp[n].coeff + 64 * (i + j * z->img_comp[n].coeff_w); - stbi__jpeg_dequantize(data, z->dequant[z->img_comp[n].tq]); - z->idct_block_kernel(z->img_comp[n].data+z->img_comp[n].w2*j*8+i*8, z->img_comp[n].w2, data); - } - } - } + // dequantize and idct the data + int i,j,n; + for (n=0; n < z->s->img_n; ++n) { + int w = (z->img_comp[n].x+7) >> 3; + int h = (z->img_comp[n].y+7) >> 3; + for (j=0; j < h; ++j) { + for (i=0; i < w; ++i) { + short *data = z->img_comp[n].coeff + 64 * (i + j * z->img_comp[n].coeff_w); + stbi__jpeg_dequantize(data, z->dequant[z->img_comp[n].tq]); + z->idct_block_kernel(z->img_comp[n].data+z->img_comp[n].w2*j*8+i*8, z->img_comp[n].w2, data); + } + } + } } } @@ -2837,99 +2837,99 @@ static int stbi__process_marker(stbi__jpeg *z, int m) { int L; switch (m) { - case STBI__MARKER_none: // no marker found - return stbi__err("expected marker","Corrupt JPEG"); - - case 0xDD: // DRI - specify restart interval - if (stbi__get16be(z->s) != 4) return stbi__err("bad DRI len","Corrupt JPEG"); - z->restart_interval = stbi__get16be(z->s); - return 1; - - case 0xDB: // DQT - define quantization table - L = stbi__get16be(z->s)-2; - while (L > 0) { - int q = stbi__get8(z->s); - int p = q >> 4, sixteen = (p != 0); - int t = q & 15,i; - if (p != 0 && p != 1) return stbi__err("bad DQT type","Corrupt JPEG"); - if (t > 3) return stbi__err("bad DQT table","Corrupt JPEG"); - - for (i=0; i < 64; ++i) - z->dequant[t][stbi__jpeg_dezigzag[i]] = (stbi__uint16)(sixteen ? stbi__get16be(z->s) : stbi__get8(z->s)); - L -= (sixteen ? 129 : 65); - } - return L==0; - - case 0xC4: // DHT - define huffman table - L = stbi__get16be(z->s)-2; - while (L > 0) { - stbi_uc *v; - int sizes[16],i,n=0; - int q = stbi__get8(z->s); - int tc = q >> 4; - int th = q & 15; - if (tc > 1 || th > 3) return stbi__err("bad DHT header","Corrupt JPEG"); - for (i=0; i < 16; ++i) { - sizes[i] = stbi__get8(z->s); - n += sizes[i]; - } - L -= 17; - if (tc == 0) { - if (!stbi__build_huffman(z->huff_dc+th, sizes)) return 0; - v = z->huff_dc[th].values; - } else { - if (!stbi__build_huffman(z->huff_ac+th, sizes)) return 0; - v = z->huff_ac[th].values; - } - for (i=0; i < n; ++i) - v[i] = stbi__get8(z->s); - if (tc != 0) - stbi__build_fast_ac(z->fast_ac[th], z->huff_ac + th); - L -= n; - } - return L==0; + case STBI__MARKER_none: // no marker found + return stbi__err("expected marker","Corrupt JPEG"); + + case 0xDD: // DRI - specify restart interval + if (stbi__get16be(z->s) != 4) return stbi__err("bad DRI len","Corrupt JPEG"); + z->restart_interval = stbi__get16be(z->s); + return 1; + + case 0xDB: // DQT - define quantization table + L = stbi__get16be(z->s)-2; + while (L > 0) { + int q = stbi__get8(z->s); + int p = q >> 4, sixteen = (p != 0); + int t = q & 15,i; + if (p != 0 && p != 1) return stbi__err("bad DQT type","Corrupt JPEG"); + if (t > 3) return stbi__err("bad DQT table","Corrupt JPEG"); + + for (i=0; i < 64; ++i) + z->dequant[t][stbi__jpeg_dezigzag[i]] = (stbi__uint16)(sixteen ? stbi__get16be(z->s) : stbi__get8(z->s)); + L -= (sixteen ? 129 : 65); + } + return L==0; + + case 0xC4: // DHT - define huffman table + L = stbi__get16be(z->s)-2; + while (L > 0) { + stbi_uc *v; + int sizes[16],i,n=0; + int q = stbi__get8(z->s); + int tc = q >> 4; + int th = q & 15; + if (tc > 1 || th > 3) return stbi__err("bad DHT header","Corrupt JPEG"); + for (i=0; i < 16; ++i) { + sizes[i] = stbi__get8(z->s); + n += sizes[i]; + } + L -= 17; + if (tc == 0) { + if (!stbi__build_huffman(z->huff_dc+th, sizes)) return 0; + v = z->huff_dc[th].values; + } else { + if (!stbi__build_huffman(z->huff_ac+th, sizes)) return 0; + v = z->huff_ac[th].values; + } + for (i=0; i < n; ++i) + v[i] = stbi__get8(z->s); + if (tc != 0) + stbi__build_fast_ac(z->fast_ac[th], z->huff_ac + th); + L -= n; + } + return L==0; } // check for comment block or APP blocks if ((m >= 0xE0 && m <= 0xEF) || m == 0xFE) { - L = stbi__get16be(z->s); - if (L < 2) { - if (m == 0xFE) - return stbi__err("bad COM len","Corrupt JPEG"); - else - return stbi__err("bad APP len","Corrupt JPEG"); - } - L -= 2; - - if (m == 0xE0 && L >= 5) { // JFIF APP0 segment - static const unsigned char tag[5] = {'J','F','I','F','\0'}; - int ok = 1; - int i; - for (i=0; i < 5; ++i) - if (stbi__get8(z->s) != tag[i]) - ok = 0; - L -= 5; - if (ok) - z->jfif = 1; - } else if (m == 0xEE && L >= 12) { // Adobe APP14 segment - static const unsigned char tag[6] = {'A','d','o','b','e','\0'}; - int ok = 1; - int i; - for (i=0; i < 6; ++i) - if (stbi__get8(z->s) != tag[i]) - ok = 0; - L -= 6; - if (ok) { - stbi__get8(z->s); // version - stbi__get16be(z->s); // flags0 - stbi__get16be(z->s); // flags1 - z->app14_color_transform = stbi__get8(z->s); // color transform - L -= 6; - } - } - - stbi__skip(z->s, L); - return 1; + L = stbi__get16be(z->s); + if (L < 2) { + if (m == 0xFE) + return stbi__err("bad COM len","Corrupt JPEG"); + else + return stbi__err("bad APP len","Corrupt JPEG"); + } + L -= 2; + + if (m == 0xE0 && L >= 5) { // JFIF APP0 segment + static const unsigned char tag[5] = {'J','F','I','F','\0'}; + int ok = 1; + int i; + for (i=0; i < 5; ++i) + if (stbi__get8(z->s) != tag[i]) + ok = 0; + L -= 5; + if (ok) + z->jfif = 1; + } else if (m == 0xEE && L >= 12) { // Adobe APP14 segment + static const unsigned char tag[6] = {'A','d','o','b','e','\0'}; + int ok = 1; + int i; + for (i=0; i < 6; ++i) + if (stbi__get8(z->s) != tag[i]) + ok = 0; + L -= 6; + if (ok) { + stbi__get8(z->s); // version + stbi__get16be(z->s); // flags0 + stbi__get16be(z->s); // flags1 + z->app14_color_transform = stbi__get8(z->s); // color transform + L -= 6; + } + } + + stbi__skip(z->s, L); + return 1; } return stbi__err("unknown marker","Corrupt JPEG"); @@ -2944,32 +2944,32 @@ static int stbi__process_scan_header(stbi__jpeg *z) if (z->scan_n < 1 || z->scan_n > 4 || z->scan_n > (int) z->s->img_n) return stbi__err("bad SOS component count","Corrupt JPEG"); if (Ls != 6+2*z->scan_n) return stbi__err("bad SOS len","Corrupt JPEG"); for (i=0; i < z->scan_n; ++i) { - int id = stbi__get8(z->s), which; - int q = stbi__get8(z->s); - for (which = 0; which < z->s->img_n; ++which) - if (z->img_comp[which].id == id) - break; - if (which == z->s->img_n) return 0; // no match - z->img_comp[which].hd = q >> 4; if (z->img_comp[which].hd > 3) return stbi__err("bad DC huff","Corrupt JPEG"); - z->img_comp[which].ha = q & 15; if (z->img_comp[which].ha > 3) return stbi__err("bad AC huff","Corrupt JPEG"); - z->order[i] = which; + int id = stbi__get8(z->s), which; + int q = stbi__get8(z->s); + for (which = 0; which < z->s->img_n; ++which) + if (z->img_comp[which].id == id) + break; + if (which == z->s->img_n) return 0; // no match + z->img_comp[which].hd = q >> 4; if (z->img_comp[which].hd > 3) return stbi__err("bad DC huff","Corrupt JPEG"); + z->img_comp[which].ha = q & 15; if (z->img_comp[which].ha > 3) return stbi__err("bad AC huff","Corrupt JPEG"); + z->order[i] = which; } { - int aa; - z->spec_start = stbi__get8(z->s); - z->spec_end = stbi__get8(z->s); // should be 63, but might be 0 - aa = stbi__get8(z->s); - z->succ_high = (aa >> 4); - z->succ_low = (aa & 15); - if (z->progressive) { - if (z->spec_start > 63 || z->spec_end > 63 || z->spec_start > z->spec_end || z->succ_high > 13 || z->succ_low > 13) - return stbi__err("bad SOS", "Corrupt JPEG"); - } else { - if (z->spec_start != 0) return stbi__err("bad SOS","Corrupt JPEG"); - if (z->succ_high != 0 || z->succ_low != 0) return stbi__err("bad SOS","Corrupt JPEG"); - z->spec_end = 63; - } + int aa; + z->spec_start = stbi__get8(z->s); + z->spec_end = stbi__get8(z->s); // should be 63, but might be 0 + aa = stbi__get8(z->s); + z->succ_high = (aa >> 4); + z->succ_low = (aa & 15); + if (z->progressive) { + if (z->spec_start > 63 || z->spec_end > 63 || z->spec_start > z->spec_end || z->succ_high > 13 || z->succ_low > 13) + return stbi__err("bad SOS", "Corrupt JPEG"); + } else { + if (z->spec_start != 0) return stbi__err("bad SOS","Corrupt JPEG"); + if (z->succ_high != 0 || z->succ_low != 0) return stbi__err("bad SOS","Corrupt JPEG"); + z->spec_end = 63; + } } return 1; @@ -2979,20 +2979,20 @@ static int stbi__free_jpeg_components(stbi__jpeg *z, int ncomp, int why) { int i; for (i=0; i < ncomp; ++i) { - if (z->img_comp[i].raw_data) { - STBI_FREE(z->img_comp[i].raw_data); - z->img_comp[i].raw_data = NULL; - z->img_comp[i].data = NULL; - } - if (z->img_comp[i].raw_coeff) { - STBI_FREE(z->img_comp[i].raw_coeff); - z->img_comp[i].raw_coeff = 0; - z->img_comp[i].coeff = 0; - } - if (z->img_comp[i].linebuf) { - STBI_FREE(z->img_comp[i].linebuf); - z->img_comp[i].linebuf = NULL; - } + if (z->img_comp[i].raw_data) { + STBI_FREE(z->img_comp[i].raw_data); + z->img_comp[i].raw_data = NULL; + z->img_comp[i].data = NULL; + } + if (z->img_comp[i].raw_coeff) { + STBI_FREE(z->img_comp[i].raw_coeff); + z->img_comp[i].raw_coeff = 0; + z->img_comp[i].coeff = 0; + } + if (z->img_comp[i].linebuf) { + STBI_FREE(z->img_comp[i].linebuf); + z->img_comp[i].linebuf = NULL; + } } return why; } @@ -3001,30 +3001,30 @@ static int stbi__process_frame_header(stbi__jpeg *z, int scan) { stbi__context *s = z->s; int Lf,p,i,q, h_max=1,v_max=1,c; - Lf = stbi__get16be(s); if (Lf < 11) return stbi__err("bad SOF len","Corrupt JPEG"); // JPEG - p = stbi__get8(s); if (p != 8) return stbi__err("only 8-bit","JPEG format not supported: 8-bit only"); // JPEG baseline - s->img_y = stbi__get16be(s); if (s->img_y == 0) return stbi__err("no header height", "JPEG format not supported: delayed height"); // Legal, but we don't handle it--but neither does IJG - s->img_x = stbi__get16be(s); if (s->img_x == 0) return stbi__err("0 width","Corrupt JPEG"); // JPEG requires + Lf = stbi__get16be(s); if (Lf < 11) return stbi__err("bad SOF len","Corrupt JPEG"); // JPEG + p = stbi__get8(s); if (p != 8) return stbi__err("only 8-bit","JPEG format not supported: 8-bit only"); // JPEG baseline + s->img_y = stbi__get16be(s); if (s->img_y == 0) return stbi__err("no header height", "JPEG format not supported: delayed height"); // Legal, but we don't handle it--but neither does IJG + s->img_x = stbi__get16be(s); if (s->img_x == 0) return stbi__err("0 width","Corrupt JPEG"); // JPEG requires c = stbi__get8(s); if (c != 3 && c != 1 && c != 4) return stbi__err("bad component count","Corrupt JPEG"); s->img_n = c; for (i=0; i < c; ++i) { - z->img_comp[i].data = NULL; - z->img_comp[i].linebuf = NULL; + z->img_comp[i].data = NULL; + z->img_comp[i].linebuf = NULL; } if (Lf != 8+3*s->img_n) return stbi__err("bad SOF len","Corrupt JPEG"); z->rgb = 0; for (i=0; i < s->img_n; ++i) { - static const unsigned char rgb[3] = { 'R', 'G', 'B' }; - z->img_comp[i].id = stbi__get8(s); - if (s->img_n == 3 && z->img_comp[i].id == rgb[i]) - ++z->rgb; - q = stbi__get8(s); - z->img_comp[i].h = (q >> 4); if (!z->img_comp[i].h || z->img_comp[i].h > 4) return stbi__err("bad H","Corrupt JPEG"); - z->img_comp[i].v = q & 15; if (!z->img_comp[i].v || z->img_comp[i].v > 4) return stbi__err("bad V","Corrupt JPEG"); - z->img_comp[i].tq = stbi__get8(s); if (z->img_comp[i].tq > 3) return stbi__err("bad TQ","Corrupt JPEG"); + static const unsigned char rgb[3] = { 'R', 'G', 'B' }; + z->img_comp[i].id = stbi__get8(s); + if (s->img_n == 3 && z->img_comp[i].id == rgb[i]) + ++z->rgb; + q = stbi__get8(s); + z->img_comp[i].h = (q >> 4); if (!z->img_comp[i].h || z->img_comp[i].h > 4) return stbi__err("bad H","Corrupt JPEG"); + z->img_comp[i].v = q & 15; if (!z->img_comp[i].v || z->img_comp[i].v > 4) return stbi__err("bad V","Corrupt JPEG"); + z->img_comp[i].tq = stbi__get8(s); if (z->img_comp[i].tq > 3) return stbi__err("bad TQ","Corrupt JPEG"); } if (scan != STBI__SCAN_load) return 1; @@ -3032,8 +3032,8 @@ static int stbi__process_frame_header(stbi__jpeg *z, int scan) if (!stbi__mad3sizes_valid(s->img_x, s->img_y, s->img_n, 0)) return stbi__err("too large", "Image too large to decode"); for (i=0; i < s->img_n; ++i) { - if (z->img_comp[i].h > h_max) h_max = z->img_comp[i].h; - if (z->img_comp[i].v > v_max) v_max = z->img_comp[i].v; + if (z->img_comp[i].h > h_max) h_max = z->img_comp[i].h; + if (z->img_comp[i].v > v_max) v_max = z->img_comp[i].v; } // compute interleaved mcu info @@ -3046,46 +3046,46 @@ static int stbi__process_frame_header(stbi__jpeg *z, int scan) z->img_mcu_y = (s->img_y + z->img_mcu_h-1) / z->img_mcu_h; for (i=0; i < s->img_n; ++i) { - // number of effective pixels (e.g. for non-interleaved MCU) - z->img_comp[i].x = (s->img_x * z->img_comp[i].h + h_max-1) / h_max; - z->img_comp[i].y = (s->img_y * z->img_comp[i].v + v_max-1) / v_max; - // to simplify generation, we'll allocate enough memory to decode - // the bogus oversized data from using interleaved MCUs and their - // big blocks (e.g. a 16x16 iMCU on an image of width 33); we won't - // discard the extra data until colorspace conversion - // - // img_mcu_x, img_mcu_y: <=17 bits; comp[i].h and .v are <=4 (checked earlier) - // so these muls can't overflow with 32-bit ints (which we require) - z->img_comp[i].w2 = z->img_mcu_x * z->img_comp[i].h * 8; - z->img_comp[i].h2 = z->img_mcu_y * z->img_comp[i].v * 8; - z->img_comp[i].coeff = 0; - z->img_comp[i].raw_coeff = 0; - z->img_comp[i].linebuf = NULL; - z->img_comp[i].raw_data = stbi__malloc_mad2(z->img_comp[i].w2, z->img_comp[i].h2, 15); - if (z->img_comp[i].raw_data == NULL) - return stbi__free_jpeg_components(z, i+1, stbi__err("outofmem", "Out of memory")); - // align blocks for idct using mmx/sse - z->img_comp[i].data = (stbi_uc*) (((size_t) z->img_comp[i].raw_data + 15) & ~15); - if (z->progressive) { - // w2, h2 are multiples of 8 (see above) - z->img_comp[i].coeff_w = z->img_comp[i].w2 / 8; - z->img_comp[i].coeff_h = z->img_comp[i].h2 / 8; - z->img_comp[i].raw_coeff = stbi__malloc_mad3(z->img_comp[i].w2, z->img_comp[i].h2, sizeof(short), 15); - if (z->img_comp[i].raw_coeff == NULL) - return stbi__free_jpeg_components(z, i+1, stbi__err("outofmem", "Out of memory")); - z->img_comp[i].coeff = (short*) (((size_t) z->img_comp[i].raw_coeff + 15) & ~15); - } + // number of effective pixels (e.g. for non-interleaved MCU) + z->img_comp[i].x = (s->img_x * z->img_comp[i].h + h_max-1) / h_max; + z->img_comp[i].y = (s->img_y * z->img_comp[i].v + v_max-1) / v_max; + // to simplify generation, we'll allocate enough memory to decode + // the bogus oversized data from using interleaved MCUs and their + // big blocks (e.g. a 16x16 iMCU on an image of width 33); we won't + // discard the extra data until colorspace conversion + // + // img_mcu_x, img_mcu_y: <=17 bits; comp[i].h and .v are <=4 (checked earlier) + // so these muls can't overflow with 32-bit ints (which we require) + z->img_comp[i].w2 = z->img_mcu_x * z->img_comp[i].h * 8; + z->img_comp[i].h2 = z->img_mcu_y * z->img_comp[i].v * 8; + z->img_comp[i].coeff = 0; + z->img_comp[i].raw_coeff = 0; + z->img_comp[i].linebuf = NULL; + z->img_comp[i].raw_data = stbi__malloc_mad2(z->img_comp[i].w2, z->img_comp[i].h2, 15); + if (z->img_comp[i].raw_data == NULL) + return stbi__free_jpeg_components(z, i+1, stbi__err("outofmem", "Out of memory")); + // align blocks for idct using mmx/sse + z->img_comp[i].data = (stbi_uc*) (((size_t) z->img_comp[i].raw_data + 15) & ~15); + if (z->progressive) { + // w2, h2 are multiples of 8 (see above) + z->img_comp[i].coeff_w = z->img_comp[i].w2 / 8; + z->img_comp[i].coeff_h = z->img_comp[i].h2 / 8; + z->img_comp[i].raw_coeff = stbi__malloc_mad3(z->img_comp[i].w2, z->img_comp[i].h2, sizeof(short), 15); + if (z->img_comp[i].raw_coeff == NULL) + return stbi__free_jpeg_components(z, i+1, stbi__err("outofmem", "Out of memory")); + z->img_comp[i].coeff = (short*) (((size_t) z->img_comp[i].raw_coeff + 15) & ~15); + } } return 1; } // use comparisons since in some cases we handle more than one case (e.g. SOF) -#define stbi__DNL(x) ((x) == 0xdc) -#define stbi__SOI(x) ((x) == 0xd8) -#define stbi__EOI(x) ((x) == 0xd9) -#define stbi__SOF(x) ((x) == 0xc0 || (x) == 0xc1 || (x) == 0xc2) -#define stbi__SOS(x) ((x) == 0xda) +#define stbi__DNL(x) ((x) == 0xdc) +#define stbi__SOI(x) ((x) == 0xd8) +#define stbi__EOI(x) ((x) == 0xd9) +#define stbi__SOF(x) ((x) == 0xc0 || (x) == 0xc1 || (x) == 0xc2) +#define stbi__SOS(x) ((x) == 0xda) #define stbi__SOF_progressive(x) ((x) == 0xc2) @@ -3100,13 +3100,13 @@ static int stbi__decode_jpeg_header(stbi__jpeg *z, int scan) if (scan == STBI__SCAN_type) return 1; m = stbi__get_marker(z); while (!stbi__SOF(m)) { - if (!stbi__process_marker(z,m)) return 0; - m = stbi__get_marker(z); - while (m == STBI__MARKER_none) { - // some files have extra padding after their blocks, so ok, we'll scan - if (stbi__at_eof(z->s)) return stbi__err("no SOF", "Corrupt JPEG"); - m = stbi__get_marker(z); - } + if (!stbi__process_marker(z,m)) return 0; + m = stbi__get_marker(z); + while (m == STBI__MARKER_none) { + // some files have extra padding after their blocks, so ok, we'll scan + if (stbi__at_eof(z->s)) return stbi__err("no SOF", "Corrupt JPEG"); + m = stbi__get_marker(z); + } } z->progressive = stbi__SOF_progressive(m); if (!stbi__process_frame_header(z, scan)) return 0; @@ -3118,46 +3118,46 @@ static int stbi__decode_jpeg_image(stbi__jpeg *j) { int m; for (m = 0; m < 4; m++) { - j->img_comp[m].raw_data = NULL; - j->img_comp[m].raw_coeff = NULL; + j->img_comp[m].raw_data = NULL; + j->img_comp[m].raw_coeff = NULL; } j->restart_interval = 0; if (!stbi__decode_jpeg_header(j, STBI__SCAN_load)) return 0; m = stbi__get_marker(j); while (!stbi__EOI(m)) { - if (stbi__SOS(m)) { - if (!stbi__process_scan_header(j)) return 0; - if (!stbi__parse_entropy_coded_data(j)) return 0; - if (j->marker == STBI__MARKER_none ) { - // handle 0s at the end of image data from IP Kamera 9060 - while (!stbi__at_eof(j->s)) { - int x = stbi__get8(j->s); - if (x == 255) { - j->marker = stbi__get8(j->s); - break; - } - } - // if we reach eof without hitting a marker, stbi__get_marker() below will fail and we'll eventually return 0 - } - } else if (stbi__DNL(m)) { - int Ld = stbi__get16be(j->s); - stbi__uint32 NL = stbi__get16be(j->s); - if (Ld != 4) return stbi__err("bad DNL len", "Corrupt JPEG"); - if (NL != j->s->img_y) return stbi__err("bad DNL height", "Corrupt JPEG"); - } else { - if (!stbi__process_marker(j, m)) return 0; - } - m = stbi__get_marker(j); + if (stbi__SOS(m)) { + if (!stbi__process_scan_header(j)) return 0; + if (!stbi__parse_entropy_coded_data(j)) return 0; + if (j->marker == STBI__MARKER_none ) { + // handle 0s at the end of image data from IP Kamera 9060 + while (!stbi__at_eof(j->s)) { + int x = stbi__get8(j->s); + if (x == 255) { + j->marker = stbi__get8(j->s); + break; + } + } + // if we reach eof without hitting a marker, stbi__get_marker() below will fail and we'll eventually return 0 + } + } else if (stbi__DNL(m)) { + int Ld = stbi__get16be(j->s); + stbi__uint32 NL = stbi__get16be(j->s); + if (Ld != 4) return stbi__err("bad DNL len", "Corrupt JPEG"); + if (NL != j->s->img_y) return stbi__err("bad DNL height", "Corrupt JPEG"); + } else { + if (!stbi__process_marker(j, m)) return 0; + } + m = stbi__get_marker(j); } if (j->progressive) - stbi__jpeg_finish(j); + stbi__jpeg_finish(j); return 1; } // static jfif-centered resampling (across block boundaries) typedef stbi_uc *(*resample_row_func)(stbi_uc *out, stbi_uc *in0, stbi_uc *in1, - int w, int hs); + int w, int hs); #define stbi__div4(x) ((stbi_uc) ((x) >> 2)) @@ -3176,28 +3176,28 @@ static stbi_uc* stbi__resample_row_v_2(stbi_uc *out, stbi_uc *in_near, stbi_uc * int i; STBI_NOTUSED(hs); for (i=0; i < w; ++i) - out[i] = stbi__div4(3*in_near[i] + in_far[i] + 2); + out[i] = stbi__div4(3*in_near[i] + in_far[i] + 2); return out; } -static stbi_uc* stbi__resample_row_h_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs) +static stbi_uc* stbi__resample_row_h_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs) { // need to generate two samples horizontally for every one in input int i; stbi_uc *input = in_near; if (w == 1) { - // if only one sample, can't do any interpolation - out[0] = out[1] = input[0]; - return out; + // if only one sample, can't do any interpolation + out[0] = out[1] = input[0]; + return out; } out[0] = input[0]; out[1] = stbi__div4(input[0]*3 + input[1] + 2); for (i=1; i < w-1; ++i) { - int n = 3*input[i]+2; - out[i*2+0] = stbi__div4(n+input[i-1]); - out[i*2+1] = stbi__div4(n+input[i+1]); + int n = 3*input[i]+2; + out[i*2+0] = stbi__div4(n+input[i-1]); + out[i*2+1] = stbi__div4(n+input[i+1]); } out[i*2+0] = stbi__div4(input[w-2]*3 + input[w-1] + 2); out[i*2+1] = input[w-1]; @@ -3215,17 +3215,17 @@ static stbi_uc *stbi__resample_row_hv_2(stbi_uc *out, stbi_uc *in_near, stbi_uc // need to generate 2x2 samples for every one in input int i,t0,t1; if (w == 1) { - out[0] = out[1] = stbi__div4(3*in_near[0] + in_far[0] + 2); - return out; + out[0] = out[1] = stbi__div4(3*in_near[0] + in_far[0] + 2); + return out; } t1 = 3*in_near[0] + in_far[0]; out[0] = stbi__div4(t1+2); for (i=1; i < w; ++i) { - t0 = t1; - t1 = 3*in_near[i]+in_far[i]; - out[i*2-1] = stbi__div16(3*t0 + t1 + 8); - out[i*2 ] = stbi__div16(3*t1 + t0 + 8); + t0 = t1; + t1 = 3*in_near[i]+in_far[i]; + out[i*2-1] = stbi__div16(3*t0 + t1 + 8); + out[i*2 ] = stbi__div16(3*t1 + t0 + 8); } out[w*2-1] = stbi__div4(t1+2); @@ -3241,8 +3241,8 @@ static stbi_uc *stbi__resample_row_hv_2_simd(stbi_uc *out, stbi_uc *in_near, stb int i=0,t0,t1; if (w == 1) { - out[0] = out[1] = stbi__div4(3*in_near[0] + in_far[0] + 2); - return out; + out[0] = out[1] = stbi__div4(3*in_near[0] + in_far[0] + 2); + return out; } t1 = 3*in_near[0] + in_far[0]; @@ -3251,86 +3251,86 @@ static stbi_uc *stbi__resample_row_hv_2_simd(stbi_uc *out, stbi_uc *in_near, stb // because we need to handle the filter boundary conditions. for (; i < ((w-1) & ~7); i += 8) { #if defined(STBI_SSE2) - // load and perform the vertical filtering pass - // this uses 3*x + y = 4*x + (y - x) - __m128i zero = _mm_setzero_si128(); - __m128i farb = _mm_loadl_epi64((__m128i *) (in_far + i)); - __m128i nearb = _mm_loadl_epi64((__m128i *) (in_near + i)); - __m128i farw = _mm_unpacklo_epi8(farb, zero); - __m128i nearw = _mm_unpacklo_epi8(nearb, zero); - __m128i diff = _mm_sub_epi16(farw, nearw); - __m128i nears = _mm_slli_epi16(nearw, 2); - __m128i curr = _mm_add_epi16(nears, diff); // current row - - // horizontal filter works the same based on shifted vers of current - // row. "prev" is current row shifted right by 1 pixel; we need to - // insert the previous pixel value (from t1). - // "next" is current row shifted left by 1 pixel, with first pixel - // of next block of 8 pixels added in. - __m128i prv0 = _mm_slli_si128(curr, 2); - __m128i nxt0 = _mm_srli_si128(curr, 2); - __m128i prev = _mm_insert_epi16(prv0, t1, 0); - __m128i next = _mm_insert_epi16(nxt0, 3*in_near[i+8] + in_far[i+8], 7); - - // horizontal filter, polyphase implementation since it's convenient: - // even pixels = 3*cur + prev = cur*4 + (prev - cur) - // odd pixels = 3*cur + next = cur*4 + (next - cur) - // note the shared term. - __m128i bias = _mm_set1_epi16(8); - __m128i curs = _mm_slli_epi16(curr, 2); - __m128i prvd = _mm_sub_epi16(prev, curr); - __m128i nxtd = _mm_sub_epi16(next, curr); - __m128i curb = _mm_add_epi16(curs, bias); - __m128i even = _mm_add_epi16(prvd, curb); - __m128i odd = _mm_add_epi16(nxtd, curb); - - // interleave even and odd pixels, then undo scaling. - __m128i int0 = _mm_unpacklo_epi16(even, odd); - __m128i int1 = _mm_unpackhi_epi16(even, odd); - __m128i de0 = _mm_srli_epi16(int0, 4); - __m128i de1 = _mm_srli_epi16(int1, 4); - - // pack and write output - __m128i outv = _mm_packus_epi16(de0, de1); - _mm_storeu_si128((__m128i *) (out + i*2), outv); + // load and perform the vertical filtering pass + // this uses 3*x + y = 4*x + (y - x) + __m128i zero = _mm_setzero_si128(); + __m128i farb = _mm_loadl_epi64((__m128i *) (in_far + i)); + __m128i nearb = _mm_loadl_epi64((__m128i *) (in_near + i)); + __m128i farw = _mm_unpacklo_epi8(farb, zero); + __m128i nearw = _mm_unpacklo_epi8(nearb, zero); + __m128i diff = _mm_sub_epi16(farw, nearw); + __m128i nears = _mm_slli_epi16(nearw, 2); + __m128i curr = _mm_add_epi16(nears, diff); // current row + + // horizontal filter works the same based on shifted vers of current + // row. "prev" is current row shifted right by 1 pixel; we need to + // insert the previous pixel value (from t1). + // "next" is current row shifted left by 1 pixel, with first pixel + // of next block of 8 pixels added in. + __m128i prv0 = _mm_slli_si128(curr, 2); + __m128i nxt0 = _mm_srli_si128(curr, 2); + __m128i prev = _mm_insert_epi16(prv0, t1, 0); + __m128i next = _mm_insert_epi16(nxt0, 3*in_near[i+8] + in_far[i+8], 7); + + // horizontal filter, polyphase implementation since it's convenient: + // even pixels = 3*cur + prev = cur*4 + (prev - cur) + // odd pixels = 3*cur + next = cur*4 + (next - cur) + // note the shared term. + __m128i bias = _mm_set1_epi16(8); + __m128i curs = _mm_slli_epi16(curr, 2); + __m128i prvd = _mm_sub_epi16(prev, curr); + __m128i nxtd = _mm_sub_epi16(next, curr); + __m128i curb = _mm_add_epi16(curs, bias); + __m128i even = _mm_add_epi16(prvd, curb); + __m128i odd = _mm_add_epi16(nxtd, curb); + + // interleave even and odd pixels, then undo scaling. + __m128i int0 = _mm_unpacklo_epi16(even, odd); + __m128i int1 = _mm_unpackhi_epi16(even, odd); + __m128i de0 = _mm_srli_epi16(int0, 4); + __m128i de1 = _mm_srli_epi16(int1, 4); + + // pack and write output + __m128i outv = _mm_packus_epi16(de0, de1); + _mm_storeu_si128((__m128i *) (out + i*2), outv); #elif defined(STBI_NEON) - // load and perform the vertical filtering pass - // this uses 3*x + y = 4*x + (y - x) - uint8x8_t farb = vld1_u8(in_far + i); - uint8x8_t nearb = vld1_u8(in_near + i); - int16x8_t diff = vreinterpretq_s16_u16(vsubl_u8(farb, nearb)); - int16x8_t nears = vreinterpretq_s16_u16(vshll_n_u8(nearb, 2)); - int16x8_t curr = vaddq_s16(nears, diff); // current row - - // horizontal filter works the same based on shifted vers of current - // row. "prev" is current row shifted right by 1 pixel; we need to - // insert the previous pixel value (from t1). - // "next" is current row shifted left by 1 pixel, with first pixel - // of next block of 8 pixels added in. - int16x8_t prv0 = vextq_s16(curr, curr, 7); - int16x8_t nxt0 = vextq_s16(curr, curr, 1); - int16x8_t prev = vsetq_lane_s16(t1, prv0, 0); - int16x8_t next = vsetq_lane_s16(3*in_near[i+8] + in_far[i+8], nxt0, 7); - - // horizontal filter, polyphase implementation since it's convenient: - // even pixels = 3*cur + prev = cur*4 + (prev - cur) - // odd pixels = 3*cur + next = cur*4 + (next - cur) - // note the shared term. - int16x8_t curs = vshlq_n_s16(curr, 2); - int16x8_t prvd = vsubq_s16(prev, curr); - int16x8_t nxtd = vsubq_s16(next, curr); - int16x8_t even = vaddq_s16(curs, prvd); - int16x8_t odd = vaddq_s16(curs, nxtd); - - // undo scaling and round, then store with even/odd phases interleaved - uint8x8x2_t o; - o.val[0] = vqrshrun_n_s16(even, 4); - o.val[1] = vqrshrun_n_s16(odd, 4); - vst2_u8(out + i*2, o); + // load and perform the vertical filtering pass + // this uses 3*x + y = 4*x + (y - x) + uint8x8_t farb = vld1_u8(in_far + i); + uint8x8_t nearb = vld1_u8(in_near + i); + int16x8_t diff = vreinterpretq_s16_u16(vsubl_u8(farb, nearb)); + int16x8_t nears = vreinterpretq_s16_u16(vshll_n_u8(nearb, 2)); + int16x8_t curr = vaddq_s16(nears, diff); // current row + + // horizontal filter works the same based on shifted vers of current + // row. "prev" is current row shifted right by 1 pixel; we need to + // insert the previous pixel value (from t1). + // "next" is current row shifted left by 1 pixel, with first pixel + // of next block of 8 pixels added in. + int16x8_t prv0 = vextq_s16(curr, curr, 7); + int16x8_t nxt0 = vextq_s16(curr, curr, 1); + int16x8_t prev = vsetq_lane_s16(t1, prv0, 0); + int16x8_t next = vsetq_lane_s16(3*in_near[i+8] + in_far[i+8], nxt0, 7); + + // horizontal filter, polyphase implementation since it's convenient: + // even pixels = 3*cur + prev = cur*4 + (prev - cur) + // odd pixels = 3*cur + next = cur*4 + (next - cur) + // note the shared term. + int16x8_t curs = vshlq_n_s16(curr, 2); + int16x8_t prvd = vsubq_s16(prev, curr); + int16x8_t nxtd = vsubq_s16(next, curr); + int16x8_t even = vaddq_s16(curs, prvd); + int16x8_t odd = vaddq_s16(curs, nxtd); + + // undo scaling and round, then store with even/odd phases interleaved + uint8x8x2_t o; + o.val[0] = vqrshrun_n_s16(even, 4); + o.val[1] = vqrshrun_n_s16(odd, 4); + vst2_u8(out + i*2, o); #endif - // "previous" value for next iter - t1 = 3*in_near[i+7] + in_far[i+7]; + // "previous" value for next iter + t1 = 3*in_near[i+7] + in_far[i+7]; } t0 = t1; @@ -3338,10 +3338,10 @@ static stbi_uc *stbi__resample_row_hv_2_simd(stbi_uc *out, stbi_uc *in_near, stb out[i*2] = stbi__div16(3*t1 + t0 + 8); for (++i; i < w; ++i) { - t0 = t1; - t1 = 3*in_near[i]+in_far[i]; - out[i*2-1] = stbi__div16(3*t0 + t1 + 8); - out[i*2 ] = stbi__div16(3*t1 + t0 + 8); + t0 = t1; + t1 = 3*in_near[i]+in_far[i]; + out[i*2-1] = stbi__div16(3*t0 + t1 + 8); + out[i*2 ] = stbi__div16(3*t1 + t0 + 8); } out[w*2-1] = stbi__div4(t1+2); @@ -3357,8 +3357,8 @@ static stbi_uc *stbi__resample_row_generic(stbi_uc *out, stbi_uc *in_near, stbi_ int i,j; STBI_NOTUSED(in_far); for (i=0; i < w; ++i) - for (j=0; j < hs; ++j) - out[i*hs+j] = in_near[i]; + for (j=0; j < hs; ++j) + out[i*hs+j] = in_near[i]; return out; } @@ -3369,24 +3369,24 @@ static void stbi__YCbCr_to_RGB_row(stbi_uc *out, const stbi_uc *y, const stbi_uc { int i; for (i=0; i < count; ++i) { - int y_fixed = (y[i] << 20) + (1<<19); // rounding - int r,g,b; - int cr = pcr[i] - 128; - int cb = pcb[i] - 128; - r = y_fixed + cr* stbi__float2fixed(1.40200f); - g = y_fixed + (cr*-stbi__float2fixed(0.71414f)) + ((cb*-stbi__float2fixed(0.34414f)) & 0xffff0000); - b = y_fixed + cb* stbi__float2fixed(1.77200f); - r >>= 20; - g >>= 20; - b >>= 20; - if ((unsigned) r > 255) { if (r < 0) r = 0; else r = 255; } - if ((unsigned) g > 255) { if (g < 0) g = 0; else g = 255; } - if ((unsigned) b > 255) { if (b < 0) b = 0; else b = 255; } - out[0] = (stbi_uc)r; - out[1] = (stbi_uc)g; - out[2] = (stbi_uc)b; - out[3] = 255; - out += step; + int y_fixed = (y[i] << 20) + (1<<19); // rounding + int r,g,b; + int cr = pcr[i] - 128; + int cb = pcb[i] - 128; + r = y_fixed + cr* stbi__float2fixed(1.40200f); + g = y_fixed + (cr*-stbi__float2fixed(0.71414f)) + ((cb*-stbi__float2fixed(0.34414f)) & 0xffff0000); + b = y_fixed + cb* stbi__float2fixed(1.77200f); + r >>= 20; + g >>= 20; + b >>= 20; + if ((unsigned) r > 255) { if (r < 0) r = 0; else r = 255; } + if ((unsigned) g > 255) { if (g < 0) g = 0; else g = 255; } + if ((unsigned) b > 255) { if (b < 0) b = 0; else b = 255; } + out[0] = (stbi_uc)r; + out[1] = (stbi_uc)g; + out[2] = (stbi_uc)b; + out[3] = 255; + out += step; } } @@ -3400,127 +3400,127 @@ static void stbi__YCbCr_to_RGB_simd(stbi_uc *out, stbi_uc const *y, stbi_uc cons // it's useful in practice (you wouldn't use it for textures, for example). // so just accelerate step == 4 case. if (step == 4) { - // this is a fairly straightforward implementation and not super-optimized. - __m128i signflip = _mm_set1_epi8(-0x80); - __m128i cr_const0 = _mm_set1_epi16( (short) ( 1.40200f*4096.0f+0.5f)); - __m128i cr_const1 = _mm_set1_epi16( - (short) ( 0.71414f*4096.0f+0.5f)); - __m128i cb_const0 = _mm_set1_epi16( - (short) ( 0.34414f*4096.0f+0.5f)); - __m128i cb_const1 = _mm_set1_epi16( (short) ( 1.77200f*4096.0f+0.5f)); - __m128i y_bias = _mm_set1_epi8((char) (unsigned char) 128); - __m128i xw = _mm_set1_epi16(255); // alpha channel - - for (; i+7 < count; i += 8) { - // load - __m128i y_bytes = _mm_loadl_epi64((__m128i *) (y+i)); - __m128i cr_bytes = _mm_loadl_epi64((__m128i *) (pcr+i)); - __m128i cb_bytes = _mm_loadl_epi64((__m128i *) (pcb+i)); - __m128i cr_biased = _mm_xor_si128(cr_bytes, signflip); // -128 - __m128i cb_biased = _mm_xor_si128(cb_bytes, signflip); // -128 - - // unpack to short (and left-shift cr, cb by 8) - __m128i yw = _mm_unpacklo_epi8(y_bias, y_bytes); - __m128i crw = _mm_unpacklo_epi8(_mm_setzero_si128(), cr_biased); - __m128i cbw = _mm_unpacklo_epi8(_mm_setzero_si128(), cb_biased); - - // color transform - __m128i yws = _mm_srli_epi16(yw, 4); - __m128i cr0 = _mm_mulhi_epi16(cr_const0, crw); - __m128i cb0 = _mm_mulhi_epi16(cb_const0, cbw); - __m128i cb1 = _mm_mulhi_epi16(cbw, cb_const1); - __m128i cr1 = _mm_mulhi_epi16(crw, cr_const1); - __m128i rws = _mm_add_epi16(cr0, yws); - __m128i gwt = _mm_add_epi16(cb0, yws); - __m128i bws = _mm_add_epi16(yws, cb1); - __m128i gws = _mm_add_epi16(gwt, cr1); - - // descale - __m128i rw = _mm_srai_epi16(rws, 4); - __m128i bw = _mm_srai_epi16(bws, 4); - __m128i gw = _mm_srai_epi16(gws, 4); - - // back to byte, set up for transpose - __m128i brb = _mm_packus_epi16(rw, bw); - __m128i gxb = _mm_packus_epi16(gw, xw); - - // transpose to interleave channels - __m128i t0 = _mm_unpacklo_epi8(brb, gxb); - __m128i t1 = _mm_unpackhi_epi8(brb, gxb); - __m128i o0 = _mm_unpacklo_epi16(t0, t1); - __m128i o1 = _mm_unpackhi_epi16(t0, t1); - - // store - _mm_storeu_si128((__m128i *) (out + 0), o0); - _mm_storeu_si128((__m128i *) (out + 16), o1); - out += 32; - } + // this is a fairly straightforward implementation and not super-optimized. + __m128i signflip = _mm_set1_epi8(-0x80); + __m128i cr_const0 = _mm_set1_epi16( (short) ( 1.40200f*4096.0f+0.5f)); + __m128i cr_const1 = _mm_set1_epi16( - (short) ( 0.71414f*4096.0f+0.5f)); + __m128i cb_const0 = _mm_set1_epi16( - (short) ( 0.34414f*4096.0f+0.5f)); + __m128i cb_const1 = _mm_set1_epi16( (short) ( 1.77200f*4096.0f+0.5f)); + __m128i y_bias = _mm_set1_epi8((char) (unsigned char) 128); + __m128i xw = _mm_set1_epi16(255); // alpha channel + + for (; i+7 < count; i += 8) { + // load + __m128i y_bytes = _mm_loadl_epi64((__m128i *) (y+i)); + __m128i cr_bytes = _mm_loadl_epi64((__m128i *) (pcr+i)); + __m128i cb_bytes = _mm_loadl_epi64((__m128i *) (pcb+i)); + __m128i cr_biased = _mm_xor_si128(cr_bytes, signflip); // -128 + __m128i cb_biased = _mm_xor_si128(cb_bytes, signflip); // -128 + + // unpack to short (and left-shift cr, cb by 8) + __m128i yw = _mm_unpacklo_epi8(y_bias, y_bytes); + __m128i crw = _mm_unpacklo_epi8(_mm_setzero_si128(), cr_biased); + __m128i cbw = _mm_unpacklo_epi8(_mm_setzero_si128(), cb_biased); + + // color transform + __m128i yws = _mm_srli_epi16(yw, 4); + __m128i cr0 = _mm_mulhi_epi16(cr_const0, crw); + __m128i cb0 = _mm_mulhi_epi16(cb_const0, cbw); + __m128i cb1 = _mm_mulhi_epi16(cbw, cb_const1); + __m128i cr1 = _mm_mulhi_epi16(crw, cr_const1); + __m128i rws = _mm_add_epi16(cr0, yws); + __m128i gwt = _mm_add_epi16(cb0, yws); + __m128i bws = _mm_add_epi16(yws, cb1); + __m128i gws = _mm_add_epi16(gwt, cr1); + + // descale + __m128i rw = _mm_srai_epi16(rws, 4); + __m128i bw = _mm_srai_epi16(bws, 4); + __m128i gw = _mm_srai_epi16(gws, 4); + + // back to byte, set up for transpose + __m128i brb = _mm_packus_epi16(rw, bw); + __m128i gxb = _mm_packus_epi16(gw, xw); + + // transpose to interleave channels + __m128i t0 = _mm_unpacklo_epi8(brb, gxb); + __m128i t1 = _mm_unpackhi_epi8(brb, gxb); + __m128i o0 = _mm_unpacklo_epi16(t0, t1); + __m128i o1 = _mm_unpackhi_epi16(t0, t1); + + // store + _mm_storeu_si128((__m128i *) (out + 0), o0); + _mm_storeu_si128((__m128i *) (out + 16), o1); + out += 32; + } } #endif #ifdef STBI_NEON // in this version, step=3 support would be easy to add. but is there demand? if (step == 4) { - // this is a fairly straightforward implementation and not super-optimized. - uint8x8_t signflip = vdup_n_u8(0x80); - int16x8_t cr_const0 = vdupq_n_s16( (short) ( 1.40200f*4096.0f+0.5f)); - int16x8_t cr_const1 = vdupq_n_s16( - (short) ( 0.71414f*4096.0f+0.5f)); - int16x8_t cb_const0 = vdupq_n_s16( - (short) ( 0.34414f*4096.0f+0.5f)); - int16x8_t cb_const1 = vdupq_n_s16( (short) ( 1.77200f*4096.0f+0.5f)); - - for (; i+7 < count; i += 8) { - // load - uint8x8_t y_bytes = vld1_u8(y + i); - uint8x8_t cr_bytes = vld1_u8(pcr + i); - uint8x8_t cb_bytes = vld1_u8(pcb + i); - int8x8_t cr_biased = vreinterpret_s8_u8(vsub_u8(cr_bytes, signflip)); - int8x8_t cb_biased = vreinterpret_s8_u8(vsub_u8(cb_bytes, signflip)); - - // expand to s16 - int16x8_t yws = vreinterpretq_s16_u16(vshll_n_u8(y_bytes, 4)); - int16x8_t crw = vshll_n_s8(cr_biased, 7); - int16x8_t cbw = vshll_n_s8(cb_biased, 7); - - // color transform - int16x8_t cr0 = vqdmulhq_s16(crw, cr_const0); - int16x8_t cb0 = vqdmulhq_s16(cbw, cb_const0); - int16x8_t cr1 = vqdmulhq_s16(crw, cr_const1); - int16x8_t cb1 = vqdmulhq_s16(cbw, cb_const1); - int16x8_t rws = vaddq_s16(yws, cr0); - int16x8_t gws = vaddq_s16(vaddq_s16(yws, cb0), cr1); - int16x8_t bws = vaddq_s16(yws, cb1); - - // undo scaling, round, convert to byte - uint8x8x4_t o; - o.val[0] = vqrshrun_n_s16(rws, 4); - o.val[1] = vqrshrun_n_s16(gws, 4); - o.val[2] = vqrshrun_n_s16(bws, 4); - o.val[3] = vdup_n_u8(255); - - // store, interleaving r/g/b/a - vst4_u8(out, o); - out += 8*4; - } + // this is a fairly straightforward implementation and not super-optimized. + uint8x8_t signflip = vdup_n_u8(0x80); + int16x8_t cr_const0 = vdupq_n_s16( (short) ( 1.40200f*4096.0f+0.5f)); + int16x8_t cr_const1 = vdupq_n_s16( - (short) ( 0.71414f*4096.0f+0.5f)); + int16x8_t cb_const0 = vdupq_n_s16( - (short) ( 0.34414f*4096.0f+0.5f)); + int16x8_t cb_const1 = vdupq_n_s16( (short) ( 1.77200f*4096.0f+0.5f)); + + for (; i+7 < count; i += 8) { + // load + uint8x8_t y_bytes = vld1_u8(y + i); + uint8x8_t cr_bytes = vld1_u8(pcr + i); + uint8x8_t cb_bytes = vld1_u8(pcb + i); + int8x8_t cr_biased = vreinterpret_s8_u8(vsub_u8(cr_bytes, signflip)); + int8x8_t cb_biased = vreinterpret_s8_u8(vsub_u8(cb_bytes, signflip)); + + // expand to s16 + int16x8_t yws = vreinterpretq_s16_u16(vshll_n_u8(y_bytes, 4)); + int16x8_t crw = vshll_n_s8(cr_biased, 7); + int16x8_t cbw = vshll_n_s8(cb_biased, 7); + + // color transform + int16x8_t cr0 = vqdmulhq_s16(crw, cr_const0); + int16x8_t cb0 = vqdmulhq_s16(cbw, cb_const0); + int16x8_t cr1 = vqdmulhq_s16(crw, cr_const1); + int16x8_t cb1 = vqdmulhq_s16(cbw, cb_const1); + int16x8_t rws = vaddq_s16(yws, cr0); + int16x8_t gws = vaddq_s16(vaddq_s16(yws, cb0), cr1); + int16x8_t bws = vaddq_s16(yws, cb1); + + // undo scaling, round, convert to byte + uint8x8x4_t o; + o.val[0] = vqrshrun_n_s16(rws, 4); + o.val[1] = vqrshrun_n_s16(gws, 4); + o.val[2] = vqrshrun_n_s16(bws, 4); + o.val[3] = vdup_n_u8(255); + + // store, interleaving r/g/b/a + vst4_u8(out, o); + out += 8*4; + } } #endif for (; i < count; ++i) { - int y_fixed = (y[i] << 20) + (1<<19); // rounding - int r,g,b; - int cr = pcr[i] - 128; - int cb = pcb[i] - 128; - r = y_fixed + cr* stbi__float2fixed(1.40200f); - g = y_fixed + cr*-stbi__float2fixed(0.71414f) + ((cb*-stbi__float2fixed(0.34414f)) & 0xffff0000); - b = y_fixed + cb* stbi__float2fixed(1.77200f); - r >>= 20; - g >>= 20; - b >>= 20; - if ((unsigned) r > 255) { if (r < 0) r = 0; else r = 255; } - if ((unsigned) g > 255) { if (g < 0) g = 0; else g = 255; } - if ((unsigned) b > 255) { if (b < 0) b = 0; else b = 255; } - out[0] = (stbi_uc)r; - out[1] = (stbi_uc)g; - out[2] = (stbi_uc)b; - out[3] = 255; - out += step; + int y_fixed = (y[i] << 20) + (1<<19); // rounding + int r,g,b; + int cr = pcr[i] - 128; + int cb = pcb[i] - 128; + r = y_fixed + cr* stbi__float2fixed(1.40200f); + g = y_fixed + cr*-stbi__float2fixed(0.71414f) + ((cb*-stbi__float2fixed(0.34414f)) & 0xffff0000); + b = y_fixed + cb* stbi__float2fixed(1.77200f); + r >>= 20; + g >>= 20; + b >>= 20; + if ((unsigned) r > 255) { if (r < 0) r = 0; else r = 255; } + if ((unsigned) g > 255) { if (g < 0) g = 0; else g = 255; } + if ((unsigned) b > 255) { if (b < 0) b = 0; else b = 255; } + out[0] = (stbi_uc)r; + out[1] = (stbi_uc)g; + out[2] = (stbi_uc)b; + out[3] = 255; + out += step; } } #endif @@ -3534,9 +3534,9 @@ static void stbi__setup_jpeg(stbi__jpeg *j) #ifdef STBI_SSE2 if (stbi__sse2_available()) { - j->idct_block_kernel = stbi__idct_simd; - j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_simd; - j->resample_row_hv_2_kernel = stbi__resample_row_hv_2_simd; + j->idct_block_kernel = stbi__idct_simd; + j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_simd; + j->resample_row_hv_2_kernel = stbi__resample_row_hv_2_simd; } #endif @@ -3557,10 +3557,10 @@ typedef struct { resample_row_func resample; stbi_uc *line0,*line1; - int hs,vs; // expansion factor in each axis + int hs,vs; // expansion factor in each axis int w_lores; // horizontal pixels pre-expansion - int ystep; // how far through vertical expansion we are - int ypos; // which pre-expansion row we're on + int ystep; // how far through vertical expansion we are + int ypos; // which pre-expansion row we're on } stbi__resample; // fast 0..255 * 0..255 => 0..255 rounded multiplication @@ -3587,145 +3587,145 @@ static stbi_uc *load_jpeg_image(stbi__jpeg *z, int *out_x, int *out_y, int *comp is_rgb = z->s->img_n == 3 && (z->rgb == 3 || (z->app14_color_transform == 0 && !z->jfif)); if (z->s->img_n == 3 && n < 3 && !is_rgb) - decode_n = 1; + decode_n = 1; else - decode_n = z->s->img_n; + decode_n = z->s->img_n; // resample and color-convert { - int k; - unsigned int i,j; - stbi_uc *output; - stbi_uc *coutput[4]; - - stbi__resample res_comp[4]; - - for (k=0; k < decode_n; ++k) { - stbi__resample *r = &res_comp[k]; - - // allocate line buffer big enough for upsampling off the edges - // with upsample factor of 4 - z->img_comp[k].linebuf = (stbi_uc *) stbi__malloc(z->s->img_x + 3); - if (!z->img_comp[k].linebuf) { stbi__cleanup_jpeg(z); return stbi__errpuc("outofmem", "Out of memory"); } - - r->hs = z->img_h_max / z->img_comp[k].h; - r->vs = z->img_v_max / z->img_comp[k].v; - r->ystep = r->vs >> 1; - r->w_lores = (z->s->img_x + r->hs-1) / r->hs; - r->ypos = 0; - r->line0 = r->line1 = z->img_comp[k].data; - - if (r->hs == 1 && r->vs == 1) r->resample = resample_row_1; - else if (r->hs == 1 && r->vs == 2) r->resample = stbi__resample_row_v_2; - else if (r->hs == 2 && r->vs == 1) r->resample = stbi__resample_row_h_2; - else if (r->hs == 2 && r->vs == 2) r->resample = z->resample_row_hv_2_kernel; - else r->resample = stbi__resample_row_generic; - } - - // can't error after this so, this is safe - output = (stbi_uc *) stbi__malloc_mad3(n, z->s->img_x, z->s->img_y, 1); - if (!output) { stbi__cleanup_jpeg(z); return stbi__errpuc("outofmem", "Out of memory"); } - - // now go ahead and resample - for (j=0; j < z->s->img_y; ++j) { - stbi_uc *out = output + n * z->s->img_x * j; - for (k=0; k < decode_n; ++k) { - stbi__resample *r = &res_comp[k]; - int y_bot = r->ystep >= (r->vs >> 1); - coutput[k] = r->resample(z->img_comp[k].linebuf, - y_bot ? r->line1 : r->line0, - y_bot ? r->line0 : r->line1, - r->w_lores, r->hs); - if (++r->ystep >= r->vs) { - r->ystep = 0; - r->line0 = r->line1; - if (++r->ypos < z->img_comp[k].y) - r->line1 += z->img_comp[k].w2; - } - } - if (n >= 3) { - stbi_uc *y = coutput[0]; - if (z->s->img_n == 3) { - if (is_rgb) { - for (i=0; i < z->s->img_x; ++i) { - out[0] = y[i]; - out[1] = coutput[1][i]; - out[2] = coutput[2][i]; - out[3] = 255; - out += n; - } - } else { - z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n); - } - } else if (z->s->img_n == 4) { - if (z->app14_color_transform == 0) { // CMYK - for (i=0; i < z->s->img_x; ++i) { - stbi_uc m = coutput[3][i]; - out[0] = stbi__blinn_8x8(coutput[0][i], m); - out[1] = stbi__blinn_8x8(coutput[1][i], m); - out[2] = stbi__blinn_8x8(coutput[2][i], m); - out[3] = 255; - out += n; - } - } else if (z->app14_color_transform == 2) { // YCCK - z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n); - for (i=0; i < z->s->img_x; ++i) { - stbi_uc m = coutput[3][i]; - out[0] = stbi__blinn_8x8(255 - out[0], m); - out[1] = stbi__blinn_8x8(255 - out[1], m); - out[2] = stbi__blinn_8x8(255 - out[2], m); - out += n; - } - } else { // YCbCr + alpha? Ignore the fourth channel for now - z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n); - } - } else - for (i=0; i < z->s->img_x; ++i) { - out[0] = out[1] = out[2] = y[i]; - out[3] = 255; // not used if n==3 - out += n; - } - } else { - if (is_rgb) { - if (n == 1) - for (i=0; i < z->s->img_x; ++i) - *out++ = stbi__compute_y(coutput[0][i], coutput[1][i], coutput[2][i]); - else { - for (i=0; i < z->s->img_x; ++i, out += 2) { - out[0] = stbi__compute_y(coutput[0][i], coutput[1][i], coutput[2][i]); - out[1] = 255; - } - } - } else if (z->s->img_n == 4 && z->app14_color_transform == 0) { - for (i=0; i < z->s->img_x; ++i) { - stbi_uc m = coutput[3][i]; - stbi_uc r = stbi__blinn_8x8(coutput[0][i], m); - stbi_uc g = stbi__blinn_8x8(coutput[1][i], m); - stbi_uc b = stbi__blinn_8x8(coutput[2][i], m); - out[0] = stbi__compute_y(r, g, b); - out[1] = 255; - out += n; - } - } else if (z->s->img_n == 4 && z->app14_color_transform == 2) { - for (i=0; i < z->s->img_x; ++i) { - out[0] = stbi__blinn_8x8(255 - coutput[0][i], coutput[3][i]); - out[1] = 255; - out += n; - } - } else { - stbi_uc *y = coutput[0]; - if (n == 1) - for (i=0; i < z->s->img_x; ++i) out[i] = y[i]; - else - for (i=0; i < z->s->img_x; ++i) *out++ = y[i], *out++ = 255; - } - } - } - stbi__cleanup_jpeg(z); - *out_x = z->s->img_x; - *out_y = z->s->img_y; - if (comp) *comp = z->s->img_n >= 3 ? 3 : 1; // report original components, not output - return output; + int k; + unsigned int i,j; + stbi_uc *output; + stbi_uc *coutput[4]; + + stbi__resample res_comp[4]; + + for (k=0; k < decode_n; ++k) { + stbi__resample *r = &res_comp[k]; + + // allocate line buffer big enough for upsampling off the edges + // with upsample factor of 4 + z->img_comp[k].linebuf = (stbi_uc *) stbi__malloc(z->s->img_x + 3); + if (!z->img_comp[k].linebuf) { stbi__cleanup_jpeg(z); return stbi__errpuc("outofmem", "Out of memory"); } + + r->hs = z->img_h_max / z->img_comp[k].h; + r->vs = z->img_v_max / z->img_comp[k].v; + r->ystep = r->vs >> 1; + r->w_lores = (z->s->img_x + r->hs-1) / r->hs; + r->ypos = 0; + r->line0 = r->line1 = z->img_comp[k].data; + + if (r->hs == 1 && r->vs == 1) r->resample = resample_row_1; + else if (r->hs == 1 && r->vs == 2) r->resample = stbi__resample_row_v_2; + else if (r->hs == 2 && r->vs == 1) r->resample = stbi__resample_row_h_2; + else if (r->hs == 2 && r->vs == 2) r->resample = z->resample_row_hv_2_kernel; + else r->resample = stbi__resample_row_generic; + } + + // can't error after this so, this is safe + output = (stbi_uc *) stbi__malloc_mad3(n, z->s->img_x, z->s->img_y, 1); + if (!output) { stbi__cleanup_jpeg(z); return stbi__errpuc("outofmem", "Out of memory"); } + + // now go ahead and resample + for (j=0; j < z->s->img_y; ++j) { + stbi_uc *out = output + n * z->s->img_x * j; + for (k=0; k < decode_n; ++k) { + stbi__resample *r = &res_comp[k]; + int y_bot = r->ystep >= (r->vs >> 1); + coutput[k] = r->resample(z->img_comp[k].linebuf, + y_bot ? r->line1 : r->line0, + y_bot ? r->line0 : r->line1, + r->w_lores, r->hs); + if (++r->ystep >= r->vs) { + r->ystep = 0; + r->line0 = r->line1; + if (++r->ypos < z->img_comp[k].y) + r->line1 += z->img_comp[k].w2; + } + } + if (n >= 3) { + stbi_uc *y = coutput[0]; + if (z->s->img_n == 3) { + if (is_rgb) { + for (i=0; i < z->s->img_x; ++i) { + out[0] = y[i]; + out[1] = coutput[1][i]; + out[2] = coutput[2][i]; + out[3] = 255; + out += n; + } + } else { + z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n); + } + } else if (z->s->img_n == 4) { + if (z->app14_color_transform == 0) { // CMYK + for (i=0; i < z->s->img_x; ++i) { + stbi_uc m = coutput[3][i]; + out[0] = stbi__blinn_8x8(coutput[0][i], m); + out[1] = stbi__blinn_8x8(coutput[1][i], m); + out[2] = stbi__blinn_8x8(coutput[2][i], m); + out[3] = 255; + out += n; + } + } else if (z->app14_color_transform == 2) { // YCCK + z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n); + for (i=0; i < z->s->img_x; ++i) { + stbi_uc m = coutput[3][i]; + out[0] = stbi__blinn_8x8(255 - out[0], m); + out[1] = stbi__blinn_8x8(255 - out[1], m); + out[2] = stbi__blinn_8x8(255 - out[2], m); + out += n; + } + } else { // YCbCr + alpha? Ignore the fourth channel for now + z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n); + } + } else + for (i=0; i < z->s->img_x; ++i) { + out[0] = out[1] = out[2] = y[i]; + out[3] = 255; // not used if n==3 + out += n; + } + } else { + if (is_rgb) { + if (n == 1) + for (i=0; i < z->s->img_x; ++i) + *out++ = stbi__compute_y(coutput[0][i], coutput[1][i], coutput[2][i]); + else { + for (i=0; i < z->s->img_x; ++i, out += 2) { + out[0] = stbi__compute_y(coutput[0][i], coutput[1][i], coutput[2][i]); + out[1] = 255; + } + } + } else if (z->s->img_n == 4 && z->app14_color_transform == 0) { + for (i=0; i < z->s->img_x; ++i) { + stbi_uc m = coutput[3][i]; + stbi_uc r = stbi__blinn_8x8(coutput[0][i], m); + stbi_uc g = stbi__blinn_8x8(coutput[1][i], m); + stbi_uc b = stbi__blinn_8x8(coutput[2][i], m); + out[0] = stbi__compute_y(r, g, b); + out[1] = 255; + out += n; + } + } else if (z->s->img_n == 4 && z->app14_color_transform == 2) { + for (i=0; i < z->s->img_x; ++i) { + out[0] = stbi__blinn_8x8(255 - coutput[0][i], coutput[3][i]); + out[1] = 255; + out += n; + } + } else { + stbi_uc *y = coutput[0]; + if (n == 1) + for (i=0; i < z->s->img_x; ++i) out[i] = y[i]; + else + for (i=0; i < z->s->img_x; ++i) *out++ = y[i], *out++ = 255; + } + } + } + stbi__cleanup_jpeg(z); + *out_x = z->s->img_x; + *out_y = z->s->img_y; + if (comp) *comp = z->s->img_n >= 3 ? 3 : 1; // report original components, not output + return output; } } @@ -3756,8 +3756,8 @@ static int stbi__jpeg_test(stbi__context *s) static int stbi__jpeg_info_raw(stbi__jpeg *j, int *x, int *y, int *comp) { if (!stbi__decode_jpeg_header(j, STBI__SCAN_header)) { - stbi__rewind( j->s ); - return 0; + stbi__rewind( j->s ); + return 0; } if (x) *x = j->s->img_x; if (y) *y = j->s->img_y; @@ -3776,12 +3776,12 @@ static int stbi__jpeg_info(stbi__context *s, int *x, int *y, int *comp) } #endif -// public domain zlib decode v0.2 Sean Barrett 2006-11-18 -// simple implementation -// - all input must be provided in an upfront buffer -// - all output is written to a single output buffer (can malloc/realloc) -// performance -// - fast huffman +// public domain zlib decode v0.2 Sean Barrett 2006-11-18 +// simple implementation +// - all input must be provided in an upfront buffer +// - all output is written to a single output buffer (can malloc/realloc) +// performance +// - fast huffman #ifndef STBI_NO_ZLIB @@ -3797,16 +3797,16 @@ typedef struct stbi__uint16 firstcode[16]; int maxcode[17]; stbi__uint16 firstsymbol[16]; - stbi_uc size[288]; + stbi_uc size[288]; stbi__uint16 value[288]; } stbi__zhuffman; stbi_inline static int stbi__bitreverse16(int n) { - n = ((n & 0xAAAA) >> 1) | ((n & 0x5555) << 1); - n = ((n & 0xCCCC) >> 2) | ((n & 0x3333) << 2); - n = ((n & 0xF0F0) >> 4) | ((n & 0x0F0F) << 4); - n = ((n & 0xFF00) >> 8) | ((n & 0x00FF) << 8); + n = ((n & 0xAAAA) >> 1) | ((n & 0x5555) << 1); + n = ((n & 0xCCCC) >> 2) | ((n & 0x3333) << 2); + n = ((n & 0xF0F0) >> 4) | ((n & 0x0F0F) << 4); + n = ((n & 0xFF00) >> 8) | ((n & 0x00FF) << 8); return n; } @@ -3827,49 +3827,49 @@ static int stbi__zbuild_huffman(stbi__zhuffman *z, const stbi_uc *sizelist, int memset(sizes, 0, sizeof(sizes)); memset(z->fast, 0, sizeof(z->fast)); for (i=0; i < num; ++i) - ++sizes[sizelist[i]]; + ++sizes[sizelist[i]]; sizes[0] = 0; for (i=1; i < 16; ++i) - if (sizes[i] > (1 << i)) - return stbi__err("bad sizes", "Corrupt PNG"); + if (sizes[i] > (1 << i)) + return stbi__err("bad sizes", "Corrupt PNG"); code = 0; for (i=1; i < 16; ++i) { - next_code[i] = code; - z->firstcode[i] = (stbi__uint16) code; - z->firstsymbol[i] = (stbi__uint16) k; - code = (code + sizes[i]); - if (sizes[i]) - if (code-1 >= (1 << i)) return stbi__err("bad codelengths","Corrupt PNG"); - z->maxcode[i] = code << (16-i); // preshift for inner loop - code <<= 1; - k += sizes[i]; + next_code[i] = code; + z->firstcode[i] = (stbi__uint16) code; + z->firstsymbol[i] = (stbi__uint16) k; + code = (code + sizes[i]); + if (sizes[i]) + if (code-1 >= (1 << i)) return stbi__err("bad codelengths","Corrupt PNG"); + z->maxcode[i] = code << (16-i); // preshift for inner loop + code <<= 1; + k += sizes[i]; } z->maxcode[16] = 0x10000; // sentinel for (i=0; i < num; ++i) { - int s = sizelist[i]; - if (s) { - int c = next_code[s] - z->firstcode[s] + z->firstsymbol[s]; - stbi__uint16 fastv = (stbi__uint16) ((s << 9) | i); - z->size [c] = (stbi_uc ) s; - z->value[c] = (stbi__uint16) i; - if (s <= STBI__ZFAST_BITS) { - int j = stbi__bit_reverse(next_code[s],s); - while (j < (1 << STBI__ZFAST_BITS)) { - z->fast[j] = fastv; - j += (1 << s); - } - } - ++next_code[s]; - } + int s = sizelist[i]; + if (s) { + int c = next_code[s] - z->firstcode[s] + z->firstsymbol[s]; + stbi__uint16 fastv = (stbi__uint16) ((s << 9) | i); + z->size [c] = (stbi_uc ) s; + z->value[c] = (stbi__uint16) i; + if (s <= STBI__ZFAST_BITS) { + int j = stbi__bit_reverse(next_code[s],s); + while (j < (1 << STBI__ZFAST_BITS)) { + z->fast[j] = fastv; + j += (1 << s); + } + } + ++next_code[s]; + } } return 1; } // zlib-from-memory implementation for PNG reading -// because PNG allows splitting the zlib stream arbitrarily, -// and it's annoying structurally to have PNG call ZLIB call PNG, -// we require PNG read all the IDATs and combine them into a single -// memory buffer +// because PNG allows splitting the zlib stream arbitrarily, +// and it's annoying structurally to have PNG call ZLIB call PNG, +// we require PNG read all the IDATs and combine them into a single +// memory buffer typedef struct { @@ -3880,7 +3880,7 @@ typedef struct char *zout; char *zout_start; char *zout_end; - int z_expandable; + int z_expandable; stbi__zhuffman z_length, z_distance; } stbi__zbuf; @@ -3894,9 +3894,9 @@ stbi_inline static stbi_uc stbi__zget8(stbi__zbuf *z) static void stbi__fill_bits(stbi__zbuf *z) { do { - STBI_ASSERT(z->code_buffer < (1U << z->num_bits)); - z->code_buffer |= (unsigned int) stbi__zget8(z) << z->num_bits; - z->num_bits += 8; + STBI_ASSERT(z->code_buffer < (1U << z->num_bits)); + z->code_buffer |= (unsigned int) stbi__zget8(z) << z->num_bits; + z->num_bits += 8; } while (z->num_bits <= 24); } @@ -3917,8 +3917,8 @@ static int stbi__zhuffman_decode_slowpath(stbi__zbuf *a, stbi__zhuffman *z) // use jpeg approach, which requires MSbits at top k = stbi__bit_reverse(a->code_buffer, 16); for (s=STBI__ZFAST_BITS+1; ; ++s) - if (k < z->maxcode[s]) - break; + if (k < z->maxcode[s]) + break; if (s == 16) return -1; // invalid code! // code size is s, so: b = (k >> (16-s)) - z->firstcode[s] + z->firstsymbol[s]; @@ -3934,30 +3934,30 @@ stbi_inline static int stbi__zhuffman_decode(stbi__zbuf *a, stbi__zhuffman *z) if (a->num_bits < 16) stbi__fill_bits(a); b = z->fast[a->code_buffer & STBI__ZFAST_MASK]; if (b) { - s = b >> 9; - a->code_buffer >>= s; - a->num_bits -= s; - return b & 511; + s = b >> 9; + a->code_buffer >>= s; + a->num_bits -= s; + return b & 511; } return stbi__zhuffman_decode_slowpath(a, z); } -static int stbi__zexpand(stbi__zbuf *z, char *zout, int n) // need to make room for n bytes +static int stbi__zexpand(stbi__zbuf *z, char *zout, int n) // need to make room for n bytes { char *q; int cur, limit, old_limit; z->zout = zout; if (!z->z_expandable) return stbi__err("output buffer limit","Corrupt PNG"); - cur = (int) (z->zout - z->zout_start); + cur = (int) (z->zout - z->zout_start); limit = old_limit = (int) (z->zout_end - z->zout_start); while (cur + n > limit) - limit *= 2; + limit *= 2; q = (char *) STBI_REALLOC_SIZED(z->zout_start, old_limit, limit); STBI_NOTUSED(old_limit); if (q == NULL) return stbi__err("outofmem", "Out of memory"); z->zout_start = q; - z->zout = q + cur; - z->zout_end = q + limit; + z->zout = q + cur; + z->zout_end = q + limit; return 1; } @@ -3979,41 +3979,41 @@ static int stbi__parse_huffman_block(stbi__zbuf *a) { char *zout = a->zout; for(;;) { - int z = stbi__zhuffman_decode(a, &a->z_length); - if (z < 256) { - if (z < 0) return stbi__err("bad huffman code","Corrupt PNG"); // error in huffman codes - if (zout >= a->zout_end) { - if (!stbi__zexpand(a, zout, 1)) return 0; - zout = a->zout; - } - *zout++ = (char) z; - } else { - stbi_uc *p; - int len,dist; - if (z == 256) { - a->zout = zout; - return 1; - } - z -= 257; - len = stbi__zlength_base[z]; - if (stbi__zlength_extra[z]) len += stbi__zreceive(a, stbi__zlength_extra[z]); - z = stbi__zhuffman_decode(a, &a->z_distance); - if (z < 0) return stbi__err("bad huffman code","Corrupt PNG"); - dist = stbi__zdist_base[z]; - if (stbi__zdist_extra[z]) dist += stbi__zreceive(a, stbi__zdist_extra[z]); - if (zout - a->zout_start < dist) return stbi__err("bad dist","Corrupt PNG"); - if (zout + len > a->zout_end) { - if (!stbi__zexpand(a, zout, len)) return 0; - zout = a->zout; - } - p = (stbi_uc *) (zout - dist); - if (dist == 1) { // run of one byte; common in images. - stbi_uc v = *p; - if (len) { do *zout++ = v; while (--len); } - } else { - if (len) { do *zout++ = *p++; while (--len); } - } - } + int z = stbi__zhuffman_decode(a, &a->z_length); + if (z < 256) { + if (z < 0) return stbi__err("bad huffman code","Corrupt PNG"); // error in huffman codes + if (zout >= a->zout_end) { + if (!stbi__zexpand(a, zout, 1)) return 0; + zout = a->zout; + } + *zout++ = (char) z; + } else { + stbi_uc *p; + int len,dist; + if (z == 256) { + a->zout = zout; + return 1; + } + z -= 257; + len = stbi__zlength_base[z]; + if (stbi__zlength_extra[z]) len += stbi__zreceive(a, stbi__zlength_extra[z]); + z = stbi__zhuffman_decode(a, &a->z_distance); + if (z < 0) return stbi__err("bad huffman code","Corrupt PNG"); + dist = stbi__zdist_base[z]; + if (stbi__zdist_extra[z]) dist += stbi__zreceive(a, stbi__zdist_extra[z]); + if (zout - a->zout_start < dist) return stbi__err("bad dist","Corrupt PNG"); + if (zout + len > a->zout_end) { + if (!stbi__zexpand(a, zout, len)) return 0; + zout = a->zout; + } + p = (stbi_uc *) (zout - dist); + if (dist == 1) { // run of one byte; common in images. + stbi_uc v = *p; + if (len) { do *zout++ = v; while (--len); } + } else { + if (len) { do *zout++ = *p++; while (--len); } + } + } } } @@ -4025,40 +4025,40 @@ static int stbi__compute_huffman_codes(stbi__zbuf *a) stbi_uc codelength_sizes[19]; int i,n; - int hlit = stbi__zreceive(a,5) + 257; + int hlit = stbi__zreceive(a,5) + 257; int hdist = stbi__zreceive(a,5) + 1; int hclen = stbi__zreceive(a,4) + 4; - int ntot = hlit + hdist; + int ntot = hlit + hdist; memset(codelength_sizes, 0, sizeof(codelength_sizes)); for (i=0; i < hclen; ++i) { - int s = stbi__zreceive(a,3); - codelength_sizes[length_dezigzag[i]] = (stbi_uc) s; + int s = stbi__zreceive(a,3); + codelength_sizes[length_dezigzag[i]] = (stbi_uc) s; } if (!stbi__zbuild_huffman(&z_codelength, codelength_sizes, 19)) return 0; n = 0; while (n < ntot) { - int c = stbi__zhuffman_decode(a, &z_codelength); - if (c < 0 || c >= 19) return stbi__err("bad codelengths", "Corrupt PNG"); - if (c < 16) - lencodes[n++] = (stbi_uc) c; - else { - stbi_uc fill = 0; - if (c == 16) { - c = stbi__zreceive(a,2)+3; - if (n == 0) return stbi__err("bad codelengths", "Corrupt PNG"); - fill = lencodes[n-1]; - } else if (c == 17) - c = stbi__zreceive(a,3)+3; - else { - STBI_ASSERT(c == 18); - c = stbi__zreceive(a,7)+11; - } - if (ntot - n < c) return stbi__err("bad codelengths", "Corrupt PNG"); - memset(lencodes+n, fill, c); - n += c; - } + int c = stbi__zhuffman_decode(a, &z_codelength); + if (c < 0 || c >= 19) return stbi__err("bad codelengths", "Corrupt PNG"); + if (c < 16) + lencodes[n++] = (stbi_uc) c; + else { + stbi_uc fill = 0; + if (c == 16) { + c = stbi__zreceive(a,2)+3; + if (n == 0) return stbi__err("bad codelengths", "Corrupt PNG"); + fill = lencodes[n-1]; + } else if (c == 17) + c = stbi__zreceive(a,3)+3; + else { + STBI_ASSERT(c == 18); + c = stbi__zreceive(a,7)+11; + } + if (ntot - n < c) return stbi__err("bad codelengths", "Corrupt PNG"); + memset(lencodes+n, fill, c); + n += c; + } } if (n != ntot) return stbi__err("bad codelengths","Corrupt PNG"); if (!stbi__zbuild_huffman(&a->z_length, lencodes, hlit)) return 0; @@ -4071,24 +4071,24 @@ static int stbi__parse_uncompressed_block(stbi__zbuf *a) stbi_uc header[4]; int len,nlen,k; if (a->num_bits & 7) - stbi__zreceive(a, a->num_bits & 7); // discard + stbi__zreceive(a, a->num_bits & 7); // discard // drain the bit-packed data into header k = 0; while (a->num_bits > 0) { - header[k++] = (stbi_uc) (a->code_buffer & 255); // suppress MSVC run-time check - a->code_buffer >>= 8; - a->num_bits -= 8; + header[k++] = (stbi_uc) (a->code_buffer & 255); // suppress MSVC run-time check + a->code_buffer >>= 8; + a->num_bits -= 8; } STBI_ASSERT(a->num_bits == 0); // now fill header the normal way while (k < 4) - header[k++] = stbi__zget8(a); - len = header[1] * 256 + header[0]; + header[k++] = stbi__zget8(a); + len = header[1] * 256 + header[0]; nlen = header[3] * 256 + header[2]; if (nlen != (len ^ 0xffff)) return stbi__err("zlib corrupt","Corrupt PNG"); if (a->zbuffer + len > a->zbuffer_end) return stbi__err("read past buffer","Corrupt PNG"); if (a->zout + len > a->zout_end) - if (!stbi__zexpand(a, a->zout, len)) return 0; + if (!stbi__zexpand(a, a->zout, len)) return 0; memcpy(a->zout, a->zbuffer, len); a->zbuffer += len; a->zout += len; @@ -4097,10 +4097,10 @@ static int stbi__parse_uncompressed_block(stbi__zbuf *a) static int stbi__parse_zlib_header(stbi__zbuf *a) { - int cmf = stbi__zget8(a); - int cm = cmf & 15; + int cmf = stbi__zget8(a); + int cm = cmf & 15; /* int cinfo = cmf >> 4; */ - int flg = stbi__zget8(a); + int flg = stbi__zget8(a); if ((cmf*256+flg) % 31 != 0) return stbi__err("bad zlib header","Corrupt PNG"); // zlib spec if (flg & 32) return stbi__err("no preset dict","Corrupt PNG"); // preset dictionary not allowed in png if (cm != 8) return stbi__err("bad compression","Corrupt PNG"); // DEFLATE required for png @@ -4127,13 +4127,13 @@ static const stbi_uc stbi__zdefault_distance[32] = /* Init algorithm: { - int i; // use <= to match clearly with spec - for (i=0; i <= 143; ++i) stbi__zdefault_length[i] = 8; - for ( ; i <= 255; ++i) stbi__zdefault_length[i] = 9; - for ( ; i <= 279; ++i) stbi__zdefault_length[i] = 7; - for ( ; i <= 287; ++i) stbi__zdefault_length[i] = 8; + int i; // use <= to match clearly with spec + for (i=0; i <= 143; ++i) stbi__zdefault_length[i] = 8; + for ( ; i <= 255; ++i) stbi__zdefault_length[i] = 9; + for ( ; i <= 279; ++i) stbi__zdefault_length[i] = 7; + for ( ; i <= 287; ++i) stbi__zdefault_length[i] = 8; - for (i=0; i <= 31; ++i) stbi__zdefault_distance[i] = 5; + for (i=0; i <= 31; ++i) stbi__zdefault_distance[i] = 5; } */ @@ -4141,26 +4141,26 @@ static int stbi__parse_zlib(stbi__zbuf *a, int parse_header) { int final, type; if (parse_header) - if (!stbi__parse_zlib_header(a)) return 0; + if (!stbi__parse_zlib_header(a)) return 0; a->num_bits = 0; a->code_buffer = 0; do { - final = stbi__zreceive(a,1); - type = stbi__zreceive(a,2); - if (type == 0) { - if (!stbi__parse_uncompressed_block(a)) return 0; - } else if (type == 3) { - return 0; - } else { - if (type == 1) { - // use fixed code lengths - if (!stbi__zbuild_huffman(&a->z_length , stbi__zdefault_length , 288)) return 0; - if (!stbi__zbuild_huffman(&a->z_distance, stbi__zdefault_distance, 32)) return 0; - } else { - if (!stbi__compute_huffman_codes(a)) return 0; - } - if (!stbi__parse_huffman_block(a)) return 0; - } + final = stbi__zreceive(a,1); + type = stbi__zreceive(a,2); + if (type == 0) { + if (!stbi__parse_uncompressed_block(a)) return 0; + } else if (type == 3) { + return 0; + } else { + if (type == 1) { + // use fixed code lengths + if (!stbi__zbuild_huffman(&a->z_length , stbi__zdefault_length , 288)) return 0; + if (!stbi__zbuild_huffman(&a->z_distance, stbi__zdefault_distance, 32)) return 0; + } else { + if (!stbi__compute_huffman_codes(a)) return 0; + } + if (!stbi__parse_huffman_block(a)) return 0; + } } while (!final); return 1; } @@ -4168,8 +4168,8 @@ static int stbi__parse_zlib(stbi__zbuf *a, int parse_header) static int stbi__do_zlib(stbi__zbuf *a, char *obuf, int olen, int exp, int parse_header) { a->zout_start = obuf; - a->zout = obuf; - a->zout_end = obuf + olen; + a->zout = obuf; + a->zout_end = obuf + olen; a->z_expandable = exp; return stbi__parse_zlib(a, parse_header); @@ -4183,11 +4183,11 @@ STBIDEF char *stbi_zlib_decode_malloc_guesssize(const char *buffer, int len, int a.zbuffer = (stbi_uc *) buffer; a.zbuffer_end = (stbi_uc *) buffer + len; if (stbi__do_zlib(&a, p, initial_size, 1, 1)) { - if (outlen) *outlen = (int) (a.zout - a.zout_start); - return a.zout_start; + if (outlen) *outlen = (int) (a.zout - a.zout_start); + return a.zout_start; } else { - STBI_FREE(a.zout_start); - return NULL; + STBI_FREE(a.zout_start); + return NULL; } } @@ -4204,11 +4204,11 @@ STBIDEF char *stbi_zlib_decode_malloc_guesssize_headerflag(const char *buffer, i a.zbuffer = (stbi_uc *) buffer; a.zbuffer_end = (stbi_uc *) buffer + len; if (stbi__do_zlib(&a, p, initial_size, 1, parse_header)) { - if (outlen) *outlen = (int) (a.zout - a.zout_start); - return a.zout_start; + if (outlen) *outlen = (int) (a.zout - a.zout_start); + return a.zout_start; } else { - STBI_FREE(a.zout_start); - return NULL; + STBI_FREE(a.zout_start); + return NULL; } } @@ -4218,9 +4218,9 @@ STBIDEF int stbi_zlib_decode_buffer(char *obuffer, int olen, char const *ibuffer a.zbuffer = (stbi_uc *) ibuffer; a.zbuffer_end = (stbi_uc *) ibuffer + ilen; if (stbi__do_zlib(&a, obuffer, olen, 0, 1)) - return (int) (a.zout - a.zout_start); + return (int) (a.zout - a.zout_start); else - return -1; + return -1; } STBIDEF char *stbi_zlib_decode_noheader_malloc(char const *buffer, int len, int *outlen) @@ -4231,11 +4231,11 @@ STBIDEF char *stbi_zlib_decode_noheader_malloc(char const *buffer, int len, int a.zbuffer = (stbi_uc *) buffer; a.zbuffer_end = (stbi_uc *) buffer+len; if (stbi__do_zlib(&a, p, 16384, 1, 0)) { - if (outlen) *outlen = (int) (a.zout - a.zout_start); - return a.zout_start; + if (outlen) *outlen = (int) (a.zout - a.zout_start); + return a.zout_start; } else { - STBI_FREE(a.zout_start); - return NULL; + STBI_FREE(a.zout_start); + return NULL; } } @@ -4245,21 +4245,21 @@ STBIDEF int stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const char a.zbuffer = (stbi_uc *) ibuffer; a.zbuffer_end = (stbi_uc *) ibuffer + ilen; if (stbi__do_zlib(&a, obuffer, olen, 0, 0)) - return (int) (a.zout - a.zout_start); + return (int) (a.zout - a.zout_start); else - return -1; + return -1; } #endif -// public domain "baseline" PNG decoder v0.10 Sean Barrett 2006-11-18 -// simple implementation -// - only 8-bit samples -// - no CRC checking -// - allocates lots of intermediate memory -// - avoids problem of streaming data between subsystems -// - avoids explicit window management -// performance -// - uses stb_zlib, a PD zlib implementation with fast huffman decoding +// public domain "baseline" PNG decoder v0.10 Sean Barrett 2006-11-18 +// simple implementation +// - only 8-bit samples +// - no CRC checking +// - allocates lots of intermediate memory +// - avoids problem of streaming data between subsystems +// - avoids explicit window management +// performance +// - uses stb_zlib, a PD zlib implementation with fast huffman decoding #ifndef STBI_NO_PNG typedef struct @@ -4272,7 +4272,7 @@ static stbi__pngchunk stbi__get_chunk_header(stbi__context *s) { stbi__pngchunk c; c.length = stbi__get32be(s); - c.type = stbi__get32be(s); + c.type = stbi__get32be(s); return c; } @@ -4281,7 +4281,7 @@ static int stbi__check_png_header(stbi__context *s) static const stbi_uc png_sig[8] = { 137,80,78,71,13,10,26,10 }; int i; for (i=0; i < 8; ++i) - if (stbi__get8(s) != png_sig[i]) return stbi__err("bad png sig","Not a PNG"); + if (stbi__get8(s) != png_sig[i]) return stbi__err("bad png sig","Not a PNG"); return 1; } @@ -4354,186 +4354,186 @@ static int stbi__create_png_image_raw(stbi__png *a, stbi_uc *raw, stbi__uint32 r if (raw_len < img_len) return stbi__err("not enough pixels","Corrupt PNG"); for (j=0; j < y; ++j) { - stbi_uc *cur = a->out + stride*j; - stbi_uc *prior; - int filter = *raw++; - - if (filter > 4) - return stbi__err("invalid filter","Corrupt PNG"); - - if (depth < 8) { - STBI_ASSERT(img_width_bytes <= x); - cur += x*out_n - img_width_bytes; // store output to the rightmost img_len bytes, so we can decode in place - filter_bytes = 1; - width = img_width_bytes; - } - prior = cur - stride; // bugfix: need to compute this after 'cur +=' computation above - - // if first row, use special filter that doesn't sample previous row - if (j == 0) filter = first_row_filter[filter]; - - // handle first byte explicitly - for (k=0; k < filter_bytes; ++k) { - switch (filter) { - case STBI__F_none : cur[k] = raw[k]; break; - case STBI__F_sub : cur[k] = raw[k]; break; - case STBI__F_up : cur[k] = STBI__BYTECAST(raw[k] + prior[k]); break; - case STBI__F_avg : cur[k] = STBI__BYTECAST(raw[k] + (prior[k]>>1)); break; - case STBI__F_paeth : cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(0,prior[k],0)); break; - case STBI__F_avg_first : cur[k] = raw[k]; break; - case STBI__F_paeth_first: cur[k] = raw[k]; break; - } - } - - if (depth == 8) { - if (img_n != out_n) - cur[img_n] = 255; // first pixel - raw += img_n; - cur += out_n; - prior += out_n; - } else if (depth == 16) { - if (img_n != out_n) { - cur[filter_bytes] = 255; // first pixel top byte - cur[filter_bytes+1] = 255; // first pixel bottom byte - } - raw += filter_bytes; - cur += output_bytes; - prior += output_bytes; - } else { - raw += 1; - cur += 1; - prior += 1; - } - - // this is a little gross, so that we don't switch per-pixel or per-component - if (depth < 8 || img_n == out_n) { - int nk = (width - 1)*filter_bytes; - #define STBI__CASE(f) \ - case f: \ - for (k=0; k < nk; ++k) - switch (filter) { - // "none" filter turns into a memcpy here; make that explicit. - case STBI__F_none: memcpy(cur, raw, nk); break; - STBI__CASE(STBI__F_sub) { cur[k] = STBI__BYTECAST(raw[k] + cur[k-filter_bytes]); } break; - STBI__CASE(STBI__F_up) { cur[k] = STBI__BYTECAST(raw[k] + prior[k]); } break; - STBI__CASE(STBI__F_avg) { cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k-filter_bytes])>>1)); } break; - STBI__CASE(STBI__F_paeth) { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k-filter_bytes],prior[k],prior[k-filter_bytes])); } break; - STBI__CASE(STBI__F_avg_first) { cur[k] = STBI__BYTECAST(raw[k] + (cur[k-filter_bytes] >> 1)); } break; - STBI__CASE(STBI__F_paeth_first) { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k-filter_bytes],0,0)); } break; - } - #undef STBI__CASE - raw += nk; - } else { - STBI_ASSERT(img_n+1 == out_n); - #define STBI__CASE(f) \ - case f: \ - for (i=x-1; i >= 1; --i, cur[filter_bytes]=255,raw+=filter_bytes,cur+=output_bytes,prior+=output_bytes) \ - for (k=0; k < filter_bytes; ++k) - switch (filter) { - STBI__CASE(STBI__F_none) { cur[k] = raw[k]; } break; - STBI__CASE(STBI__F_sub) { cur[k] = STBI__BYTECAST(raw[k] + cur[k- output_bytes]); } break; - STBI__CASE(STBI__F_up) { cur[k] = STBI__BYTECAST(raw[k] + prior[k]); } break; - STBI__CASE(STBI__F_avg) { cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k- output_bytes])>>1)); } break; - STBI__CASE(STBI__F_paeth) { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k- output_bytes],prior[k],prior[k- output_bytes])); } break; - STBI__CASE(STBI__F_avg_first) { cur[k] = STBI__BYTECAST(raw[k] + (cur[k- output_bytes] >> 1)); } break; - STBI__CASE(STBI__F_paeth_first) { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k- output_bytes],0,0)); } break; - } - #undef STBI__CASE - - // the loop above sets the high byte of the pixels' alpha, but for - // 16 bit png files we also need the low byte set. we'll do that here. - if (depth == 16) { - cur = a->out + stride*j; // start at the beginning of the row again - for (i=0; i < x; ++i,cur+=output_bytes) { - cur[filter_bytes+1] = 255; - } - } - } + stbi_uc *cur = a->out + stride*j; + stbi_uc *prior; + int filter = *raw++; + + if (filter > 4) + return stbi__err("invalid filter","Corrupt PNG"); + + if (depth < 8) { + STBI_ASSERT(img_width_bytes <= x); + cur += x*out_n - img_width_bytes; // store output to the rightmost img_len bytes, so we can decode in place + filter_bytes = 1; + width = img_width_bytes; + } + prior = cur - stride; // bugfix: need to compute this after 'cur +=' computation above + + // if first row, use special filter that doesn't sample previous row + if (j == 0) filter = first_row_filter[filter]; + + // handle first byte explicitly + for (k=0; k < filter_bytes; ++k) { + switch (filter) { + case STBI__F_none : cur[k] = raw[k]; break; + case STBI__F_sub : cur[k] = raw[k]; break; + case STBI__F_up : cur[k] = STBI__BYTECAST(raw[k] + prior[k]); break; + case STBI__F_avg : cur[k] = STBI__BYTECAST(raw[k] + (prior[k]>>1)); break; + case STBI__F_paeth : cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(0,prior[k],0)); break; + case STBI__F_avg_first : cur[k] = raw[k]; break; + case STBI__F_paeth_first: cur[k] = raw[k]; break; + } + } + + if (depth == 8) { + if (img_n != out_n) + cur[img_n] = 255; // first pixel + raw += img_n; + cur += out_n; + prior += out_n; + } else if (depth == 16) { + if (img_n != out_n) { + cur[filter_bytes] = 255; // first pixel top byte + cur[filter_bytes+1] = 255; // first pixel bottom byte + } + raw += filter_bytes; + cur += output_bytes; + prior += output_bytes; + } else { + raw += 1; + cur += 1; + prior += 1; + } + + // this is a little gross, so that we don't switch per-pixel or per-component + if (depth < 8 || img_n == out_n) { + int nk = (width - 1)*filter_bytes; + #define STBI__CASE(f) \ + case f: \ + for (k=0; k < nk; ++k) + switch (filter) { + // "none" filter turns into a memcpy here; make that explicit. + case STBI__F_none: memcpy(cur, raw, nk); break; + STBI__CASE(STBI__F_sub) { cur[k] = STBI__BYTECAST(raw[k] + cur[k-filter_bytes]); } break; + STBI__CASE(STBI__F_up) { cur[k] = STBI__BYTECAST(raw[k] + prior[k]); } break; + STBI__CASE(STBI__F_avg) { cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k-filter_bytes])>>1)); } break; + STBI__CASE(STBI__F_paeth) { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k-filter_bytes],prior[k],prior[k-filter_bytes])); } break; + STBI__CASE(STBI__F_avg_first) { cur[k] = STBI__BYTECAST(raw[k] + (cur[k-filter_bytes] >> 1)); } break; + STBI__CASE(STBI__F_paeth_first) { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k-filter_bytes],0,0)); } break; + } + #undef STBI__CASE + raw += nk; + } else { + STBI_ASSERT(img_n+1 == out_n); + #define STBI__CASE(f) \ + case f: \ + for (i=x-1; i >= 1; --i, cur[filter_bytes]=255,raw+=filter_bytes,cur+=output_bytes,prior+=output_bytes) \ + for (k=0; k < filter_bytes; ++k) + switch (filter) { + STBI__CASE(STBI__F_none) { cur[k] = raw[k]; } break; + STBI__CASE(STBI__F_sub) { cur[k] = STBI__BYTECAST(raw[k] + cur[k- output_bytes]); } break; + STBI__CASE(STBI__F_up) { cur[k] = STBI__BYTECAST(raw[k] + prior[k]); } break; + STBI__CASE(STBI__F_avg) { cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k- output_bytes])>>1)); } break; + STBI__CASE(STBI__F_paeth) { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k- output_bytes],prior[k],prior[k- output_bytes])); } break; + STBI__CASE(STBI__F_avg_first) { cur[k] = STBI__BYTECAST(raw[k] + (cur[k- output_bytes] >> 1)); } break; + STBI__CASE(STBI__F_paeth_first) { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k- output_bytes],0,0)); } break; + } + #undef STBI__CASE + + // the loop above sets the high byte of the pixels' alpha, but for + // 16 bit png files we also need the low byte set. we'll do that here. + if (depth == 16) { + cur = a->out + stride*j; // start at the beginning of the row again + for (i=0; i < x; ++i,cur+=output_bytes) { + cur[filter_bytes+1] = 255; + } + } + } } // we make a separate pass to expand bits to pixels; for performance, // this could run two scanlines behind the above code, so it won't // intefere with filtering but will still be in the cache. if (depth < 8) { - for (j=0; j < y; ++j) { - stbi_uc *cur = a->out + stride*j; - stbi_uc *in = a->out + stride*j + x*out_n - img_width_bytes; - // unpack 1/2/4-bit into a 8-bit buffer. allows us to keep the common 8-bit path optimal at minimal cost for 1/2/4-bit - // png guarante byte alignment, if width is not multiple of 8/4/2 we'll decode dummy trailing data that will be skipped in the later loop - stbi_uc scale = (color == 0) ? stbi__depth_scale_table[depth] : 1; // scale grayscale values to 0..255 range - - // note that the final byte might overshoot and write more data than desired. - // we can allocate enough data that this never writes out of memory, but it - // could also overwrite the next scanline. can it overwrite non-empty data - // on the next scanline? yes, consider 1-pixel-wide scanlines with 1-bit-per-pixel. - // so we need to explicitly clamp the final ones - - if (depth == 4) { - for (k=x*img_n; k >= 2; k-=2, ++in) { - *cur++ = scale * ((*in >> 4) ); - *cur++ = scale * ((*in ) & 0x0f); - } - if (k > 0) *cur++ = scale * ((*in >> 4) ); - } else if (depth == 2) { - for (k=x*img_n; k >= 4; k-=4, ++in) { - *cur++ = scale * ((*in >> 6) ); - *cur++ = scale * ((*in >> 4) & 0x03); - *cur++ = scale * ((*in >> 2) & 0x03); - *cur++ = scale * ((*in ) & 0x03); - } - if (k > 0) *cur++ = scale * ((*in >> 6) ); - if (k > 1) *cur++ = scale * ((*in >> 4) & 0x03); - if (k > 2) *cur++ = scale * ((*in >> 2) & 0x03); - } else if (depth == 1) { - for (k=x*img_n; k >= 8; k-=8, ++in) { - *cur++ = scale * ((*in >> 7) ); - *cur++ = scale * ((*in >> 6) & 0x01); - *cur++ = scale * ((*in >> 5) & 0x01); - *cur++ = scale * ((*in >> 4) & 0x01); - *cur++ = scale * ((*in >> 3) & 0x01); - *cur++ = scale * ((*in >> 2) & 0x01); - *cur++ = scale * ((*in >> 1) & 0x01); - *cur++ = scale * ((*in ) & 0x01); - } - if (k > 0) *cur++ = scale * ((*in >> 7) ); - if (k > 1) *cur++ = scale * ((*in >> 6) & 0x01); - if (k > 2) *cur++ = scale * ((*in >> 5) & 0x01); - if (k > 3) *cur++ = scale * ((*in >> 4) & 0x01); - if (k > 4) *cur++ = scale * ((*in >> 3) & 0x01); - if (k > 5) *cur++ = scale * ((*in >> 2) & 0x01); - if (k > 6) *cur++ = scale * ((*in >> 1) & 0x01); - } - if (img_n != out_n) { - int q; - // insert alpha = 255 - cur = a->out + stride*j; - if (img_n == 1) { - for (q=x-1; q >= 0; --q) { - cur[q*2+1] = 255; - cur[q*2+0] = cur[q]; - } - } else { - STBI_ASSERT(img_n == 3); - for (q=x-1; q >= 0; --q) { - cur[q*4+3] = 255; - cur[q*4+2] = cur[q*3+2]; - cur[q*4+1] = cur[q*3+1]; - cur[q*4+0] = cur[q*3+0]; - } - } - } - } + for (j=0; j < y; ++j) { + stbi_uc *cur = a->out + stride*j; + stbi_uc *in = a->out + stride*j + x*out_n - img_width_bytes; + // unpack 1/2/4-bit into a 8-bit buffer. allows us to keep the common 8-bit path optimal at minimal cost for 1/2/4-bit + // png guarante byte alignment, if width is not multiple of 8/4/2 we'll decode dummy trailing data that will be skipped in the later loop + stbi_uc scale = (color == 0) ? stbi__depth_scale_table[depth] : 1; // scale grayscale values to 0..255 range + + // note that the final byte might overshoot and write more data than desired. + // we can allocate enough data that this never writes out of memory, but it + // could also overwrite the next scanline. can it overwrite non-empty data + // on the next scanline? yes, consider 1-pixel-wide scanlines with 1-bit-per-pixel. + // so we need to explicitly clamp the final ones + + if (depth == 4) { + for (k=x*img_n; k >= 2; k-=2, ++in) { + *cur++ = scale * ((*in >> 4) ); + *cur++ = scale * ((*in ) & 0x0f); + } + if (k > 0) *cur++ = scale * ((*in >> 4) ); + } else if (depth == 2) { + for (k=x*img_n; k >= 4; k-=4, ++in) { + *cur++ = scale * ((*in >> 6) ); + *cur++ = scale * ((*in >> 4) & 0x03); + *cur++ = scale * ((*in >> 2) & 0x03); + *cur++ = scale * ((*in ) & 0x03); + } + if (k > 0) *cur++ = scale * ((*in >> 6) ); + if (k > 1) *cur++ = scale * ((*in >> 4) & 0x03); + if (k > 2) *cur++ = scale * ((*in >> 2) & 0x03); + } else if (depth == 1) { + for (k=x*img_n; k >= 8; k-=8, ++in) { + *cur++ = scale * ((*in >> 7) ); + *cur++ = scale * ((*in >> 6) & 0x01); + *cur++ = scale * ((*in >> 5) & 0x01); + *cur++ = scale * ((*in >> 4) & 0x01); + *cur++ = scale * ((*in >> 3) & 0x01); + *cur++ = scale * ((*in >> 2) & 0x01); + *cur++ = scale * ((*in >> 1) & 0x01); + *cur++ = scale * ((*in ) & 0x01); + } + if (k > 0) *cur++ = scale * ((*in >> 7) ); + if (k > 1) *cur++ = scale * ((*in >> 6) & 0x01); + if (k > 2) *cur++ = scale * ((*in >> 5) & 0x01); + if (k > 3) *cur++ = scale * ((*in >> 4) & 0x01); + if (k > 4) *cur++ = scale * ((*in >> 3) & 0x01); + if (k > 5) *cur++ = scale * ((*in >> 2) & 0x01); + if (k > 6) *cur++ = scale * ((*in >> 1) & 0x01); + } + if (img_n != out_n) { + int q; + // insert alpha = 255 + cur = a->out + stride*j; + if (img_n == 1) { + for (q=x-1; q >= 0; --q) { + cur[q*2+1] = 255; + cur[q*2+0] = cur[q]; + } + } else { + STBI_ASSERT(img_n == 3); + for (q=x-1; q >= 0; --q) { + cur[q*4+3] = 255; + cur[q*4+2] = cur[q*3+2]; + cur[q*4+1] = cur[q*3+1]; + cur[q*4+0] = cur[q*3+0]; + } + } + } + } } else if (depth == 16) { - // force the image data from big-endian to platform-native. - // this is done in a separate pass due to the decoding relying - // on the data being untouched, but could probably be done - // per-line during decode if care is taken. - stbi_uc *cur = a->out; - stbi__uint16 *cur16 = (stbi__uint16*)cur; + // force the image data from big-endian to platform-native. + // this is done in a separate pass due to the decoding relying + // on the data being untouched, but could probably be done + // per-line during decode if care is taken. + stbi_uc *cur = a->out; + stbi__uint16 *cur16 = (stbi__uint16*)cur; - for(i=0; i < x*y*out_n; ++i,cur16++,cur+=2) { - *cur16 = (cur[0] << 8) | cur[1]; - } + for(i=0; i < x*y*out_n; ++i,cur16++,cur+=2) { + *cur16 = (cur[0] << 8) | cur[1]; + } } return 1; @@ -4546,37 +4546,37 @@ static int stbi__create_png_image(stbi__png *a, stbi_uc *image_data, stbi__uint3 stbi_uc *final; int p; if (!interlaced) - return stbi__create_png_image_raw(a, image_data, image_data_len, out_n, a->s->img_x, a->s->img_y, depth, color); + return stbi__create_png_image_raw(a, image_data, image_data_len, out_n, a->s->img_x, a->s->img_y, depth, color); // de-interlacing final = (stbi_uc *) stbi__malloc_mad3(a->s->img_x, a->s->img_y, out_bytes, 0); for (p=0; p < 7; ++p) { - int xorig[] = { 0,4,0,2,0,1,0 }; - int yorig[] = { 0,0,4,0,2,0,1 }; - int xspc[] = { 8,8,4,4,2,2,1 }; - int yspc[] = { 8,8,8,4,4,2,2 }; - int i,j,x,y; - // pass1_x[4] = 0, pass1_x[5] = 1, pass1_x[12] = 1 - x = (a->s->img_x - xorig[p] + xspc[p]-1) / xspc[p]; - y = (a->s->img_y - yorig[p] + yspc[p]-1) / yspc[p]; - if (x && y) { - stbi__uint32 img_len = ((((a->s->img_n * x * depth) + 7) >> 3) + 1) * y; - if (!stbi__create_png_image_raw(a, image_data, image_data_len, out_n, x, y, depth, color)) { - STBI_FREE(final); - return 0; - } - for (j=0; j < y; ++j) { - for (i=0; i < x; ++i) { - int out_y = j*yspc[p]+yorig[p]; - int out_x = i*xspc[p]+xorig[p]; - memcpy(final + out_y*a->s->img_x*out_bytes + out_x*out_bytes, - a->out + (j*x+i)*out_bytes, out_bytes); - } - } - STBI_FREE(a->out); - image_data += img_len; - image_data_len -= img_len; - } + int xorig[] = { 0,4,0,2,0,1,0 }; + int yorig[] = { 0,0,4,0,2,0,1 }; + int xspc[] = { 8,8,4,4,2,2,1 }; + int yspc[] = { 8,8,8,4,4,2,2 }; + int i,j,x,y; + // pass1_x[4] = 0, pass1_x[5] = 1, pass1_x[12] = 1 + x = (a->s->img_x - xorig[p] + xspc[p]-1) / xspc[p]; + y = (a->s->img_y - yorig[p] + yspc[p]-1) / yspc[p]; + if (x && y) { + stbi__uint32 img_len = ((((a->s->img_n * x * depth) + 7) >> 3) + 1) * y; + if (!stbi__create_png_image_raw(a, image_data, image_data_len, out_n, x, y, depth, color)) { + STBI_FREE(final); + return 0; + } + for (j=0; j < y; ++j) { + for (i=0; i < x; ++i) { + int out_y = j*yspc[p]+yorig[p]; + int out_x = i*xspc[p]+xorig[p]; + memcpy(final + out_y*a->s->img_x*out_bytes + out_x*out_bytes, + a->out + (j*x+i)*out_bytes, out_bytes); + } + } + STBI_FREE(a->out); + image_data += img_len; + image_data_len -= img_len; + } } a->out = final; @@ -4594,16 +4594,16 @@ static int stbi__compute_transparency(stbi__png *z, stbi_uc tc[3], int out_n) STBI_ASSERT(out_n == 2 || out_n == 4); if (out_n == 2) { - for (i=0; i < pixel_count; ++i) { - p[1] = (p[0] == tc[0] ? 0 : 255); - p += 2; - } + for (i=0; i < pixel_count; ++i) { + p[1] = (p[0] == tc[0] ? 0 : 255); + p += 2; + } } else { - for (i=0; i < pixel_count; ++i) { - if (p[0] == tc[0] && p[1] == tc[1] && p[2] == tc[2]) - p[3] = 0; - p += 4; - } + for (i=0; i < pixel_count; ++i) { + if (p[0] == tc[0] && p[1] == tc[1] && p[2] == tc[2]) + p[3] = 0; + p += 4; + } } return 1; } @@ -4619,16 +4619,16 @@ static int stbi__compute_transparency16(stbi__png *z, stbi__uint16 tc[3], int ou STBI_ASSERT(out_n == 2 || out_n == 4); if (out_n == 2) { - for (i = 0; i < pixel_count; ++i) { - p[1] = (p[0] == tc[0] ? 0 : 65535); - p += 2; - } + for (i = 0; i < pixel_count; ++i) { + p[1] = (p[0] == tc[0] ? 0 : 65535); + p += 2; + } } else { - for (i = 0; i < pixel_count; ++i) { - if (p[0] == tc[0] && p[1] == tc[1] && p[2] == tc[2]) - p[3] = 0; - p += 4; - } + for (i = 0; i < pixel_count; ++i) { + if (p[0] == tc[0] && p[1] == tc[1] && p[2] == tc[2]) + p[3] = 0; + p += 4; + } } return 1; } @@ -4645,22 +4645,22 @@ static int stbi__expand_png_palette(stbi__png *a, stbi_uc *palette, int len, int temp_out = p; if (pal_img_n == 3) { - for (i=0; i < pixel_count; ++i) { - int n = orig[i]*4; - p[0] = palette[n ]; - p[1] = palette[n+1]; - p[2] = palette[n+2]; - p += 3; - } + for (i=0; i < pixel_count; ++i) { + int n = orig[i]*4; + p[0] = palette[n ]; + p[1] = palette[n+1]; + p[2] = palette[n+2]; + p += 3; + } } else { - for (i=0; i < pixel_count; ++i) { - int n = orig[i]*4; - p[0] = palette[n ]; - p[1] = palette[n+1]; - p[2] = palette[n+2]; - p[3] = palette[n+3]; - p += 4; - } + for (i=0; i < pixel_count; ++i) { + int n = orig[i]*4; + p[0] = palette[n ]; + p[1] = palette[n+1]; + p[2] = palette[n+2]; + p[3] = palette[n+3]; + p += 4; + } } STBI_FREE(a->out); a->out = temp_out; @@ -4689,44 +4689,44 @@ static void stbi__de_iphone(stbi__png *z) stbi__uint32 i, pixel_count = s->img_x * s->img_y; stbi_uc *p = z->out; - if (s->img_out_n == 3) { // convert bgr to rgb - for (i=0; i < pixel_count; ++i) { - stbi_uc t = p[0]; - p[0] = p[2]; - p[2] = t; - p += 3; - } + if (s->img_out_n == 3) { // convert bgr to rgb + for (i=0; i < pixel_count; ++i) { + stbi_uc t = p[0]; + p[0] = p[2]; + p[2] = t; + p += 3; + } } else { - STBI_ASSERT(s->img_out_n == 4); - if (stbi__unpremultiply_on_load) { - // convert bgr to rgb and unpremultiply - for (i=0; i < pixel_count; ++i) { - stbi_uc a = p[3]; - stbi_uc t = p[0]; - if (a) { - stbi_uc half = a / 2; - p[0] = (p[2] * 255 + half) / a; - p[1] = (p[1] * 255 + half) / a; - p[2] = ( t * 255 + half) / a; - } else { - p[0] = p[2]; - p[2] = t; - } - p += 4; - } - } else { - // convert bgr to rgb - for (i=0; i < pixel_count; ++i) { - stbi_uc t = p[0]; - p[0] = p[2]; - p[2] = t; - p += 4; - } - } - } -} - -#define STBI__PNG_TYPE(a,b,c,d) (((unsigned) (a) << 24) + ((unsigned) (b) << 16) + ((unsigned) (c) << 8) + (unsigned) (d)) + STBI_ASSERT(s->img_out_n == 4); + if (stbi__unpremultiply_on_load) { + // convert bgr to rgb and unpremultiply + for (i=0; i < pixel_count; ++i) { + stbi_uc a = p[3]; + stbi_uc t = p[0]; + if (a) { + stbi_uc half = a / 2; + p[0] = (p[2] * 255 + half) / a; + p[1] = (p[1] * 255 + half) / a; + p[2] = ( t * 255 + half) / a; + } else { + p[0] = p[2]; + p[2] = t; + } + p += 4; + } + } else { + // convert bgr to rgb + for (i=0; i < pixel_count; ++i) { + stbi_uc t = p[0]; + p[0] = p[2]; + p[2] = t; + p += 4; + } + } + } +} + +#define STBI__PNG_TYPE(a,b,c,d) (((unsigned) (a) << 24) + ((unsigned) (b) << 16) + ((unsigned) (c) << 8) + (unsigned) (d)) static int stbi__parse_png_file(stbi__png *z, int scan, int req_comp) { @@ -4746,157 +4746,157 @@ static int stbi__parse_png_file(stbi__png *z, int scan, int req_comp) if (scan == STBI__SCAN_type) return 1; for (;;) { - stbi__pngchunk c = stbi__get_chunk_header(s); - switch (c.type) { - case STBI__PNG_TYPE('C','g','B','I'): - is_iphone = 1; - stbi__skip(s, c.length); - break; - case STBI__PNG_TYPE('I','H','D','R'): { - int comp,filter; - if (!first) return stbi__err("multiple IHDR","Corrupt PNG"); - first = 0; - if (c.length != 13) return stbi__err("bad IHDR len","Corrupt PNG"); - s->img_x = stbi__get32be(s); if (s->img_x > (1 << 24)) return stbi__err("too large","Very large image (corrupt?)"); - s->img_y = stbi__get32be(s); if (s->img_y > (1 << 24)) return stbi__err("too large","Very large image (corrupt?)"); - z->depth = stbi__get8(s); if (z->depth != 1 && z->depth != 2 && z->depth != 4 && z->depth != 8 && z->depth != 16) return stbi__err("1/2/4/8/16-bit only","PNG not supported: 1/2/4/8/16-bit only"); - color = stbi__get8(s); if (color > 6) return stbi__err("bad ctype","Corrupt PNG"); - if (color == 3 && z->depth == 16) return stbi__err("bad ctype","Corrupt PNG"); - if (color == 3) pal_img_n = 3; else if (color & 1) return stbi__err("bad ctype","Corrupt PNG"); - comp = stbi__get8(s); if (comp) return stbi__err("bad comp method","Corrupt PNG"); - filter= stbi__get8(s); if (filter) return stbi__err("bad filter method","Corrupt PNG"); - interlace = stbi__get8(s); if (interlace>1) return stbi__err("bad interlace method","Corrupt PNG"); - if (!s->img_x || !s->img_y) return stbi__err("0-pixel image","Corrupt PNG"); - if (!pal_img_n) { - s->img_n = (color & 2 ? 3 : 1) + (color & 4 ? 1 : 0); - if ((1 << 30) / s->img_x / s->img_n < s->img_y) return stbi__err("too large", "Image too large to decode"); - if (scan == STBI__SCAN_header) return 1; - } else { - // if paletted, then pal_n is our final components, and - // img_n is # components to decompress/filter. - s->img_n = 1; - if ((1 << 30) / s->img_x / 4 < s->img_y) return stbi__err("too large","Corrupt PNG"); - // if SCAN_header, have to scan to see if we have a tRNS - } - break; - } - - case STBI__PNG_TYPE('P','L','T','E'): { - if (first) return stbi__err("first not IHDR", "Corrupt PNG"); - if (c.length > 256*3) return stbi__err("invalid PLTE","Corrupt PNG"); - pal_len = c.length / 3; - if (pal_len * 3 != c.length) return stbi__err("invalid PLTE","Corrupt PNG"); - for (i=0; i < pal_len; ++i) { - palette[i*4+0] = stbi__get8(s); - palette[i*4+1] = stbi__get8(s); - palette[i*4+2] = stbi__get8(s); - palette[i*4+3] = 255; - } - break; - } - - case STBI__PNG_TYPE('t','R','N','S'): { - if (first) return stbi__err("first not IHDR", "Corrupt PNG"); - if (z->idata) return stbi__err("tRNS after IDAT","Corrupt PNG"); - if (pal_img_n) { - if (scan == STBI__SCAN_header) { s->img_n = 4; return 1; } - if (pal_len == 0) return stbi__err("tRNS before PLTE","Corrupt PNG"); - if (c.length > pal_len) return stbi__err("bad tRNS len","Corrupt PNG"); - pal_img_n = 4; - for (i=0; i < c.length; ++i) - palette[i*4+3] = stbi__get8(s); - } else { - if (!(s->img_n & 1)) return stbi__err("tRNS with alpha","Corrupt PNG"); - if (c.length != (stbi__uint32) s->img_n*2) return stbi__err("bad tRNS len","Corrupt PNG"); - has_trans = 1; - if (z->depth == 16) { - for (k = 0; k < s->img_n; ++k) tc16[k] = (stbi__uint16)stbi__get16be(s); // copy the values as-is - } else { - for (k = 0; k < s->img_n; ++k) tc[k] = (stbi_uc)(stbi__get16be(s) & 255) * stbi__depth_scale_table[z->depth]; // non 8-bit images will be larger - } - } - break; - } - - case STBI__PNG_TYPE('I','D','A','T'): { - if (first) return stbi__err("first not IHDR", "Corrupt PNG"); - if (pal_img_n && !pal_len) return stbi__err("no PLTE","Corrupt PNG"); - if (scan == STBI__SCAN_header) { s->img_n = pal_img_n; return 1; } - if ((int)(ioff + c.length) < (int)ioff) return 0; - if (ioff + c.length > idata_limit) { - stbi__uint32 idata_limit_old = idata_limit; - stbi_uc *p; - if (idata_limit == 0) idata_limit = c.length > 4096 ? c.length : 4096; - while (ioff + c.length > idata_limit) - idata_limit *= 2; - STBI_NOTUSED(idata_limit_old); - p = (stbi_uc *) STBI_REALLOC_SIZED(z->idata, idata_limit_old, idata_limit); if (p == NULL) return stbi__err("outofmem", "Out of memory"); - z->idata = p; - } - if (!stbi__getn(s, z->idata+ioff,c.length)) return stbi__err("outofdata","Corrupt PNG"); - ioff += c.length; - break; - } - - case STBI__PNG_TYPE('I','E','N','D'): { - stbi__uint32 raw_len, bpl; - if (first) return stbi__err("first not IHDR", "Corrupt PNG"); - if (scan != STBI__SCAN_load) return 1; - if (z->idata == NULL) return stbi__err("no IDAT","Corrupt PNG"); - // initial guess for decoded data size to avoid unnecessary reallocs - bpl = (s->img_x * z->depth + 7) / 8; // bytes per line, per component - raw_len = bpl * s->img_y * s->img_n /* pixels */ + s->img_y /* filter mode per row */; - z->expanded = (stbi_uc *) stbi_zlib_decode_malloc_guesssize_headerflag((char *) z->idata, ioff, raw_len, (int *) &raw_len, !is_iphone); - if (z->expanded == NULL) return 0; // zlib should set error - STBI_FREE(z->idata); z->idata = NULL; - if ((req_comp == s->img_n+1 && req_comp != 3 && !pal_img_n) || has_trans) - s->img_out_n = s->img_n+1; - else - s->img_out_n = s->img_n; - if (!stbi__create_png_image(z, z->expanded, raw_len, s->img_out_n, z->depth, color, interlace)) return 0; - if (has_trans) { - if (z->depth == 16) { - if (!stbi__compute_transparency16(z, tc16, s->img_out_n)) return 0; - } else { - if (!stbi__compute_transparency(z, tc, s->img_out_n)) return 0; - } - } - if (is_iphone && stbi__de_iphone_flag && s->img_out_n > 2) - stbi__de_iphone(z); - if (pal_img_n) { - // pal_img_n == 3 or 4 - s->img_n = pal_img_n; // record the actual colors we had - s->img_out_n = pal_img_n; - if (req_comp >= 3) s->img_out_n = req_comp; - if (!stbi__expand_png_palette(z, palette, pal_len, s->img_out_n)) - return 0; - } else if (has_trans) { - // non-paletted image with tRNS -> source image has (constant) alpha - ++s->img_n; - } - STBI_FREE(z->expanded); z->expanded = NULL; - return 1; - } - - default: - // if critical, fail - if (first) return stbi__err("first not IHDR", "Corrupt PNG"); - if ((c.type & (1 << 29)) == 0) { - #ifndef STBI_NO_FAILURE_STRINGS - // not threadsafe - static char invalid_chunk[] = "XXXX PNG chunk not known"; - invalid_chunk[0] = STBI__BYTECAST(c.type >> 24); - invalid_chunk[1] = STBI__BYTECAST(c.type >> 16); - invalid_chunk[2] = STBI__BYTECAST(c.type >> 8); - invalid_chunk[3] = STBI__BYTECAST(c.type >> 0); - #endif - return stbi__err(invalid_chunk, "PNG not supported: unknown PNG chunk type"); - } - stbi__skip(s, c.length); - break; - } - // end of PNG chunk, read and skip CRC - stbi__get32be(s); + stbi__pngchunk c = stbi__get_chunk_header(s); + switch (c.type) { + case STBI__PNG_TYPE('C','g','B','I'): + is_iphone = 1; + stbi__skip(s, c.length); + break; + case STBI__PNG_TYPE('I','H','D','R'): { + int comp,filter; + if (!first) return stbi__err("multiple IHDR","Corrupt PNG"); + first = 0; + if (c.length != 13) return stbi__err("bad IHDR len","Corrupt PNG"); + s->img_x = stbi__get32be(s); if (s->img_x > (1 << 24)) return stbi__err("too large","Very large image (corrupt?)"); + s->img_y = stbi__get32be(s); if (s->img_y > (1 << 24)) return stbi__err("too large","Very large image (corrupt?)"); + z->depth = stbi__get8(s); if (z->depth != 1 && z->depth != 2 && z->depth != 4 && z->depth != 8 && z->depth != 16) return stbi__err("1/2/4/8/16-bit only","PNG not supported: 1/2/4/8/16-bit only"); + color = stbi__get8(s); if (color > 6) return stbi__err("bad ctype","Corrupt PNG"); + if (color == 3 && z->depth == 16) return stbi__err("bad ctype","Corrupt PNG"); + if (color == 3) pal_img_n = 3; else if (color & 1) return stbi__err("bad ctype","Corrupt PNG"); + comp = stbi__get8(s); if (comp) return stbi__err("bad comp method","Corrupt PNG"); + filter= stbi__get8(s); if (filter) return stbi__err("bad filter method","Corrupt PNG"); + interlace = stbi__get8(s); if (interlace>1) return stbi__err("bad interlace method","Corrupt PNG"); + if (!s->img_x || !s->img_y) return stbi__err("0-pixel image","Corrupt PNG"); + if (!pal_img_n) { + s->img_n = (color & 2 ? 3 : 1) + (color & 4 ? 1 : 0); + if ((1 << 30) / s->img_x / s->img_n < s->img_y) return stbi__err("too large", "Image too large to decode"); + if (scan == STBI__SCAN_header) return 1; + } else { + // if paletted, then pal_n is our final components, and + // img_n is # components to decompress/filter. + s->img_n = 1; + if ((1 << 30) / s->img_x / 4 < s->img_y) return stbi__err("too large","Corrupt PNG"); + // if SCAN_header, have to scan to see if we have a tRNS + } + break; + } + + case STBI__PNG_TYPE('P','L','T','E'): { + if (first) return stbi__err("first not IHDR", "Corrupt PNG"); + if (c.length > 256*3) return stbi__err("invalid PLTE","Corrupt PNG"); + pal_len = c.length / 3; + if (pal_len * 3 != c.length) return stbi__err("invalid PLTE","Corrupt PNG"); + for (i=0; i < pal_len; ++i) { + palette[i*4+0] = stbi__get8(s); + palette[i*4+1] = stbi__get8(s); + palette[i*4+2] = stbi__get8(s); + palette[i*4+3] = 255; + } + break; + } + + case STBI__PNG_TYPE('t','R','N','S'): { + if (first) return stbi__err("first not IHDR", "Corrupt PNG"); + if (z->idata) return stbi__err("tRNS after IDAT","Corrupt PNG"); + if (pal_img_n) { + if (scan == STBI__SCAN_header) { s->img_n = 4; return 1; } + if (pal_len == 0) return stbi__err("tRNS before PLTE","Corrupt PNG"); + if (c.length > pal_len) return stbi__err("bad tRNS len","Corrupt PNG"); + pal_img_n = 4; + for (i=0; i < c.length; ++i) + palette[i*4+3] = stbi__get8(s); + } else { + if (!(s->img_n & 1)) return stbi__err("tRNS with alpha","Corrupt PNG"); + if (c.length != (stbi__uint32) s->img_n*2) return stbi__err("bad tRNS len","Corrupt PNG"); + has_trans = 1; + if (z->depth == 16) { + for (k = 0; k < s->img_n; ++k) tc16[k] = (stbi__uint16)stbi__get16be(s); // copy the values as-is + } else { + for (k = 0; k < s->img_n; ++k) tc[k] = (stbi_uc)(stbi__get16be(s) & 255) * stbi__depth_scale_table[z->depth]; // non 8-bit images will be larger + } + } + break; + } + + case STBI__PNG_TYPE('I','D','A','T'): { + if (first) return stbi__err("first not IHDR", "Corrupt PNG"); + if (pal_img_n && !pal_len) return stbi__err("no PLTE","Corrupt PNG"); + if (scan == STBI__SCAN_header) { s->img_n = pal_img_n; return 1; } + if ((int)(ioff + c.length) < (int)ioff) return 0; + if (ioff + c.length > idata_limit) { + stbi__uint32 idata_limit_old = idata_limit; + stbi_uc *p; + if (idata_limit == 0) idata_limit = c.length > 4096 ? c.length : 4096; + while (ioff + c.length > idata_limit) + idata_limit *= 2; + STBI_NOTUSED(idata_limit_old); + p = (stbi_uc *) STBI_REALLOC_SIZED(z->idata, idata_limit_old, idata_limit); if (p == NULL) return stbi__err("outofmem", "Out of memory"); + z->idata = p; + } + if (!stbi__getn(s, z->idata+ioff,c.length)) return stbi__err("outofdata","Corrupt PNG"); + ioff += c.length; + break; + } + + case STBI__PNG_TYPE('I','E','N','D'): { + stbi__uint32 raw_len, bpl; + if (first) return stbi__err("first not IHDR", "Corrupt PNG"); + if (scan != STBI__SCAN_load) return 1; + if (z->idata == NULL) return stbi__err("no IDAT","Corrupt PNG"); + // initial guess for decoded data size to avoid unnecessary reallocs + bpl = (s->img_x * z->depth + 7) / 8; // bytes per line, per component + raw_len = bpl * s->img_y * s->img_n /* pixels */ + s->img_y /* filter mode per row */; + z->expanded = (stbi_uc *) stbi_zlib_decode_malloc_guesssize_headerflag((char *) z->idata, ioff, raw_len, (int *) &raw_len, !is_iphone); + if (z->expanded == NULL) return 0; // zlib should set error + STBI_FREE(z->idata); z->idata = NULL; + if ((req_comp == s->img_n+1 && req_comp != 3 && !pal_img_n) || has_trans) + s->img_out_n = s->img_n+1; + else + s->img_out_n = s->img_n; + if (!stbi__create_png_image(z, z->expanded, raw_len, s->img_out_n, z->depth, color, interlace)) return 0; + if (has_trans) { + if (z->depth == 16) { + if (!stbi__compute_transparency16(z, tc16, s->img_out_n)) return 0; + } else { + if (!stbi__compute_transparency(z, tc, s->img_out_n)) return 0; + } + } + if (is_iphone && stbi__de_iphone_flag && s->img_out_n > 2) + stbi__de_iphone(z); + if (pal_img_n) { + // pal_img_n == 3 or 4 + s->img_n = pal_img_n; // record the actual colors we had + s->img_out_n = pal_img_n; + if (req_comp >= 3) s->img_out_n = req_comp; + if (!stbi__expand_png_palette(z, palette, pal_len, s->img_out_n)) + return 0; + } else if (has_trans) { + // non-paletted image with tRNS -> source image has (constant) alpha + ++s->img_n; + } + STBI_FREE(z->expanded); z->expanded = NULL; + return 1; + } + + default: + // if critical, fail + if (first) return stbi__err("first not IHDR", "Corrupt PNG"); + if ((c.type & (1 << 29)) == 0) { + #ifndef STBI_NO_FAILURE_STRINGS + // not threadsafe + static char invalid_chunk[] = "XXXX PNG chunk not known"; + invalid_chunk[0] = STBI__BYTECAST(c.type >> 24); + invalid_chunk[1] = STBI__BYTECAST(c.type >> 16); + invalid_chunk[2] = STBI__BYTECAST(c.type >> 8); + invalid_chunk[3] = STBI__BYTECAST(c.type >> 0); + #endif + return stbi__err(invalid_chunk, "PNG not supported: unknown PNG chunk type"); + } + stbi__skip(s, c.length); + break; + } + // end of PNG chunk, read and skip CRC + stbi__get32be(s); } } @@ -4905,27 +4905,27 @@ static void *stbi__do_png(stbi__png *p, int *x, int *y, int *n, int req_comp, st void *result=NULL; if (req_comp < 0 || req_comp > 4) return stbi__errpuc("bad req_comp", "Internal error"); if (stbi__parse_png_file(p, STBI__SCAN_load, req_comp)) { - if (p->depth < 8) - ri->bits_per_channel = 8; - else - ri->bits_per_channel = p->depth; - result = p->out; - p->out = NULL; - if (req_comp && req_comp != p->s->img_out_n) { - if (ri->bits_per_channel == 8) - result = stbi__convert_format((unsigned char *) result, p->s->img_out_n, req_comp, p->s->img_x, p->s->img_y); - else - result = stbi__convert_format16((stbi__uint16 *) result, p->s->img_out_n, req_comp, p->s->img_x, p->s->img_y); - p->s->img_out_n = req_comp; - if (result == NULL) return result; - } - *x = p->s->img_x; - *y = p->s->img_y; - if (n) *n = p->s->img_n; - } - STBI_FREE(p->out); p->out = NULL; + if (p->depth < 8) + ri->bits_per_channel = 8; + else + ri->bits_per_channel = p->depth; + result = p->out; + p->out = NULL; + if (req_comp && req_comp != p->s->img_out_n) { + if (ri->bits_per_channel == 8) + result = stbi__convert_format((unsigned char *) result, p->s->img_out_n, req_comp, p->s->img_x, p->s->img_y); + else + result = stbi__convert_format16((stbi__uint16 *) result, p->s->img_out_n, req_comp, p->s->img_x, p->s->img_y); + p->s->img_out_n = req_comp; + if (result == NULL) return result; + } + *x = p->s->img_x; + *y = p->s->img_y; + if (n) *n = p->s->img_n; + } + STBI_FREE(p->out); p->out = NULL; STBI_FREE(p->expanded); p->expanded = NULL; - STBI_FREE(p->idata); p->idata = NULL; + STBI_FREE(p->idata); p->idata = NULL; return result; } @@ -4948,8 +4948,8 @@ static int stbi__png_test(stbi__context *s) static int stbi__png_info_raw(stbi__png *p, int *x, int *y, int *comp) { if (!stbi__parse_png_file(p, STBI__SCAN_header, 0)) { - stbi__rewind( p->s ); - return 0; + stbi__rewind( p->s ); + return 0; } if (x) *x = p->s->img_x; if (y) *y = p->s->img_y; @@ -4971,8 +4971,8 @@ static int stbi__png_is16(stbi__context *s) if (!stbi__png_info_raw(&p, NULL, NULL, NULL)) return 0; if (p.depth != 16) { - stbi__rewind(p.s); - return 0; + stbi__rewind(p.s); + return 0; } return 1; } @@ -5010,10 +5010,10 @@ static int stbi__high_bit(unsigned int z) int n=0; if (z == 0) return -1; if (z >= 0x10000) n += 16, z >>= 16; - if (z >= 0x00100) n += 8, z >>= 8; - if (z >= 0x00010) n += 4, z >>= 4; - if (z >= 0x00004) n += 2, z >>= 2; - if (z >= 0x00002) n += 1, z >>= 1; + if (z >= 0x00100) n += 8, z >>= 8; + if (z >= 0x00010) n += 4, z >>= 4; + if (z >= 0x00004) n += 2, z >>= 2; + if (z >= 0x00002) n += 1, z >>= 1; return n; } @@ -5033,17 +5033,17 @@ static int stbi__bitcount(unsigned int a) static int stbi__shiftsigned(int v, int shift, int bits) { static unsigned int mul_table[9] = { - 0, - 0xff/*0b11111111*/, 0x55/*0b01010101*/, 0x49/*0b01001001*/, 0x11/*0b00010001*/, - 0x21/*0b00100001*/, 0x41/*0b01000001*/, 0x81/*0b10000001*/, 0x01/*0b00000001*/, + 0, + 0xff/*0b11111111*/, 0x55/*0b01010101*/, 0x49/*0b01001001*/, 0x11/*0b00010001*/, + 0x21/*0b00100001*/, 0x41/*0b01000001*/, 0x81/*0b10000001*/, 0x01/*0b00000001*/, }; static unsigned int shift_table[9] = { - 0, 0,0,1,0,2,4,6,0, + 0, 0,0,1,0,2,4,6,0, }; if (shift < 0) - v <<= -shift; + v <<= -shift; else - v >>= shift; + v >>= shift; STBI_ASSERT(v >= 0 && v < 256); v >>= (8-bits); STBI_ASSERT(bits >= 0 && bits <= 8); @@ -5069,72 +5069,72 @@ static void *stbi__bmp_parse_header(stbi__context *s, stbi__bmp_data *info) if (hsz != 12 && hsz != 40 && hsz != 56 && hsz != 108 && hsz != 124) return stbi__errpuc("unknown BMP", "BMP type not supported: unknown"); if (hsz == 12) { - s->img_x = stbi__get16le(s); - s->img_y = stbi__get16le(s); + s->img_x = stbi__get16le(s); + s->img_y = stbi__get16le(s); } else { - s->img_x = stbi__get32le(s); - s->img_y = stbi__get32le(s); + s->img_x = stbi__get32le(s); + s->img_y = stbi__get32le(s); } if (stbi__get16le(s) != 1) return stbi__errpuc("bad BMP", "bad BMP"); info->bpp = stbi__get16le(s); if (hsz != 12) { - int compress = stbi__get32le(s); - if (compress == 1 || compress == 2) return stbi__errpuc("BMP RLE", "BMP type not supported: RLE"); - stbi__get32le(s); // discard sizeof - stbi__get32le(s); // discard hres - stbi__get32le(s); // discard vres - stbi__get32le(s); // discard colorsused - stbi__get32le(s); // discard max important - if (hsz == 40 || hsz == 56) { - if (hsz == 56) { - stbi__get32le(s); - stbi__get32le(s); - stbi__get32le(s); - stbi__get32le(s); - } - if (info->bpp == 16 || info->bpp == 32) { - if (compress == 0) { - if (info->bpp == 32) { - info->mr = 0xffu << 16; - info->mg = 0xffu << 8; - info->mb = 0xffu << 0; - info->ma = 0xffu << 24; - info->all_a = 0; // if all_a is 0 at end, then we loaded alpha channel but it was all 0 - } else { - info->mr = 31u << 10; - info->mg = 31u << 5; - info->mb = 31u << 0; - } - } else if (compress == 3) { - info->mr = stbi__get32le(s); - info->mg = stbi__get32le(s); - info->mb = stbi__get32le(s); - // not documented, but generated by photoshop and handled by mspaint - if (info->mr == info->mg && info->mg == info->mb) { - // ?!?!? - return stbi__errpuc("bad BMP", "bad BMP"); - } - } else - return stbi__errpuc("bad BMP", "bad BMP"); - } - } else { - int i; - if (hsz != 108 && hsz != 124) - return stbi__errpuc("bad BMP", "bad BMP"); - info->mr = stbi__get32le(s); - info->mg = stbi__get32le(s); - info->mb = stbi__get32le(s); - info->ma = stbi__get32le(s); - stbi__get32le(s); // discard color space - for (i=0; i < 12; ++i) - stbi__get32le(s); // discard color space parameters - if (hsz == 124) { - stbi__get32le(s); // discard rendering intent - stbi__get32le(s); // discard offset of profile data - stbi__get32le(s); // discard size of profile data - stbi__get32le(s); // discard reserved - } - } + int compress = stbi__get32le(s); + if (compress == 1 || compress == 2) return stbi__errpuc("BMP RLE", "BMP type not supported: RLE"); + stbi__get32le(s); // discard sizeof + stbi__get32le(s); // discard hres + stbi__get32le(s); // discard vres + stbi__get32le(s); // discard colorsused + stbi__get32le(s); // discard max important + if (hsz == 40 || hsz == 56) { + if (hsz == 56) { + stbi__get32le(s); + stbi__get32le(s); + stbi__get32le(s); + stbi__get32le(s); + } + if (info->bpp == 16 || info->bpp == 32) { + if (compress == 0) { + if (info->bpp == 32) { + info->mr = 0xffu << 16; + info->mg = 0xffu << 8; + info->mb = 0xffu << 0; + info->ma = 0xffu << 24; + info->all_a = 0; // if all_a is 0 at end, then we loaded alpha channel but it was all 0 + } else { + info->mr = 31u << 10; + info->mg = 31u << 5; + info->mb = 31u << 0; + } + } else if (compress == 3) { + info->mr = stbi__get32le(s); + info->mg = stbi__get32le(s); + info->mb = stbi__get32le(s); + // not documented, but generated by photoshop and handled by mspaint + if (info->mr == info->mg && info->mg == info->mb) { + // ?!?!? + return stbi__errpuc("bad BMP", "bad BMP"); + } + } else + return stbi__errpuc("bad BMP", "bad BMP"); + } + } else { + int i; + if (hsz != 108 && hsz != 124) + return stbi__errpuc("bad BMP", "bad BMP"); + info->mr = stbi__get32le(s); + info->mg = stbi__get32le(s); + info->mb = stbi__get32le(s); + info->ma = stbi__get32le(s); + stbi__get32le(s); // discard color space + for (i=0; i < 12; ++i) + stbi__get32le(s); // discard color space parameters + if (hsz == 124) { + stbi__get32le(s); // discard rendering intent + stbi__get32le(s); // discard offset of profile data + stbi__get32le(s); // discard size of profile data + stbi__get32le(s); // discard reserved + } + } } return (void *) 1; } @@ -5152,7 +5152,7 @@ static void *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int req info.all_a = 255; if (stbi__bmp_parse_header(s, &info) == NULL) - return NULL; // error code already set + return NULL; // error code already set flip_vertically = ((int) s->img_y) > 0; s->img_y = abs((int) s->img_y); @@ -5164,149 +5164,149 @@ static void *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int req all_a = info.all_a; if (info.hsz == 12) { - if (info.bpp < 24) - psize = (info.offset - 14 - 24) / 3; + if (info.bpp < 24) + psize = (info.offset - 14 - 24) / 3; } else { - if (info.bpp < 16) - psize = (info.offset - 14 - info.hsz) >> 2; + if (info.bpp < 16) + psize = (info.offset - 14 - info.hsz) >> 2; } s->img_n = ma ? 4 : 3; if (req_comp && req_comp >= 3) // we can directly decode 3 or 4 - target = req_comp; + target = req_comp; else - target = s->img_n; // if they want monochrome, we'll post-convert + target = s->img_n; // if they want monochrome, we'll post-convert // sanity-check size if (!stbi__mad3sizes_valid(target, s->img_x, s->img_y, 0)) - return stbi__errpuc("too large", "Corrupt BMP"); + return stbi__errpuc("too large", "Corrupt BMP"); out = (stbi_uc *) stbi__malloc_mad3(target, s->img_x, s->img_y, 0); if (!out) return stbi__errpuc("outofmem", "Out of memory"); if (info.bpp < 16) { - int z=0; - if (psize == 0 || psize > 256) { STBI_FREE(out); return stbi__errpuc("invalid", "Corrupt BMP"); } - for (i=0; i < psize; ++i) { - pal[i][2] = stbi__get8(s); - pal[i][1] = stbi__get8(s); - pal[i][0] = stbi__get8(s); - if (info.hsz != 12) stbi__get8(s); - pal[i][3] = 255; - } - stbi__skip(s, info.offset - 14 - info.hsz - psize * (info.hsz == 12 ? 3 : 4)); - if (info.bpp == 1) width = (s->img_x + 7) >> 3; - else if (info.bpp == 4) width = (s->img_x + 1) >> 1; - else if (info.bpp == 8) width = s->img_x; - else { STBI_FREE(out); return stbi__errpuc("bad bpp", "Corrupt BMP"); } - pad = (-width)&3; - if (info.bpp == 1) { - for (j=0; j < (int) s->img_y; ++j) { - int bit_offset = 7, v = stbi__get8(s); - for (i=0; i < (int) s->img_x; ++i) { - int color = (v>>bit_offset)&0x1; - out[z++] = pal[color][0]; - out[z++] = pal[color][1]; - out[z++] = pal[color][2]; - if((--bit_offset) < 0) { - bit_offset = 7; - v = stbi__get8(s); - } - } - stbi__skip(s, pad); - } - } else { - for (j=0; j < (int) s->img_y; ++j) { - for (i=0; i < (int) s->img_x; i += 2) { - int v=stbi__get8(s),v2=0; - if (info.bpp == 4) { - v2 = v & 15; - v >>= 4; - } - out[z++] = pal[v][0]; - out[z++] = pal[v][1]; - out[z++] = pal[v][2]; - if (target == 4) out[z++] = 255; - if (i+1 == (int) s->img_x) break; - v = (info.bpp == 8) ? stbi__get8(s) : v2; - out[z++] = pal[v][0]; - out[z++] = pal[v][1]; - out[z++] = pal[v][2]; - if (target == 4) out[z++] = 255; - } - stbi__skip(s, pad); - } - } + int z=0; + if (psize == 0 || psize > 256) { STBI_FREE(out); return stbi__errpuc("invalid", "Corrupt BMP"); } + for (i=0; i < psize; ++i) { + pal[i][2] = stbi__get8(s); + pal[i][1] = stbi__get8(s); + pal[i][0] = stbi__get8(s); + if (info.hsz != 12) stbi__get8(s); + pal[i][3] = 255; + } + stbi__skip(s, info.offset - 14 - info.hsz - psize * (info.hsz == 12 ? 3 : 4)); + if (info.bpp == 1) width = (s->img_x + 7) >> 3; + else if (info.bpp == 4) width = (s->img_x + 1) >> 1; + else if (info.bpp == 8) width = s->img_x; + else { STBI_FREE(out); return stbi__errpuc("bad bpp", "Corrupt BMP"); } + pad = (-width)&3; + if (info.bpp == 1) { + for (j=0; j < (int) s->img_y; ++j) { + int bit_offset = 7, v = stbi__get8(s); + for (i=0; i < (int) s->img_x; ++i) { + int color = (v>>bit_offset)&0x1; + out[z++] = pal[color][0]; + out[z++] = pal[color][1]; + out[z++] = pal[color][2]; + if((--bit_offset) < 0) { + bit_offset = 7; + v = stbi__get8(s); + } + } + stbi__skip(s, pad); + } + } else { + for (j=0; j < (int) s->img_y; ++j) { + for (i=0; i < (int) s->img_x; i += 2) { + int v=stbi__get8(s),v2=0; + if (info.bpp == 4) { + v2 = v & 15; + v >>= 4; + } + out[z++] = pal[v][0]; + out[z++] = pal[v][1]; + out[z++] = pal[v][2]; + if (target == 4) out[z++] = 255; + if (i+1 == (int) s->img_x) break; + v = (info.bpp == 8) ? stbi__get8(s) : v2; + out[z++] = pal[v][0]; + out[z++] = pal[v][1]; + out[z++] = pal[v][2]; + if (target == 4) out[z++] = 255; + } + stbi__skip(s, pad); + } + } } else { - int rshift=0,gshift=0,bshift=0,ashift=0,rcount=0,gcount=0,bcount=0,acount=0; - int z = 0; - int easy=0; - stbi__skip(s, info.offset - 14 - info.hsz); - if (info.bpp == 24) width = 3 * s->img_x; - else if (info.bpp == 16) width = 2*s->img_x; - else /* bpp = 32 and pad = 0 */ width=0; - pad = (-width) & 3; - if (info.bpp == 24) { - easy = 1; - } else if (info.bpp == 32) { - if (mb == 0xff && mg == 0xff00 && mr == 0x00ff0000 && ma == 0xff000000) - easy = 2; - } - if (!easy) { - if (!mr || !mg || !mb) { STBI_FREE(out); return stbi__errpuc("bad masks", "Corrupt BMP"); } - // right shift amt to put high bit in position #7 - rshift = stbi__high_bit(mr)-7; rcount = stbi__bitcount(mr); - gshift = stbi__high_bit(mg)-7; gcount = stbi__bitcount(mg); - bshift = stbi__high_bit(mb)-7; bcount = stbi__bitcount(mb); - ashift = stbi__high_bit(ma)-7; acount = stbi__bitcount(ma); - } - for (j=0; j < (int) s->img_y; ++j) { - if (easy) { - for (i=0; i < (int) s->img_x; ++i) { - unsigned char a; - out[z+2] = stbi__get8(s); - out[z+1] = stbi__get8(s); - out[z+0] = stbi__get8(s); - z += 3; - a = (easy == 2 ? stbi__get8(s) : 255); - all_a |= a; - if (target == 4) out[z++] = a; - } - } else { - int bpp = info.bpp; - for (i=0; i < (int) s->img_x; ++i) { - stbi__uint32 v = (bpp == 16 ? (stbi__uint32) stbi__get16le(s) : stbi__get32le(s)); - unsigned int a; - out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mr, rshift, rcount)); - out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mg, gshift, gcount)); - out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mb, bshift, bcount)); - a = (ma ? stbi__shiftsigned(v & ma, ashift, acount) : 255); - all_a |= a; - if (target == 4) out[z++] = STBI__BYTECAST(a); - } - } - stbi__skip(s, pad); - } + int rshift=0,gshift=0,bshift=0,ashift=0,rcount=0,gcount=0,bcount=0,acount=0; + int z = 0; + int easy=0; + stbi__skip(s, info.offset - 14 - info.hsz); + if (info.bpp == 24) width = 3 * s->img_x; + else if (info.bpp == 16) width = 2*s->img_x; + else /* bpp = 32 and pad = 0 */ width=0; + pad = (-width) & 3; + if (info.bpp == 24) { + easy = 1; + } else if (info.bpp == 32) { + if (mb == 0xff && mg == 0xff00 && mr == 0x00ff0000 && ma == 0xff000000) + easy = 2; + } + if (!easy) { + if (!mr || !mg || !mb) { STBI_FREE(out); return stbi__errpuc("bad masks", "Corrupt BMP"); } + // right shift amt to put high bit in position #7 + rshift = stbi__high_bit(mr)-7; rcount = stbi__bitcount(mr); + gshift = stbi__high_bit(mg)-7; gcount = stbi__bitcount(mg); + bshift = stbi__high_bit(mb)-7; bcount = stbi__bitcount(mb); + ashift = stbi__high_bit(ma)-7; acount = stbi__bitcount(ma); + } + for (j=0; j < (int) s->img_y; ++j) { + if (easy) { + for (i=0; i < (int) s->img_x; ++i) { + unsigned char a; + out[z+2] = stbi__get8(s); + out[z+1] = stbi__get8(s); + out[z+0] = stbi__get8(s); + z += 3; + a = (easy == 2 ? stbi__get8(s) : 255); + all_a |= a; + if (target == 4) out[z++] = a; + } + } else { + int bpp = info.bpp; + for (i=0; i < (int) s->img_x; ++i) { + stbi__uint32 v = (bpp == 16 ? (stbi__uint32) stbi__get16le(s) : stbi__get32le(s)); + unsigned int a; + out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mr, rshift, rcount)); + out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mg, gshift, gcount)); + out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mb, bshift, bcount)); + a = (ma ? stbi__shiftsigned(v & ma, ashift, acount) : 255); + all_a |= a; + if (target == 4) out[z++] = STBI__BYTECAST(a); + } + } + stbi__skip(s, pad); + } } // if alpha channel is all 0s, replace with all 255s if (target == 4 && all_a == 0) - for (i=4*s->img_x*s->img_y-1; i >= 0; i -= 4) - out[i] = 255; + for (i=4*s->img_x*s->img_y-1; i >= 0; i -= 4) + out[i] = 255; if (flip_vertically) { - stbi_uc t; - for (j=0; j < (int) s->img_y>>1; ++j) { - stbi_uc *p1 = out + j *s->img_x*target; - stbi_uc *p2 = out + (s->img_y-1-j)*s->img_x*target; - for (i=0; i < (int) s->img_x*target; ++i) { - t = p1[i], p1[i] = p2[i], p2[i] = t; - } - } + stbi_uc t; + for (j=0; j < (int) s->img_y>>1; ++j) { + stbi_uc *p1 = out + j *s->img_x*target; + stbi_uc *p2 = out + (s->img_y-1-j)*s->img_x*target; + for (i=0; i < (int) s->img_x*target; ++i) { + t = p1[i], p1[i] = p2[i], p2[i] = t; + } + } } if (req_comp && req_comp != target) { - out = stbi__convert_format(out, target, req_comp, s->img_x, s->img_y); - if (out == NULL) return out; // stbi__convert_format frees input on failure + out = stbi__convert_format(out, target, req_comp, s->img_x, s->img_y); + if (out == NULL) return out; // stbi__convert_format frees input on failure } *x = s->img_x; @@ -5325,103 +5325,103 @@ static int stbi__tga_get_comp(int bits_per_pixel, int is_grey, int* is_rgb16) // only RGB or RGBA (incl. 16bit) or grey allowed if (is_rgb16) *is_rgb16 = 0; switch(bits_per_pixel) { - case 8: return STBI_grey; - case 16: if(is_grey) return STBI_grey_alpha; - // fallthrough - case 15: if(is_rgb16) *is_rgb16 = 1; - return STBI_rgb; - case 24: // fallthrough - case 32: return bits_per_pixel/8; - default: return 0; + case 8: return STBI_grey; + case 16: if(is_grey) return STBI_grey_alpha; + // fallthrough + case 15: if(is_rgb16) *is_rgb16 = 1; + return STBI_rgb; + case 24: // fallthrough + case 32: return bits_per_pixel/8; + default: return 0; } } static int stbi__tga_info(stbi__context *s, int *x, int *y, int *comp) { - int tga_w, tga_h, tga_comp, tga_image_type, tga_bits_per_pixel, tga_colormap_bpp; - int sz, tga_colormap_type; - stbi__get8(s); // discard Offset - tga_colormap_type = stbi__get8(s); // colormap type - if( tga_colormap_type > 1 ) { - stbi__rewind(s); - return 0; // only RGB or indexed allowed - } - tga_image_type = stbi__get8(s); // image type - if ( tga_colormap_type == 1 ) { // colormapped (paletted) image - if (tga_image_type != 1 && tga_image_type != 9) { - stbi__rewind(s); - return 0; - } - stbi__skip(s,4); // skip index of first colormap entry and number of entries - sz = stbi__get8(s); // check bits per palette color entry - if ( (sz != 8) && (sz != 15) && (sz != 16) && (sz != 24) && (sz != 32) ) { - stbi__rewind(s); - return 0; - } - stbi__skip(s,4); // skip image x and y origin - tga_colormap_bpp = sz; - } else { // "normal" image w/o colormap - only RGB or grey allowed, +/- RLE - if ( (tga_image_type != 2) && (tga_image_type != 3) && (tga_image_type != 10) && (tga_image_type != 11) ) { - stbi__rewind(s); - return 0; // only RGB or grey allowed, +/- RLE - } - stbi__skip(s,9); // skip colormap specification and image x/y origin - tga_colormap_bpp = 0; - } - tga_w = stbi__get16le(s); - if( tga_w < 1 ) { - stbi__rewind(s); - return 0; // test width - } - tga_h = stbi__get16le(s); - if( tga_h < 1 ) { - stbi__rewind(s); - return 0; // test height - } - tga_bits_per_pixel = stbi__get8(s); // bits per pixel - stbi__get8(s); // ignore alpha bits - if (tga_colormap_bpp != 0) { - if((tga_bits_per_pixel != 8) && (tga_bits_per_pixel != 16)) { - // when using a colormap, tga_bits_per_pixel is the size of the indexes - // I don't think anything but 8 or 16bit indexes makes sense - stbi__rewind(s); - return 0; - } - tga_comp = stbi__tga_get_comp(tga_colormap_bpp, 0, NULL); - } else { - tga_comp = stbi__tga_get_comp(tga_bits_per_pixel, (tga_image_type == 3) || (tga_image_type == 11), NULL); - } - if(!tga_comp) { - stbi__rewind(s); - return 0; - } - if (x) *x = tga_w; - if (y) *y = tga_h; - if (comp) *comp = tga_comp; - return 1; // seems to have passed everything + int tga_w, tga_h, tga_comp, tga_image_type, tga_bits_per_pixel, tga_colormap_bpp; + int sz, tga_colormap_type; + stbi__get8(s); // discard Offset + tga_colormap_type = stbi__get8(s); // colormap type + if( tga_colormap_type > 1 ) { + stbi__rewind(s); + return 0; // only RGB or indexed allowed + } + tga_image_type = stbi__get8(s); // image type + if ( tga_colormap_type == 1 ) { // colormapped (paletted) image + if (tga_image_type != 1 && tga_image_type != 9) { + stbi__rewind(s); + return 0; + } + stbi__skip(s,4); // skip index of first colormap entry and number of entries + sz = stbi__get8(s); // check bits per palette color entry + if ( (sz != 8) && (sz != 15) && (sz != 16) && (sz != 24) && (sz != 32) ) { + stbi__rewind(s); + return 0; + } + stbi__skip(s,4); // skip image x and y origin + tga_colormap_bpp = sz; + } else { // "normal" image w/o colormap - only RGB or grey allowed, +/- RLE + if ( (tga_image_type != 2) && (tga_image_type != 3) && (tga_image_type != 10) && (tga_image_type != 11) ) { + stbi__rewind(s); + return 0; // only RGB or grey allowed, +/- RLE + } + stbi__skip(s,9); // skip colormap specification and image x/y origin + tga_colormap_bpp = 0; + } + tga_w = stbi__get16le(s); + if( tga_w < 1 ) { + stbi__rewind(s); + return 0; // test width + } + tga_h = stbi__get16le(s); + if( tga_h < 1 ) { + stbi__rewind(s); + return 0; // test height + } + tga_bits_per_pixel = stbi__get8(s); // bits per pixel + stbi__get8(s); // ignore alpha bits + if (tga_colormap_bpp != 0) { + if((tga_bits_per_pixel != 8) && (tga_bits_per_pixel != 16)) { + // when using a colormap, tga_bits_per_pixel is the size of the indexes + // I don't think anything but 8 or 16bit indexes makes sense + stbi__rewind(s); + return 0; + } + tga_comp = stbi__tga_get_comp(tga_colormap_bpp, 0, NULL); + } else { + tga_comp = stbi__tga_get_comp(tga_bits_per_pixel, (tga_image_type == 3) || (tga_image_type == 11), NULL); + } + if(!tga_comp) { + stbi__rewind(s); + return 0; + } + if (x) *x = tga_w; + if (y) *y = tga_h; + if (comp) *comp = tga_comp; + return 1; // seems to have passed everything } static int stbi__tga_test(stbi__context *s) { int res = 0; int sz, tga_color_type; - stbi__get8(s); // discard Offset - tga_color_type = stbi__get8(s); // color type - if ( tga_color_type > 1 ) goto errorEnd; // only RGB or indexed allowed - sz = stbi__get8(s); // image type + stbi__get8(s); // discard Offset + tga_color_type = stbi__get8(s); // color type + if ( tga_color_type > 1 ) goto errorEnd; // only RGB or indexed allowed + sz = stbi__get8(s); // image type if ( tga_color_type == 1 ) { // colormapped (paletted) image - if (sz != 1 && sz != 9) goto errorEnd; // colortype 1 demands image type 1 or 9 - stbi__skip(s,4); // skip index of first colormap entry and number of entries - sz = stbi__get8(s); // check bits per palette color entry - if ( (sz != 8) && (sz != 15) && (sz != 16) && (sz != 24) && (sz != 32) ) goto errorEnd; - stbi__skip(s,4); // skip image x and y origin + if (sz != 1 && sz != 9) goto errorEnd; // colortype 1 demands image type 1 or 9 + stbi__skip(s,4); // skip index of first colormap entry and number of entries + sz = stbi__get8(s); // check bits per palette color entry + if ( (sz != 8) && (sz != 15) && (sz != 16) && (sz != 24) && (sz != 32) ) goto errorEnd; + stbi__skip(s,4); // skip image x and y origin } else { // "normal" image w/o colormap - if ( (sz != 2) && (sz != 3) && (sz != 10) && (sz != 11) ) goto errorEnd; // only RGB or grey allowed, +/- RLE - stbi__skip(s,9); // skip colormap specification and image x/y origin + if ( (sz != 2) && (sz != 3) && (sz != 10) && (sz != 11) ) goto errorEnd; // only RGB or grey allowed, +/- RLE + stbi__skip(s,9); // skip colormap specification and image x/y origin } - if ( stbi__get16le(s) < 1 ) goto errorEnd; // test width - if ( stbi__get16le(s) < 1 ) goto errorEnd; // test height - sz = stbi__get8(s); // bits per pixel + if ( stbi__get16le(s) < 1 ) goto errorEnd; // test width + if ( stbi__get16le(s) < 1 ) goto errorEnd; // test height + sz = stbi__get8(s); // bits per pixel if ( (tga_color_type == 1) && (sz != 8) && (sz != 16) ) goto errorEnd; // for colormapped images, bpp is size of an index if ( (sz != 8) && (sz != 15) && (sz != 16) && (sz != 24) && (sz != 32) ) goto errorEnd; @@ -5454,7 +5454,7 @@ static void stbi__tga_read_rgb16(stbi__context *s, stbi_uc* out) static void *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri) { - // read in the TGA header stuff + // read in the TGA header stuff int tga_offset = stbi__get8(s); int tga_indexed = stbi__get8(s); int tga_image_type = stbi__get8(s); @@ -5470,7 +5470,7 @@ static void *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int req int tga_comp, tga_rgb16=0; int tga_inverted = stbi__get8(s); // int tga_alpha_bits = tga_inverted & 15; // the 4 lowest bits - unused (useless?) - // image data + // image data unsigned char *tga_data; unsigned char *tga_palette = NULL; int i, j; @@ -5480,28 +5480,28 @@ static void *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int req int read_next_pixel = 1; STBI_NOTUSED(ri); - // do a tiny bit of precessing + // do a tiny bit of precessing if ( tga_image_type >= 8 ) { - tga_image_type -= 8; - tga_is_RLE = 1; + tga_image_type -= 8; + tga_is_RLE = 1; } tga_inverted = 1 - ((tga_inverted >> 5) & 1); - // If I'm paletted, then I'll use the number of bits from the palette + // If I'm paletted, then I'll use the number of bits from the palette if ( tga_indexed ) tga_comp = stbi__tga_get_comp(tga_palette_bits, 0, &tga_rgb16); else tga_comp = stbi__tga_get_comp(tga_bits_per_pixel, (tga_image_type == 3), &tga_rgb16); if(!tga_comp) // shouldn't really happen, stbi__tga_test() should have ensured basic consistency - return stbi__errpuc("bad format", "Can't find out TGA pixelformat"); + return stbi__errpuc("bad format", "Can't find out TGA pixelformat"); - // tga info + // tga info *x = tga_width; *y = tga_height; if (comp) *comp = tga_comp; if (!stbi__mad3sizes_valid(tga_width, tga_height, tga_comp, 0)) - return stbi__errpuc("too large", "Corrupt TGA"); + return stbi__errpuc("too large", "Corrupt TGA"); tga_data = (unsigned char*)stbi__malloc_mad3(tga_width, tga_height, tga_comp, 0); if (!tga_data) return stbi__errpuc("outofmem", "Out of memory"); @@ -5510,139 +5510,139 @@ static void *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int req stbi__skip(s, tga_offset ); if ( !tga_indexed && !tga_is_RLE && !tga_rgb16 ) { - for (i=0; i < tga_height; ++i) { - int row = tga_inverted ? tga_height -i - 1 : i; - stbi_uc *tga_row = tga_data + row*tga_width*tga_comp; - stbi__getn(s, tga_row, tga_width * tga_comp); - } + for (i=0; i < tga_height; ++i) { + int row = tga_inverted ? tga_height -i - 1 : i; + stbi_uc *tga_row = tga_data + row*tga_width*tga_comp; + stbi__getn(s, tga_row, tga_width * tga_comp); + } } else { - // do I need to load a palette? - if ( tga_indexed) - { - // any data to skip? (offset usually = 0) - stbi__skip(s, tga_palette_start ); - // load the palette - tga_palette = (unsigned char*)stbi__malloc_mad2(tga_palette_len, tga_comp, 0); - if (!tga_palette) { - STBI_FREE(tga_data); - return stbi__errpuc("outofmem", "Out of memory"); - } - if (tga_rgb16) { - stbi_uc *pal_entry = tga_palette; - STBI_ASSERT(tga_comp == STBI_rgb); - for (i=0; i < tga_palette_len; ++i) { - stbi__tga_read_rgb16(s, pal_entry); - pal_entry += tga_comp; - } - } else if (!stbi__getn(s, tga_palette, tga_palette_len * tga_comp)) { - STBI_FREE(tga_data); - STBI_FREE(tga_palette); - return stbi__errpuc("bad palette", "Corrupt TGA"); - } - } - // load the data - for (i=0; i < tga_width * tga_height; ++i) - { - // if I'm in RLE mode, do I need to get a RLE stbi__pngchunk? - if ( tga_is_RLE ) - { - if ( RLE_count == 0 ) - { - // yep, get the next byte as a RLE command - int RLE_cmd = stbi__get8(s); - RLE_count = 1 + (RLE_cmd & 127); - RLE_repeating = RLE_cmd >> 7; - read_next_pixel = 1; - } else if ( !RLE_repeating ) - { - read_next_pixel = 1; - } - } else - { - read_next_pixel = 1; - } - // OK, if I need to read a pixel, do it now - if ( read_next_pixel ) - { - // load however much data we did have - if ( tga_indexed ) - { - // read in index, then perform the lookup - int pal_idx = (tga_bits_per_pixel == 8) ? stbi__get8(s) : stbi__get16le(s); - if ( pal_idx >= tga_palette_len ) { - // invalid index - pal_idx = 0; - } - pal_idx *= tga_comp; - for (j = 0; j < tga_comp; ++j) { - raw_data[j] = tga_palette[pal_idx+j]; - } - } else if(tga_rgb16) { - STBI_ASSERT(tga_comp == STBI_rgb); - stbi__tga_read_rgb16(s, raw_data); - } else { - // read in the data raw - for (j = 0; j < tga_comp; ++j) { - raw_data[j] = stbi__get8(s); - } - } - // clear the reading flag for the next pixel - read_next_pixel = 0; - } // end of reading a pixel - - // copy data - for (j = 0; j < tga_comp; ++j) - tga_data[i*tga_comp+j] = raw_data[j]; - - // in case we're in RLE mode, keep counting down - --RLE_count; - } - // do I need to invert the image? - if ( tga_inverted ) - { - for (j = 0; j*2 < tga_height; ++j) - { - int index1 = j * tga_width * tga_comp; - int index2 = (tga_height - 1 - j) * tga_width * tga_comp; - for (i = tga_width * tga_comp; i > 0; --i) - { - unsigned char temp = tga_data[index1]; - tga_data[index1] = tga_data[index2]; - tga_data[index2] = temp; - ++index1; - ++index2; - } - } - } - // clear my palette, if I had one - if ( tga_palette != NULL ) - { - STBI_FREE( tga_palette ); - } + // do I need to load a palette? + if ( tga_indexed) + { + // any data to skip? (offset usually = 0) + stbi__skip(s, tga_palette_start ); + // load the palette + tga_palette = (unsigned char*)stbi__malloc_mad2(tga_palette_len, tga_comp, 0); + if (!tga_palette) { + STBI_FREE(tga_data); + return stbi__errpuc("outofmem", "Out of memory"); + } + if (tga_rgb16) { + stbi_uc *pal_entry = tga_palette; + STBI_ASSERT(tga_comp == STBI_rgb); + for (i=0; i < tga_palette_len; ++i) { + stbi__tga_read_rgb16(s, pal_entry); + pal_entry += tga_comp; + } + } else if (!stbi__getn(s, tga_palette, tga_palette_len * tga_comp)) { + STBI_FREE(tga_data); + STBI_FREE(tga_palette); + return stbi__errpuc("bad palette", "Corrupt TGA"); + } + } + // load the data + for (i=0; i < tga_width * tga_height; ++i) + { + // if I'm in RLE mode, do I need to get a RLE stbi__pngchunk? + if ( tga_is_RLE ) + { + if ( RLE_count == 0 ) + { + // yep, get the next byte as a RLE command + int RLE_cmd = stbi__get8(s); + RLE_count = 1 + (RLE_cmd & 127); + RLE_repeating = RLE_cmd >> 7; + read_next_pixel = 1; + } else if ( !RLE_repeating ) + { + read_next_pixel = 1; + } + } else + { + read_next_pixel = 1; + } + // OK, if I need to read a pixel, do it now + if ( read_next_pixel ) + { + // load however much data we did have + if ( tga_indexed ) + { + // read in index, then perform the lookup + int pal_idx = (tga_bits_per_pixel == 8) ? stbi__get8(s) : stbi__get16le(s); + if ( pal_idx >= tga_palette_len ) { + // invalid index + pal_idx = 0; + } + pal_idx *= tga_comp; + for (j = 0; j < tga_comp; ++j) { + raw_data[j] = tga_palette[pal_idx+j]; + } + } else if(tga_rgb16) { + STBI_ASSERT(tga_comp == STBI_rgb); + stbi__tga_read_rgb16(s, raw_data); + } else { + // read in the data raw + for (j = 0; j < tga_comp; ++j) { + raw_data[j] = stbi__get8(s); + } + } + // clear the reading flag for the next pixel + read_next_pixel = 0; + } // end of reading a pixel + + // copy data + for (j = 0; j < tga_comp; ++j) + tga_data[i*tga_comp+j] = raw_data[j]; + + // in case we're in RLE mode, keep counting down + --RLE_count; + } + // do I need to invert the image? + if ( tga_inverted ) + { + for (j = 0; j*2 < tga_height; ++j) + { + int index1 = j * tga_width * tga_comp; + int index2 = (tga_height - 1 - j) * tga_width * tga_comp; + for (i = tga_width * tga_comp; i > 0; --i) + { + unsigned char temp = tga_data[index1]; + tga_data[index1] = tga_data[index2]; + tga_data[index2] = temp; + ++index1; + ++index2; + } + } + } + // clear my palette, if I had one + if ( tga_palette != NULL ) + { + STBI_FREE( tga_palette ); + } } // swap RGB - if the source data was RGB16, it already is in the right order if (tga_comp >= 3 && !tga_rgb16) { - unsigned char* tga_pixel = tga_data; - for (i=0; i < tga_width * tga_height; ++i) - { - unsigned char temp = tga_pixel[0]; - tga_pixel[0] = tga_pixel[2]; - tga_pixel[2] = temp; - tga_pixel += tga_comp; - } + unsigned char* tga_pixel = tga_data; + for (i=0; i < tga_width * tga_height; ++i) + { + unsigned char temp = tga_pixel[0]; + tga_pixel[0] = tga_pixel[2]; + tga_pixel[2] = temp; + tga_pixel += tga_comp; + } } // convert to target component count if (req_comp && req_comp != tga_comp) - tga_data = stbi__convert_format(tga_data, tga_comp, req_comp, tga_width, tga_height); + tga_data = stbi__convert_format(tga_data, tga_comp, req_comp, tga_width, tga_height); - // the things I do to get rid of an error message, and yet keep - // Microsoft's C compilers happy... [8^( + // the things I do to get rid of an error message, and yet keep + // Microsoft's C compilers happy... [8^( tga_palette_start = tga_palette_len = tga_palette_bits = - tga_x_origin = tga_y_origin = 0; - // OK, done + tga_x_origin = tga_y_origin = 0; + // OK, done return tga_data; } #endif @@ -5664,33 +5664,33 @@ static int stbi__psd_decode_rle(stbi__context *s, stbi_uc *p, int pixelCount) count = 0; while ((nleft = pixelCount - count) > 0) { - len = stbi__get8(s); - if (len == 128) { - // No-op. - } else if (len < 128) { - // Copy next len+1 bytes literally. - len++; - if (len > nleft) return 0; // corrupt data - count += len; - while (len) { - *p = stbi__get8(s); - p += 4; - len--; - } - } else if (len > 128) { - stbi_uc val; - // Next -len+1 bytes in the dest are replicated from next source byte. - // (Interpret len as a negative 8-bit int.) - len = 257 - len; - if (len > nleft) return 0; // corrupt data - val = stbi__get8(s); - count += len; - while (len) { - *p = val; - p += 4; - len--; - } - } + len = stbi__get8(s); + if (len == 128) { + // No-op. + } else if (len < 128) { + // Copy next len+1 bytes literally. + len++; + if (len > nleft) return 0; // corrupt data + count += len; + while (len) { + *p = stbi__get8(s); + p += 4; + len--; + } + } else if (len > 128) { + stbi_uc val; + // Next -len+1 bytes in the dest are replicated from next source byte. + // (Interpret len as a negative 8-bit int.) + len = 257 - len; + if (len > nleft) return 0; // corrupt data + val = stbi__get8(s); + count += len; + while (len) { + *p = val; + p += 4; + len--; + } + } } return 1; @@ -5707,12 +5707,12 @@ static void *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int req STBI_NOTUSED(ri); // Check identifier - if (stbi__get32be(s) != 0x38425053) // "8BPS" - return stbi__errpuc("not PSD", "Corrupt PSD image"); + if (stbi__get32be(s) != 0x38425053) // "8BPS" + return stbi__errpuc("not PSD", "Corrupt PSD image"); // Check file type version. if (stbi__get16be(s) != 1) - return stbi__errpuc("wrong version", "Unsupported version of PSD image"); + return stbi__errpuc("wrong version", "Unsupported version of PSD image"); // Skip 6 reserved bytes. stbi__skip(s, 6 ); @@ -5720,7 +5720,7 @@ static void *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int req // Read the number of channels (R, G, B, A, etc). channelCount = stbi__get16be(s); if (channelCount < 0 || channelCount > 16) - return stbi__errpuc("wrong channel count", "Unsupported number of channels in PSD image"); + return stbi__errpuc("wrong channel count", "Unsupported number of channels in PSD image"); // Read the rows and columns of the image. h = stbi__get32be(s); @@ -5729,25 +5729,25 @@ static void *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int req // Make sure the depth is 8 bits. bitdepth = stbi__get16be(s); if (bitdepth != 8 && bitdepth != 16) - return stbi__errpuc("unsupported bit depth", "PSD bit depth is not 8 or 16 bit"); + return stbi__errpuc("unsupported bit depth", "PSD bit depth is not 8 or 16 bit"); // Make sure the color mode is RGB. // Valid options are: - // 0: Bitmap - // 1: Grayscale - // 2: Indexed color - // 3: RGB color - // 4: CMYK color - // 7: Multichannel - // 8: Duotone - // 9: Lab color + // 0: Bitmap + // 1: Grayscale + // 2: Indexed color + // 3: RGB color + // 4: CMYK color + // 7: Multichannel + // 8: Duotone + // 9: Lab color if (stbi__get16be(s) != 3) - return stbi__errpuc("wrong color format", "PSD is not in RGB color format"); + return stbi__errpuc("wrong color format", "PSD is not in RGB color format"); // Skip the Mode Data. (It's the palette for indexed color; other info for other modes.) stbi__skip(s,stbi__get32be(s) ); - // Skip the image resources. (resolution, pen tool paths, etc) + // Skip the image resources. (resolution, pen tool paths, etc) stbi__skip(s, stbi__get32be(s) ); // Skip the reserved data. @@ -5755,23 +5755,23 @@ static void *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int req // Find out if the data is compressed. // Known values: - // 0: no compression - // 1: RLE compressed + // 0: no compression + // 1: RLE compressed compression = stbi__get16be(s); if (compression > 1) - return stbi__errpuc("bad compression", "PSD has an unknown compression format"); + return stbi__errpuc("bad compression", "PSD has an unknown compression format"); // Check size if (!stbi__mad3sizes_valid(4, w, h, 0)) - return stbi__errpuc("too large", "Corrupt PSD"); + return stbi__errpuc("too large", "Corrupt PSD"); // Create the destination image. if (!compression && bitdepth == 16 && bpc == 16) { - out = (stbi_uc *) stbi__malloc_mad3(8, w, h, 0); - ri->bits_per_channel = 16; + out = (stbi_uc *) stbi__malloc_mad3(8, w, h, 0); + ri->bits_per_channel = 16; } else - out = (stbi_uc *) stbi__malloc(4 * w*h); + out = (stbi_uc *) stbi__malloc(4 * w*h); if (!out) return stbi__errpuc("outofmem", "Out of memory"); pixelCount = w*h; @@ -5781,110 +5781,110 @@ static void *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int req // Finally, the image data. if (compression) { - // RLE as used by .PSD and .TIFF - // Loop until you get the number of unpacked bytes you are expecting: - // Read the next source byte into n. - // If n is between 0 and 127 inclusive, copy the next n+1 bytes literally. - // Else if n is between -127 and -1 inclusive, copy the next byte -n+1 times. - // Else if n is 128, noop. - // Endloop - - // The RLE-compressed data is preceeded by a 2-byte data count for each row in the data, - // which we're going to just skip. - stbi__skip(s, h * channelCount * 2 ); - - // Read the RLE data by channel. - for (channel = 0; channel < 4; channel++) { - stbi_uc *p; - - p = out+channel; - if (channel >= channelCount) { - // Fill this channel with default data. - for (i = 0; i < pixelCount; i++, p += 4) - *p = (channel == 3 ? 255 : 0); - } else { - // Read the RLE data. - if (!stbi__psd_decode_rle(s, p, pixelCount)) { - STBI_FREE(out); - return stbi__errpuc("corrupt", "bad RLE data"); - } - } - } + // RLE as used by .PSD and .TIFF + // Loop until you get the number of unpacked bytes you are expecting: + // Read the next source byte into n. + // If n is between 0 and 127 inclusive, copy the next n+1 bytes literally. + // Else if n is between -127 and -1 inclusive, copy the next byte -n+1 times. + // Else if n is 128, noop. + // Endloop + + // The RLE-compressed data is preceeded by a 2-byte data count for each row in the data, + // which we're going to just skip. + stbi__skip(s, h * channelCount * 2 ); + + // Read the RLE data by channel. + for (channel = 0; channel < 4; channel++) { + stbi_uc *p; + + p = out+channel; + if (channel >= channelCount) { + // Fill this channel with default data. + for (i = 0; i < pixelCount; i++, p += 4) + *p = (channel == 3 ? 255 : 0); + } else { + // Read the RLE data. + if (!stbi__psd_decode_rle(s, p, pixelCount)) { + STBI_FREE(out); + return stbi__errpuc("corrupt", "bad RLE data"); + } + } + } } else { - // We're at the raw image data. It's each channel in order (Red, Green, Blue, Alpha, ...) - // where each channel consists of an 8-bit (or 16-bit) value for each pixel in the image. - - // Read the data by channel. - for (channel = 0; channel < 4; channel++) { - if (channel >= channelCount) { - // Fill this channel with default data. - if (bitdepth == 16 && bpc == 16) { - stbi__uint16 *q = ((stbi__uint16 *) out) + channel; - stbi__uint16 val = channel == 3 ? 65535 : 0; - for (i = 0; i < pixelCount; i++, q += 4) - *q = val; - } else { - stbi_uc *p = out+channel; - stbi_uc val = channel == 3 ? 255 : 0; - for (i = 0; i < pixelCount; i++, p += 4) - *p = val; - } - } else { - if (ri->bits_per_channel == 16) { // output bpc - stbi__uint16 *q = ((stbi__uint16 *) out) + channel; - for (i = 0; i < pixelCount; i++, q += 4) - *q = (stbi__uint16) stbi__get16be(s); - } else { - stbi_uc *p = out+channel; - if (bitdepth == 16) { // input bpc - for (i = 0; i < pixelCount; i++, p += 4) - *p = (stbi_uc) (stbi__get16be(s) >> 8); - } else { - for (i = 0; i < pixelCount; i++, p += 4) - *p = stbi__get8(s); - } - } - } - } + // We're at the raw image data. It's each channel in order (Red, Green, Blue, Alpha, ...) + // where each channel consists of an 8-bit (or 16-bit) value for each pixel in the image. + + // Read the data by channel. + for (channel = 0; channel < 4; channel++) { + if (channel >= channelCount) { + // Fill this channel with default data. + if (bitdepth == 16 && bpc == 16) { + stbi__uint16 *q = ((stbi__uint16 *) out) + channel; + stbi__uint16 val = channel == 3 ? 65535 : 0; + for (i = 0; i < pixelCount; i++, q += 4) + *q = val; + } else { + stbi_uc *p = out+channel; + stbi_uc val = channel == 3 ? 255 : 0; + for (i = 0; i < pixelCount; i++, p += 4) + *p = val; + } + } else { + if (ri->bits_per_channel == 16) { // output bpc + stbi__uint16 *q = ((stbi__uint16 *) out) + channel; + for (i = 0; i < pixelCount; i++, q += 4) + *q = (stbi__uint16) stbi__get16be(s); + } else { + stbi_uc *p = out+channel; + if (bitdepth == 16) { // input bpc + for (i = 0; i < pixelCount; i++, p += 4) + *p = (stbi_uc) (stbi__get16be(s) >> 8); + } else { + for (i = 0; i < pixelCount; i++, p += 4) + *p = stbi__get8(s); + } + } + } + } } // remove weird white matte from PSD if (channelCount >= 4) { - if (ri->bits_per_channel == 16) { - for (i=0; i < w*h; ++i) { - stbi__uint16 *pixel = (stbi__uint16 *) out + 4*i; - if (pixel[3] != 0 && pixel[3] != 65535) { - float a = pixel[3] / 65535.0f; - float ra = 1.0f / a; - float inv_a = 65535.0f * (1 - ra); - pixel[0] = (stbi__uint16) (pixel[0]*ra + inv_a); - pixel[1] = (stbi__uint16) (pixel[1]*ra + inv_a); - pixel[2] = (stbi__uint16) (pixel[2]*ra + inv_a); - } - } - } else { - for (i=0; i < w*h; ++i) { - unsigned char *pixel = out + 4*i; - if (pixel[3] != 0 && pixel[3] != 255) { - float a = pixel[3] / 255.0f; - float ra = 1.0f / a; - float inv_a = 255.0f * (1 - ra); - pixel[0] = (unsigned char) (pixel[0]*ra + inv_a); - pixel[1] = (unsigned char) (pixel[1]*ra + inv_a); - pixel[2] = (unsigned char) (pixel[2]*ra + inv_a); - } - } - } + if (ri->bits_per_channel == 16) { + for (i=0; i < w*h; ++i) { + stbi__uint16 *pixel = (stbi__uint16 *) out + 4*i; + if (pixel[3] != 0 && pixel[3] != 65535) { + float a = pixel[3] / 65535.0f; + float ra = 1.0f / a; + float inv_a = 65535.0f * (1 - ra); + pixel[0] = (stbi__uint16) (pixel[0]*ra + inv_a); + pixel[1] = (stbi__uint16) (pixel[1]*ra + inv_a); + pixel[2] = (stbi__uint16) (pixel[2]*ra + inv_a); + } + } + } else { + for (i=0; i < w*h; ++i) { + unsigned char *pixel = out + 4*i; + if (pixel[3] != 0 && pixel[3] != 255) { + float a = pixel[3] / 255.0f; + float ra = 1.0f / a; + float inv_a = 255.0f * (1 - ra); + pixel[0] = (unsigned char) (pixel[0]*ra + inv_a); + pixel[1] = (unsigned char) (pixel[1]*ra + inv_a); + pixel[2] = (unsigned char) (pixel[2]*ra + inv_a); + } + } + } } // convert to desired output format if (req_comp && req_comp != 4) { - if (ri->bits_per_channel == 16) - out = (stbi_uc *) stbi__convert_format16((stbi__uint16 *) out, 4, req_comp, w, h); - else - out = stbi__convert_format(out, 4, req_comp, w, h); - if (out == NULL) return out; // stbi__convert_format frees input on failure + if (ri->bits_per_channel == 16) + out = (stbi_uc *) stbi__convert_format16((stbi__uint16 *) out, 4, req_comp, w, h); + else + out = stbi__convert_format(out, 4, req_comp, w, h); + if (out == NULL) return out; // stbi__convert_format frees input on failure } if (comp) *comp = 4; @@ -5907,8 +5907,8 @@ static int stbi__pic_is4(stbi__context *s,const char *str) { int i; for (i=0; i<4; ++i) - if (stbi__get8(s) != (stbi_uc)str[i]) - return 0; + if (stbi__get8(s) != (stbi_uc)str[i]) + return 0; return 1; } @@ -5918,13 +5918,13 @@ static int stbi__pic_test_core(stbi__context *s) int i; if (!stbi__pic_is4(s,"\x53\x80\xF6\x34")) - return 0; + return 0; for(i=0;i<84;++i) - stbi__get8(s); + stbi__get8(s); if (!stbi__pic_is4(s,"PICT")) - return 0; + return 0; return 1; } @@ -5939,10 +5939,10 @@ static stbi_uc *stbi__readval(stbi__context *s, int channel, stbi_uc *dest) int mask=0x80, i; for (i=0; i<4; ++i, mask>>=1) { - if (channel & mask) { - if (stbi__at_eof(s)) return stbi__errpuc("bad file","PIC file too short"); - dest[i]=stbi__get8(s); - } + if (channel & mask) { + if (stbi__at_eof(s)) return stbi__errpuc("bad file","PIC file too short"); + dest[i]=stbi__get8(s); + } } return dest; @@ -5953,8 +5953,8 @@ static void stbi__copyval(int channel,stbi_uc *dest,const stbi_uc *src) int mask=0x80,i; for (i=0;i<4; ++i, mask>>=1) - if (channel&mask) - dest[i]=src[i]; + if (channel&mask) + dest[i]=src[i]; } static stbi_uc *stbi__pic_load_core(stbi__context *s,int width,int height,int *comp, stbi_uc *result) @@ -5963,105 +5963,105 @@ static stbi_uc *stbi__pic_load_core(stbi__context *s,int width,int height,int *c stbi__pic_packet packets[10]; // this will (should...) cater for even some bizarre stuff like having data - // for the same channel in multiple packets. + // for the same channel in multiple packets. do { - stbi__pic_packet *packet; + stbi__pic_packet *packet; - if (num_packets==sizeof(packets)/sizeof(packets[0])) - return stbi__errpuc("bad format","too many packets"); + if (num_packets==sizeof(packets)/sizeof(packets[0])) + return stbi__errpuc("bad format","too many packets"); - packet = &packets[num_packets++]; + packet = &packets[num_packets++]; - chained = stbi__get8(s); - packet->size = stbi__get8(s); - packet->type = stbi__get8(s); - packet->channel = stbi__get8(s); + chained = stbi__get8(s); + packet->size = stbi__get8(s); + packet->type = stbi__get8(s); + packet->channel = stbi__get8(s); - act_comp |= packet->channel; + act_comp |= packet->channel; - if (stbi__at_eof(s)) return stbi__errpuc("bad file","file too short (reading packets)"); - if (packet->size != 8) return stbi__errpuc("bad format","packet isn't 8bpp"); + if (stbi__at_eof(s)) return stbi__errpuc("bad file","file too short (reading packets)"); + if (packet->size != 8) return stbi__errpuc("bad format","packet isn't 8bpp"); } while (chained); *comp = (act_comp & 0x10 ? 4 : 3); // has alpha channel? for(y=0; ytype) { - default: - return stbi__errpuc("bad format","packet has bad compression type"); - - case 0: {//uncompressed - int x; - - for(x=0;xchannel,dest)) - return 0; - break; - } - - case 1://Pure RLE - { - int left=width, i; - - while (left>0) { - stbi_uc count,value[4]; - - count=stbi__get8(s); - if (stbi__at_eof(s)) return stbi__errpuc("bad file","file too short (pure read count)"); - - if (count > left) - count = (stbi_uc) left; - - if (!stbi__readval(s,packet->channel,value)) return 0; - - for(i=0; ichannel,dest,value); - left -= count; - } - } - break; - - case 2: {//Mixed RLE - int left=width; - while (left>0) { - int count = stbi__get8(s), i; - if (stbi__at_eof(s)) return stbi__errpuc("bad file","file too short (mixed read count)"); - - if (count >= 128) { // Repeated - stbi_uc value[4]; - - if (count==128) - count = stbi__get16be(s); - else - count -= 127; - if (count > left) - return stbi__errpuc("bad file","scanline overrun"); - - if (!stbi__readval(s,packet->channel,value)) - return 0; - - for(i=0;ichannel,dest,value); - } else { // Raw - ++count; - if (count>left) return stbi__errpuc("bad file","scanline overrun"); - - for(i=0;ichannel,dest)) - return 0; - } - left-=count; - } - break; - } - } - } + int packet_idx; + + for(packet_idx=0; packet_idx < num_packets; ++packet_idx) { + stbi__pic_packet *packet = &packets[packet_idx]; + stbi_uc *dest = result+y*width*4; + + switch (packet->type) { + default: + return stbi__errpuc("bad format","packet has bad compression type"); + + case 0: {//uncompressed + int x; + + for(x=0;xchannel,dest)) + return 0; + break; + } + + case 1://Pure RLE + { + int left=width, i; + + while (left>0) { + stbi_uc count,value[4]; + + count=stbi__get8(s); + if (stbi__at_eof(s)) return stbi__errpuc("bad file","file too short (pure read count)"); + + if (count > left) + count = (stbi_uc) left; + + if (!stbi__readval(s,packet->channel,value)) return 0; + + for(i=0; ichannel,dest,value); + left -= count; + } + } + break; + + case 2: {//Mixed RLE + int left=width; + while (left>0) { + int count = stbi__get8(s), i; + if (stbi__at_eof(s)) return stbi__errpuc("bad file","file too short (mixed read count)"); + + if (count >= 128) { // Repeated + stbi_uc value[4]; + + if (count==128) + count = stbi__get16be(s); + else + count -= 127; + if (count > left) + return stbi__errpuc("bad file","scanline overrun"); + + if (!stbi__readval(s,packet->channel,value)) + return 0; + + for(i=0;ichannel,dest,value); + } else { // Raw + ++count; + if (count>left) return stbi__errpuc("bad file","scanline overrun"); + + for(i=0;ichannel,dest)) + return 0; + } + left-=count; + } + break; + } + } + } } return result; @@ -6076,11 +6076,11 @@ static void *stbi__pic_load(stbi__context *s,int *px,int *py,int *comp,int req_c if (!comp) comp = &internal_comp; for (i=0; i<92; ++i) - stbi__get8(s); + stbi__get8(s); x = stbi__get16be(s); y = stbi__get16be(s); - if (stbi__at_eof(s)) return stbi__errpuc("bad file","file too short (pic header)"); + if (stbi__at_eof(s)) return stbi__errpuc("bad file","file too short (pic header)"); if (!stbi__mad3sizes_valid(x, y, 4, 0)) return stbi__errpuc("too large", "PIC image too large to decode"); stbi__get32be(s); //skip `ratio' @@ -6092,8 +6092,8 @@ static void *stbi__pic_load(stbi__context *s,int *px,int *py,int *comp,int req_c memset(result, 0xff, x*y*4); if (!stbi__pic_load_core(s,x,y,comp, result)) { - STBI_FREE(result); - result=0; + STBI_FREE(result); + result=0; } *px = x; *py = y; @@ -6125,11 +6125,11 @@ typedef struct typedef struct { int w,h; - stbi_uc *out; // output buffer (always 4 components) - stbi_uc *background; // The current "background" as far as a gif is concerned + stbi_uc *out; // output buffer (always 4 components) + stbi_uc *background; // The current "background" as far as a gif is concerned stbi_uc *history; int flags, bgindex, ratio, transparent, eflags; - stbi_uc pal[256][4]; + stbi_uc pal[256][4]; stbi_uc lpal[256][4]; stbi__gif_lzw codes[8192]; stbi_uc *color_table; @@ -6163,10 +6163,10 @@ static void stbi__gif_parse_colortable(stbi__context *s, stbi_uc pal[256][4], in { int i; for (i=0; i < num_entries; ++i) { - pal[i][2] = stbi__get8(s); - pal[i][1] = stbi__get8(s); - pal[i][0] = stbi__get8(s); - pal[i][3] = transp == i ? 0 : 255; + pal[i][2] = stbi__get8(s); + pal[i][1] = stbi__get8(s); + pal[i][0] = stbi__get8(s); + pal[i][3] = transp == i ? 0 : 255; } } @@ -6174,11 +6174,11 @@ static int stbi__gif_header(stbi__context *s, stbi__gif *g, int *comp, int is_in { stbi_uc version; if (stbi__get8(s) != 'G' || stbi__get8(s) != 'I' || stbi__get8(s) != 'F' || stbi__get8(s) != '8') - return stbi__err("not GIF", "Corrupt GIF"); + return stbi__err("not GIF", "Corrupt GIF"); version = stbi__get8(s); - if (version != '7' && version != '9') return stbi__err("not GIF", "Corrupt GIF"); - if (stbi__get8(s) != 'a') return stbi__err("not GIF", "Corrupt GIF"); + if (version != '7' && version != '9') return stbi__err("not GIF", "Corrupt GIF"); + if (stbi__get8(s) != 'a') return stbi__err("not GIF", "Corrupt GIF"); stbi__g_failure_reason = ""; g->w = stbi__get16le(s); @@ -6193,7 +6193,7 @@ static int stbi__gif_header(stbi__context *s, stbi__gif *g, int *comp, int is_in if (is_info) return 1; if (g->flags & 0x80) - stbi__gif_parse_colortable(s,g->pal, 2 << (g->flags & 7), -1); + stbi__gif_parse_colortable(s,g->pal, 2 << (g->flags & 7), -1); return 1; } @@ -6202,9 +6202,9 @@ static int stbi__gif_info_raw(stbi__context *s, int *x, int *y, int *comp) { stbi__gif* g = (stbi__gif*) stbi__malloc(sizeof(stbi__gif)); if (!stbi__gif_header(s, g, comp, 1)) { - STBI_FREE(g); - stbi__rewind( s ); - return 0; + STBI_FREE(g); + stbi__rewind( s ); + return 0; } if (x) *x = g->w; if (y) *y = g->h; @@ -6220,32 +6220,32 @@ static void stbi__out_gif_code(stbi__gif *g, stbi__uint16 code) // recurse to decode the prefixes, since the linked-list is backwards, // and working backwards through an interleaved image would be nasty if (g->codes[code].prefix >= 0) - stbi__out_gif_code(g, g->codes[code].prefix); + stbi__out_gif_code(g, g->codes[code].prefix); if (g->cur_y >= g->max_y) return; idx = g->cur_x + g->cur_y; p = &g->out[idx]; - g->history[idx / 4] = 1; + g->history[idx / 4] = 1; c = &g->color_table[g->codes[code].suffix * 4]; if (c[3] > 128) { // don't render transparent pixels; - p[0] = c[2]; - p[1] = c[1]; - p[2] = c[0]; - p[3] = c[3]; + p[0] = c[2]; + p[1] = c[1]; + p[2] = c[0]; + p[3] = c[3]; } g->cur_x += 4; if (g->cur_x >= g->max_x) { - g->cur_x = g->start_x; - g->cur_y += g->step; + g->cur_x = g->start_x; + g->cur_y += g->step; - while (g->cur_y >= g->max_y && g->parse > 0) { - g->step = (1 << g->parse) * g->line_size; - g->cur_y = g->start_y + (g->step >> 1); - --g->parse; - } + while (g->cur_y >= g->max_y && g->parse > 0) { + g->step = (1 << g->parse) * g->line_size; + g->cur_y = g->start_y + (g->step >> 1); + --g->parse; + } } } @@ -6266,9 +6266,9 @@ static stbi_uc *stbi__process_gif_raster(stbi__context *s, stbi__gif *g) bits = 0; valid_bits = 0; for (init_code = 0; init_code < clear; init_code++) { - g->codes[init_code].prefix = -1; - g->codes[init_code].first = (stbi_uc) init_code; - g->codes[init_code].suffix = (stbi_uc) init_code; + g->codes[init_code].prefix = -1; + g->codes[init_code].first = (stbi_uc) init_code; + g->codes[init_code].suffix = (stbi_uc) init_code; } // support no starting clear code @@ -6277,60 +6277,60 @@ static stbi_uc *stbi__process_gif_raster(stbi__context *s, stbi__gif *g) len = 0; for(;;) { - if (valid_bits < codesize) { - if (len == 0) { - len = stbi__get8(s); // start new block - if (len == 0) - return g->out; - } - --len; - bits |= (stbi__int32) stbi__get8(s) << valid_bits; - valid_bits += 8; - } else { - stbi__int32 code = bits & codemask; - bits >>= codesize; - valid_bits -= codesize; - // @OPTIMIZE: is there some way we can accelerate the non-clear path? - if (code == clear) { // clear code - codesize = lzw_cs + 1; - codemask = (1 << codesize) - 1; - avail = clear + 2; - oldcode = -1; - first = 0; - } else if (code == clear + 1) { // end of stream code - stbi__skip(s, len); - while ((len = stbi__get8(s)) > 0) - stbi__skip(s,len); - return g->out; - } else if (code <= avail) { - if (first) { - return stbi__errpuc("no clear code", "Corrupt GIF"); - } - - if (oldcode >= 0) { - p = &g->codes[avail++]; - if (avail > 8192) { - return stbi__errpuc("too many codes", "Corrupt GIF"); - } - - p->prefix = (stbi__int16) oldcode; - p->first = g->codes[oldcode].first; - p->suffix = (code == avail) ? p->first : g->codes[code].first; - } else if (code == avail) - return stbi__errpuc("illegal code in raster", "Corrupt GIF"); - - stbi__out_gif_code(g, (stbi__uint16) code); - - if ((avail & codemask) == 0 && avail <= 0x0FFF) { - codesize++; - codemask = (1 << codesize) - 1; - } - - oldcode = code; - } else { - return stbi__errpuc("illegal code in raster", "Corrupt GIF"); - } - } + if (valid_bits < codesize) { + if (len == 0) { + len = stbi__get8(s); // start new block + if (len == 0) + return g->out; + } + --len; + bits |= (stbi__int32) stbi__get8(s) << valid_bits; + valid_bits += 8; + } else { + stbi__int32 code = bits & codemask; + bits >>= codesize; + valid_bits -= codesize; + // @OPTIMIZE: is there some way we can accelerate the non-clear path? + if (code == clear) { // clear code + codesize = lzw_cs + 1; + codemask = (1 << codesize) - 1; + avail = clear + 2; + oldcode = -1; + first = 0; + } else if (code == clear + 1) { // end of stream code + stbi__skip(s, len); + while ((len = stbi__get8(s)) > 0) + stbi__skip(s,len); + return g->out; + } else if (code <= avail) { + if (first) { + return stbi__errpuc("no clear code", "Corrupt GIF"); + } + + if (oldcode >= 0) { + p = &g->codes[avail++]; + if (avail > 8192) { + return stbi__errpuc("too many codes", "Corrupt GIF"); + } + + p->prefix = (stbi__int16) oldcode; + p->first = g->codes[oldcode].first; + p->suffix = (code == avail) ? p->first : g->codes[code].first; + } else if (code == avail) + return stbi__errpuc("illegal code in raster", "Corrupt GIF"); + + stbi__out_gif_code(g, (stbi__uint16) code); + + if ((avail & codemask) == 0 && avail <= 0x0FFF) { + codesize++; + codemask = (1 << codesize) - 1; + } + + oldcode = code; + } else { + return stbi__errpuc("illegal code in raster", "Corrupt GIF"); + } + } } } @@ -6346,217 +6346,217 @@ static stbi_uc *stbi__gif_load_next(stbi__context *s, stbi__gif *g, int *comp, i // on first frame, any non-written pixels get the background colour (non-transparent) first_frame = 0; if (g->out == 0) { - if (!stbi__gif_header(s, g, comp,0)) return 0; // stbi__g_failure_reason set by stbi__gif_header - g->out = (stbi_uc *) stbi__malloc(4 * g->w * g->h); - g->background = (stbi_uc *) stbi__malloc(4 * g->w * g->h); - g->history = (stbi_uc *) stbi__malloc(g->w * g->h); - if (g->out == 0) return stbi__errpuc("outofmem", "Out of memory"); - - // image is treated as "tranparent" at the start - ie, nothing overwrites the current background; - // background colour is only used for pixels that are not rendered first frame, after that "background" - // color refers to teh color that was there the previous frame. - memset( g->out, 0x00, 4 * g->w * g->h ); - memset( g->background, 0x00, 4 * g->w * g->h ); // state of the background (starts transparent) - memset( g->history, 0x00, g->w * g->h ); // pixels that were affected previous frame - first_frame = 1; + if (!stbi__gif_header(s, g, comp,0)) return 0; // stbi__g_failure_reason set by stbi__gif_header + g->out = (stbi_uc *) stbi__malloc(4 * g->w * g->h); + g->background = (stbi_uc *) stbi__malloc(4 * g->w * g->h); + g->history = (stbi_uc *) stbi__malloc(g->w * g->h); + if (g->out == 0) return stbi__errpuc("outofmem", "Out of memory"); + + // image is treated as "tranparent" at the start - ie, nothing overwrites the current background; + // background colour is only used for pixels that are not rendered first frame, after that "background" + // color refers to teh color that was there the previous frame. + memset( g->out, 0x00, 4 * g->w * g->h ); + memset( g->background, 0x00, 4 * g->w * g->h ); // state of the background (starts transparent) + memset( g->history, 0x00, g->w * g->h ); // pixels that were affected previous frame + first_frame = 1; } else { - // second frame - how do we dispoase of the previous one? - dispose = (g->eflags & 0x1C) >> 2; - pcount = g->w * g->h; - - if ((dispose == 3) && (two_back == 0)) { - dispose = 2; // if I don't have an image to revert back to, default to the old background - } - - if (dispose == 3) { // use previous graphic - for (pi = 0; pi < pcount; ++pi) { - if (g->history[pi]) { - memcpy( &g->out[pi * 4], &two_back[pi * 4], 4 ); - } - } - } else if (dispose == 2) { - // restore what was changed last frame to background before that frame; - for (pi = 0; pi < pcount; ++pi) { - if (g->history[pi]) { - memcpy( &g->out[pi * 4], &g->background[pi * 4], 4 ); - } - } - } else { - // This is a non-disposal case eithe way, so just - // leave the pixels as is, and they will become the new background - // 1: do not dispose - // 0: not specified. - } - - // background is what out is after the undoing of the previou frame; - memcpy( g->background, g->out, 4 * g->w * g->h ); + // second frame - how do we dispoase of the previous one? + dispose = (g->eflags & 0x1C) >> 2; + pcount = g->w * g->h; + + if ((dispose == 3) && (two_back == 0)) { + dispose = 2; // if I don't have an image to revert back to, default to the old background + } + + if (dispose == 3) { // use previous graphic + for (pi = 0; pi < pcount; ++pi) { + if (g->history[pi]) { + memcpy( &g->out[pi * 4], &two_back[pi * 4], 4 ); + } + } + } else if (dispose == 2) { + // restore what was changed last frame to background before that frame; + for (pi = 0; pi < pcount; ++pi) { + if (g->history[pi]) { + memcpy( &g->out[pi * 4], &g->background[pi * 4], 4 ); + } + } + } else { + // This is a non-disposal case eithe way, so just + // leave the pixels as is, and they will become the new background + // 1: do not dispose + // 0: not specified. + } + + // background is what out is after the undoing of the previou frame; + memcpy( g->background, g->out, 4 * g->w * g->h ); } // clear my history; - memset( g->history, 0x00, g->w * g->h ); // pixels that were affected previous frame + memset( g->history, 0x00, g->w * g->h ); // pixels that were affected previous frame for (;;) { - int tag = stbi__get8(s); - switch (tag) { - case 0x2C: /* Image Descriptor */ - { - stbi__int32 x, y, w, h; - stbi_uc *o; - - x = stbi__get16le(s); - y = stbi__get16le(s); - w = stbi__get16le(s); - h = stbi__get16le(s); - if (((x + w) > (g->w)) || ((y + h) > (g->h))) - return stbi__errpuc("bad Image Descriptor", "Corrupt GIF"); - - g->line_size = g->w * 4; - g->start_x = x * 4; - g->start_y = y * g->line_size; - g->max_x = g->start_x + w * 4; - g->max_y = g->start_y + h * g->line_size; - g->cur_x = g->start_x; - g->cur_y = g->start_y; - - g->lflags = stbi__get8(s); - - if (g->lflags & 0x40) { - g->step = 8 * g->line_size; // first interlaced spacing - g->parse = 3; - } else { - g->step = g->line_size; - g->parse = 0; - } - - if (g->lflags & 0x80) { - stbi__gif_parse_colortable(s,g->lpal, 2 << (g->lflags & 7), g->eflags & 0x01 ? g->transparent : -1); - g->color_table = (stbi_uc *) g->lpal; - } else if (g->flags & 0x80) { - g->color_table = (stbi_uc *) g->pal; - } else - return stbi__errpuc("missing color table", "Corrupt GIF"); - - o = stbi__process_gif_raster(s, g); - if (o == NULL) return NULL; - - // if this was the first frame, - pcount = g->w * g->h; - if (first_frame && (g->bgindex > 0)) { - // if first frame, any pixel not drawn to gets the background color - for (pi = 0; pi < pcount; ++pi) { - if (g->history[pi] == 0) { - g->pal[g->bgindex][3] = 255; // just in case it was made transparent, undo that; It will be reset next frame if need be; - memcpy( &g->out[pi * 4], &g->pal[g->bgindex], 4 ); - } - } - } - - return o; - } - - case 0x21: // Comment Extension. - { - int len; - int ext = stbi__get8(s); - if (ext == 0xF9) { // Graphic Control Extension. - len = stbi__get8(s); - if (len == 4) { - g->eflags = stbi__get8(s); - g->delay = 10 * stbi__get16le(s); // delay - 1/100th of a second, saving as 1/1000ths. - - // unset old transparent - if (g->transparent >= 0) { - g->pal[g->transparent][3] = 255; - } - if (g->eflags & 0x01) { - g->transparent = stbi__get8(s); - if (g->transparent >= 0) { - g->pal[g->transparent][3] = 0; - } - } else { - // don't need transparent - stbi__skip(s, 1); - g->transparent = -1; - } - } else { - stbi__skip(s, len); - break; - } - } - while ((len = stbi__get8(s)) != 0) { - stbi__skip(s, len); - } - break; - } - - case 0x3B: // gif stream termination code - return (stbi_uc *) s; // using '1' causes warning on some compilers - - default: - return stbi__errpuc("unknown code", "Corrupt GIF"); - } + int tag = stbi__get8(s); + switch (tag) { + case 0x2C: /* Image Descriptor */ + { + stbi__int32 x, y, w, h; + stbi_uc *o; + + x = stbi__get16le(s); + y = stbi__get16le(s); + w = stbi__get16le(s); + h = stbi__get16le(s); + if (((x + w) > (g->w)) || ((y + h) > (g->h))) + return stbi__errpuc("bad Image Descriptor", "Corrupt GIF"); + + g->line_size = g->w * 4; + g->start_x = x * 4; + g->start_y = y * g->line_size; + g->max_x = g->start_x + w * 4; + g->max_y = g->start_y + h * g->line_size; + g->cur_x = g->start_x; + g->cur_y = g->start_y; + + g->lflags = stbi__get8(s); + + if (g->lflags & 0x40) { + g->step = 8 * g->line_size; // first interlaced spacing + g->parse = 3; + } else { + g->step = g->line_size; + g->parse = 0; + } + + if (g->lflags & 0x80) { + stbi__gif_parse_colortable(s,g->lpal, 2 << (g->lflags & 7), g->eflags & 0x01 ? g->transparent : -1); + g->color_table = (stbi_uc *) g->lpal; + } else if (g->flags & 0x80) { + g->color_table = (stbi_uc *) g->pal; + } else + return stbi__errpuc("missing color table", "Corrupt GIF"); + + o = stbi__process_gif_raster(s, g); + if (o == NULL) return NULL; + + // if this was the first frame, + pcount = g->w * g->h; + if (first_frame && (g->bgindex > 0)) { + // if first frame, any pixel not drawn to gets the background color + for (pi = 0; pi < pcount; ++pi) { + if (g->history[pi] == 0) { + g->pal[g->bgindex][3] = 255; // just in case it was made transparent, undo that; It will be reset next frame if need be; + memcpy( &g->out[pi * 4], &g->pal[g->bgindex], 4 ); + } + } + } + + return o; + } + + case 0x21: // Comment Extension. + { + int len; + int ext = stbi__get8(s); + if (ext == 0xF9) { // Graphic Control Extension. + len = stbi__get8(s); + if (len == 4) { + g->eflags = stbi__get8(s); + g->delay = 10 * stbi__get16le(s); // delay - 1/100th of a second, saving as 1/1000ths. + + // unset old transparent + if (g->transparent >= 0) { + g->pal[g->transparent][3] = 255; + } + if (g->eflags & 0x01) { + g->transparent = stbi__get8(s); + if (g->transparent >= 0) { + g->pal[g->transparent][3] = 0; + } + } else { + // don't need transparent + stbi__skip(s, 1); + g->transparent = -1; + } + } else { + stbi__skip(s, len); + break; + } + } + while ((len = stbi__get8(s)) != 0) { + stbi__skip(s, len); + } + break; + } + + case 0x3B: // gif stream termination code + return (stbi_uc *) s; // using '1' causes warning on some compilers + + default: + return stbi__errpuc("unknown code", "Corrupt GIF"); + } } } static void *stbi__load_gif_main(stbi__context *s, int **delays, int *x, int *y, int *z, int *comp, int req_comp) { if (stbi__gif_test(s)) { - int layers = 0; - stbi_uc *u = 0; - stbi_uc *out = 0; - stbi_uc *two_back = 0; - stbi__gif g; - int stride; - memset(&g, 0, sizeof(g)); - if (delays) { - *delays = 0; - } - - do { - u = stbi__gif_load_next(s, &g, comp, req_comp, two_back); - if (u == (stbi_uc *) s) u = 0; // end of animated gif marker - - if (u) { - *x = g.w; - *y = g.h; - ++layers; - stride = g.w * g.h * 4; - - if (out) { - out = (stbi_uc*) STBI_REALLOC( out, layers * stride ); - if (delays) { - *delays = (int*) STBI_REALLOC( *delays, sizeof(int) * layers ); - } - } else { - out = (stbi_uc*)stbi__malloc( layers * stride ); - if (delays) { - *delays = (int*) stbi__malloc( layers * sizeof(int) ); - } - } - memcpy( out + ((layers - 1) * stride), u, stride ); - if (layers >= 2) { - two_back = out - 2 * stride; - } - - if (delays) { - (*delays)[layers - 1U] = g.delay; - } - } - } while (u != 0); - - // free temp buffer; - STBI_FREE(g.out); - STBI_FREE(g.history); - STBI_FREE(g.background); - - // do the final conversion after loading everything; - if (req_comp && req_comp != 4) - out = stbi__convert_format(out, 4, req_comp, layers * g.w, g.h); - - *z = layers; - return out; + int layers = 0; + stbi_uc *u = 0; + stbi_uc *out = 0; + stbi_uc *two_back = 0; + stbi__gif g; + int stride; + memset(&g, 0, sizeof(g)); + if (delays) { + *delays = 0; + } + + do { + u = stbi__gif_load_next(s, &g, comp, req_comp, two_back); + if (u == (stbi_uc *) s) u = 0; // end of animated gif marker + + if (u) { + *x = g.w; + *y = g.h; + ++layers; + stride = g.w * g.h * 4; + + if (out) { + out = (stbi_uc*) STBI_REALLOC( out, layers * stride ); + if (delays) { + *delays = (int*) STBI_REALLOC( *delays, sizeof(int) * layers ); + } + } else { + out = (stbi_uc*)stbi__malloc( layers * stride ); + if (delays) { + *delays = (int*) stbi__malloc( layers * sizeof(int) ); + } + } + memcpy( out + ((layers - 1) * stride), u, stride ); + if (layers >= 2) { + two_back = out - 2 * stride; + } + + if (delays) { + (*delays)[layers - 1U] = g.delay; + } + } + } while (u != 0); + + // free temp buffer; + STBI_FREE(g.out); + STBI_FREE(g.history); + STBI_FREE(g.background); + + // do the final conversion after loading everything; + if (req_comp && req_comp != 4) + out = stbi__convert_format(out, 4, req_comp, layers * g.w, g.h); + + *z = layers; + return out; } else { - return stbi__errpuc("not GIF", "Image was not as a gif type."); + return stbi__errpuc("not GIF", "Image was not as a gif type."); } } @@ -6569,13 +6569,13 @@ static void *stbi__gif_load(stbi__context *s, int *x, int *y, int *comp, int req u = stbi__gif_load_next(s, &g, comp, req_comp, 0); if (u == (stbi_uc *) s) u = 0; // end of animated gif marker if (u) { - *x = g.w; - *y = g.h; + *x = g.w; + *y = g.h; - // moved conversion to after successful load so that the same - // can be done for multiple frames. - if (req_comp && req_comp != 4) - u = stbi__convert_format(u, 4, req_comp, g.w, g.h); + // moved conversion to after successful load so that the same + // can be done for multiple frames. + if (req_comp && req_comp != 4) + u = stbi__convert_format(u, 4, req_comp, g.w, g.h); } // free buffers needed for multiple frame loading; @@ -6599,8 +6599,8 @@ static int stbi__hdr_test_core(stbi__context *s, const char *signature) { int i; for (i=0; signature[i]; ++i) - if (stbi__get8(s) != signature[i]) - return 0; + if (stbi__get8(s) != signature[i]) + return 0; stbi__rewind(s); return 1; } @@ -6610,8 +6610,8 @@ static int stbi__hdr_test(stbi__context* s) int r = stbi__hdr_test_core(s, "#?RADIANCE\n"); stbi__rewind(s); if(!r) { - r = stbi__hdr_test_core(s, "#?RGBE\n"); - stbi__rewind(s); + r = stbi__hdr_test_core(s, "#?RGBE\n"); + stbi__rewind(s); } return r; } @@ -6625,14 +6625,14 @@ static char *stbi__hdr_gettoken(stbi__context *z, char *buffer) c = (char) stbi__get8(z); while (!stbi__at_eof(z) && c != '\n') { - buffer[len++] = c; - if (len == STBI__HDR_BUFLEN-1) { - // flush to end of line - while (!stbi__at_eof(z) && stbi__get8(z) != '\n') - ; - break; - } - c = (char) stbi__get8(z); + buffer[len++] = c; + if (len == STBI__HDR_BUFLEN-1) { + // flush to end of line + while (!stbi__at_eof(z) && stbi__get8(z) != '\n') + ; + break; + } + c = (char) stbi__get8(z); } buffer[len] = 0; @@ -6642,27 +6642,27 @@ static char *stbi__hdr_gettoken(stbi__context *z, char *buffer) static void stbi__hdr_convert(float *output, stbi_uc *input, int req_comp) { if ( input[3] != 0 ) { - float f1; - // Exponent - f1 = (float) ldexp(1.0f, input[3] - (int)(128 + 8)); - if (req_comp <= 2) - output[0] = (input[0] + input[1] + input[2]) * f1 / 3; - else { - output[0] = input[0] * f1; - output[1] = input[1] * f1; - output[2] = input[2] * f1; - } - if (req_comp == 2) output[1] = 1; - if (req_comp == 4) output[3] = 1; + float f1; + // Exponent + f1 = (float) ldexp(1.0f, input[3] - (int)(128 + 8)); + if (req_comp <= 2) + output[0] = (input[0] + input[1] + input[2]) * f1 / 3; + else { + output[0] = input[0] * f1; + output[1] = input[1] * f1; + output[2] = input[2] * f1; + } + if (req_comp == 2) output[1] = 1; + if (req_comp == 4) output[3] = 1; } else { - switch (req_comp) { - case 4: output[3] = 1; /* fallthrough */ - case 3: output[0] = output[1] = output[2] = 0; - break; - case 2: output[1] = 1; /* fallthrough */ - case 1: output[0] = 0; - break; - } + switch (req_comp) { + case 4: output[3] = 1; /* fallthrough */ + case 3: output[0] = output[1] = output[2] = 0; + break; + case 2: output[1] = 1; /* fallthrough */ + case 1: output[0] = 0; + break; + } } } @@ -6683,16 +6683,16 @@ static float *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int re // Check identifier headerToken = stbi__hdr_gettoken(s,buffer); if (strcmp(headerToken, "#?RADIANCE") != 0 && strcmp(headerToken, "#?RGBE") != 0) - return stbi__errpf("not HDR", "Corrupt HDR image"); + return stbi__errpf("not HDR", "Corrupt HDR image"); // Parse header for(;;) { - token = stbi__hdr_gettoken(s,buffer); - if (token[0] == 0) break; - if (strcmp(token, "FORMAT=32-bit_rle_rgbe") == 0) valid = 1; + token = stbi__hdr_gettoken(s,buffer); + if (token[0] == 0) break; + if (strcmp(token, "FORMAT=32-bit_rle_rgbe") == 0) valid = 1; } - if (!valid) return stbi__errpf("unsupported format", "Unsupported HDR format"); + if (!valid) return stbi__errpf("unsupported format", "Unsupported HDR format"); // Parse width and height // can't use sscanf() if we're not using stdio! @@ -6712,83 +6712,83 @@ static float *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int re if (req_comp == 0) req_comp = 3; if (!stbi__mad4sizes_valid(width, height, req_comp, sizeof(float), 0)) - return stbi__errpf("too large", "HDR image is too large"); + return stbi__errpf("too large", "HDR image is too large"); // Read data hdr_data = (float *) stbi__malloc_mad4(width, height, req_comp, sizeof(float), 0); if (!hdr_data) - return stbi__errpf("outofmem", "Out of memory"); + return stbi__errpf("outofmem", "Out of memory"); // Load image data // image data is stored as some number of sca if ( width < 8 || width >= 32768) { - // Read flat data - for (j=0; j < height; ++j) { - for (i=0; i < width; ++i) { - stbi_uc rgbe[4]; - main_decode_loop: - stbi__getn(s, rgbe, 4); - stbi__hdr_convert(hdr_data + j * width * req_comp + i * req_comp, rgbe, req_comp); - } - } + // Read flat data + for (j=0; j < height; ++j) { + for (i=0; i < width; ++i) { + stbi_uc rgbe[4]; + main_decode_loop: + stbi__getn(s, rgbe, 4); + stbi__hdr_convert(hdr_data + j * width * req_comp + i * req_comp, rgbe, req_comp); + } + } } else { - // Read RLE-encoded data - scanline = NULL; - - for (j = 0; j < height; ++j) { - c1 = stbi__get8(s); - c2 = stbi__get8(s); - len = stbi__get8(s); - if (c1 != 2 || c2 != 2 || (len & 0x80)) { - // not run-length encoded, so we have to actually use THIS data as a decoded - // pixel (note this can't be a valid pixel--one of RGB must be >= 128) - stbi_uc rgbe[4]; - rgbe[0] = (stbi_uc) c1; - rgbe[1] = (stbi_uc) c2; - rgbe[2] = (stbi_uc) len; - rgbe[3] = (stbi_uc) stbi__get8(s); - stbi__hdr_convert(hdr_data, rgbe, req_comp); - i = 1; - j = 0; - STBI_FREE(scanline); - goto main_decode_loop; // yes, this makes no sense - } - len <<= 8; - len |= stbi__get8(s); - if (len != width) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("invalid decoded scanline length", "corrupt HDR"); } - if (scanline == NULL) { - scanline = (stbi_uc *) stbi__malloc_mad2(width, 4, 0); - if (!scanline) { - STBI_FREE(hdr_data); - return stbi__errpf("outofmem", "Out of memory"); - } - } - - for (k = 0; k < 4; ++k) { - int nleft; - i = 0; - while ((nleft = width - i) > 0) { - count = stbi__get8(s); - if (count > 128) { - // Run - value = stbi__get8(s); - count -= 128; - if (count > nleft) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("corrupt", "bad RLE data in HDR"); } - for (z = 0; z < count; ++z) - scanline[i++ * 4 + k] = value; - } else { - // Dump - if (count > nleft) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("corrupt", "bad RLE data in HDR"); } - for (z = 0; z < count; ++z) - scanline[i++ * 4 + k] = stbi__get8(s); - } - } - } - for (i=0; i < width; ++i) - stbi__hdr_convert(hdr_data+(j*width + i)*req_comp, scanline + i*4, req_comp); - } - if (scanline) - STBI_FREE(scanline); + // Read RLE-encoded data + scanline = NULL; + + for (j = 0; j < height; ++j) { + c1 = stbi__get8(s); + c2 = stbi__get8(s); + len = stbi__get8(s); + if (c1 != 2 || c2 != 2 || (len & 0x80)) { + // not run-length encoded, so we have to actually use THIS data as a decoded + // pixel (note this can't be a valid pixel--one of RGB must be >= 128) + stbi_uc rgbe[4]; + rgbe[0] = (stbi_uc) c1; + rgbe[1] = (stbi_uc) c2; + rgbe[2] = (stbi_uc) len; + rgbe[3] = (stbi_uc) stbi__get8(s); + stbi__hdr_convert(hdr_data, rgbe, req_comp); + i = 1; + j = 0; + STBI_FREE(scanline); + goto main_decode_loop; // yes, this makes no sense + } + len <<= 8; + len |= stbi__get8(s); + if (len != width) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("invalid decoded scanline length", "corrupt HDR"); } + if (scanline == NULL) { + scanline = (stbi_uc *) stbi__malloc_mad2(width, 4, 0); + if (!scanline) { + STBI_FREE(hdr_data); + return stbi__errpf("outofmem", "Out of memory"); + } + } + + for (k = 0; k < 4; ++k) { + int nleft; + i = 0; + while ((nleft = width - i) > 0) { + count = stbi__get8(s); + if (count > 128) { + // Run + value = stbi__get8(s); + count -= 128; + if (count > nleft) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("corrupt", "bad RLE data in HDR"); } + for (z = 0; z < count; ++z) + scanline[i++ * 4 + k] = value; + } else { + // Dump + if (count > nleft) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("corrupt", "bad RLE data in HDR"); } + for (z = 0; z < count; ++z) + scanline[i++ * 4 + k] = stbi__get8(s); + } + } + } + for (i=0; i < width; ++i) + stbi__hdr_convert(hdr_data+(j*width + i)*req_comp, scanline + i*4, req_comp); + } + if (scanline) + STBI_FREE(scanline); } return hdr_data; @@ -6806,31 +6806,31 @@ static int stbi__hdr_info(stbi__context *s, int *x, int *y, int *comp) if (!comp) comp = &dummy; if (stbi__hdr_test(s) == 0) { - stbi__rewind( s ); - return 0; + stbi__rewind( s ); + return 0; } for(;;) { - token = stbi__hdr_gettoken(s,buffer); - if (token[0] == 0) break; - if (strcmp(token, "FORMAT=32-bit_rle_rgbe") == 0) valid = 1; + token = stbi__hdr_gettoken(s,buffer); + if (token[0] == 0) break; + if (strcmp(token, "FORMAT=32-bit_rle_rgbe") == 0) valid = 1; } if (!valid) { - stbi__rewind( s ); - return 0; + stbi__rewind( s ); + return 0; } token = stbi__hdr_gettoken(s,buffer); if (strncmp(token, "-Y ", 3)) { - stbi__rewind( s ); - return 0; + stbi__rewind( s ); + return 0; } token += 3; *y = (int) strtol(token, &token, 10); while (*token == ' ') ++token; if (strncmp(token, "+X ", 3)) { - stbi__rewind( s ); - return 0; + stbi__rewind( s ); + return 0; } token += 3; *x = (int) strtol(token, NULL, 10); @@ -6849,7 +6849,7 @@ static int stbi__bmp_info(stbi__context *s, int *x, int *y, int *comp) p = stbi__bmp_parse_header(s, &info); stbi__rewind( s ); if (p == NULL) - return 0; + return 0; if (x) *x = s->img_x; if (y) *y = s->img_y; if (comp) *comp = info.ma ? 4 : 3; @@ -6865,29 +6865,29 @@ static int stbi__psd_info(stbi__context *s, int *x, int *y, int *comp) if (!y) y = &dummy; if (!comp) comp = &dummy; if (stbi__get32be(s) != 0x38425053) { - stbi__rewind( s ); - return 0; + stbi__rewind( s ); + return 0; } if (stbi__get16be(s) != 1) { - stbi__rewind( s ); - return 0; + stbi__rewind( s ); + return 0; } stbi__skip(s, 6); channelCount = stbi__get16be(s); if (channelCount < 0 || channelCount > 16) { - stbi__rewind( s ); - return 0; + stbi__rewind( s ); + return 0; } *y = stbi__get32be(s); *x = stbi__get32be(s); depth = stbi__get16be(s); if (depth != 8 && depth != 16) { - stbi__rewind( s ); - return 0; + stbi__rewind( s ); + return 0; } if (stbi__get16be(s) != 3) { - stbi__rewind( s ); - return 0; + stbi__rewind( s ); + return 0; } *comp = 4; return 1; @@ -6897,25 +6897,25 @@ static int stbi__psd_is16(stbi__context *s) { int channelCount, depth; if (stbi__get32be(s) != 0x38425053) { - stbi__rewind( s ); - return 0; + stbi__rewind( s ); + return 0; } if (stbi__get16be(s) != 1) { - stbi__rewind( s ); - return 0; + stbi__rewind( s ); + return 0; } stbi__skip(s, 6); channelCount = stbi__get16be(s); if (channelCount < 0 || channelCount > 16) { - stbi__rewind( s ); - return 0; + stbi__rewind( s ); + return 0; } (void) stbi__get32be(s); (void) stbi__get32be(s); depth = stbi__get16be(s); if (depth != 16) { - stbi__rewind( s ); - return 0; + stbi__rewind( s ); + return 0; } return 1; } @@ -6932,8 +6932,8 @@ static int stbi__pic_info(stbi__context *s, int *x, int *y, int *comp) if (!comp) comp = &dummy; if (!stbi__pic_is4(s,"\x53\x80\xF6\x34")) { - stbi__rewind(s); - return 0; + stbi__rewind(s); + return 0; } stbi__skip(s, 88); @@ -6941,37 +6941,37 @@ static int stbi__pic_info(stbi__context *s, int *x, int *y, int *comp) *x = stbi__get16be(s); *y = stbi__get16be(s); if (stbi__at_eof(s)) { - stbi__rewind( s); - return 0; + stbi__rewind( s); + return 0; } if ( (*x) != 0 && (1 << 28) / (*x) < (*y)) { - stbi__rewind( s ); - return 0; + stbi__rewind( s ); + return 0; } stbi__skip(s, 8); do { - stbi__pic_packet *packet; - - if (num_packets==sizeof(packets)/sizeof(packets[0])) - return 0; - - packet = &packets[num_packets++]; - chained = stbi__get8(s); - packet->size = stbi__get8(s); - packet->type = stbi__get8(s); - packet->channel = stbi__get8(s); - act_comp |= packet->channel; - - if (stbi__at_eof(s)) { - stbi__rewind( s ); - return 0; - } - if (packet->size != 8) { - stbi__rewind( s ); - return 0; - } + stbi__pic_packet *packet; + + if (num_packets==sizeof(packets)/sizeof(packets[0])) + return 0; + + packet = &packets[num_packets++]; + chained = stbi__get8(s); + packet->size = stbi__get8(s); + packet->type = stbi__get8(s); + packet->channel = stbi__get8(s); + act_comp |= packet->channel; + + if (stbi__at_eof(s)) { + stbi__rewind( s ); + return 0; + } + if (packet->size != 8) { + stbi__rewind( s ); + return 0; + } } while (chained); *comp = (act_comp & 0x10 ? 4 : 3); @@ -6988,20 +6988,20 @@ static int stbi__pic_info(stbi__context *s, int *x, int *y, int *comp) // PPM: http://netpbm.sourceforge.net/doc/ppm.html // // Known limitations: -// Does not support comments in the header section -// Does not support ASCII image data (formats P2 and P3) -// Does not support 16-bit-per-channel +// Does not support comments in the header section +// Does not support ASCII image data (formats P2 and P3) +// Does not support 16-bit-per-channel #ifndef STBI_NO_PNM -static int stbi__pnm_test(stbi__context *s) +static int stbi__pnm_test(stbi__context *s) { char p, t; p = (char) stbi__get8(s); t = (char) stbi__get8(s); if (p != 'P' || (t != '5' && t != '6')) { - stbi__rewind( s ); - return 0; + stbi__rewind( s ); + return 0; } return 1; } @@ -7012,63 +7012,63 @@ static void *stbi__pnm_load(stbi__context *s, int *x, int *y, int *comp, int req STBI_NOTUSED(ri); if (!stbi__pnm_info(s, (int *)&s->img_x, (int *)&s->img_y, (int *)&s->img_n)) - return 0; + return 0; *x = s->img_x; *y = s->img_y; if (comp) *comp = s->img_n; if (!stbi__mad3sizes_valid(s->img_n, s->img_x, s->img_y, 0)) - return stbi__errpuc("too large", "PNM too large"); + return stbi__errpuc("too large", "PNM too large"); out = (stbi_uc *) stbi__malloc_mad3(s->img_n, s->img_x, s->img_y, 0); if (!out) return stbi__errpuc("outofmem", "Out of memory"); stbi__getn(s, out, s->img_n * s->img_x * s->img_y); if (req_comp && req_comp != s->img_n) { - out = stbi__convert_format(out, s->img_n, req_comp, s->img_x, s->img_y); - if (out == NULL) return out; // stbi__convert_format frees input on failure + out = stbi__convert_format(out, s->img_n, req_comp, s->img_x, s->img_y); + if (out == NULL) return out; // stbi__convert_format frees input on failure } return out; } -static int stbi__pnm_isspace(char c) +static int stbi__pnm_isspace(char c) { return c == ' ' || c == '\t' || c == '\n' || c == '\v' || c == '\f' || c == '\r'; } -static void stbi__pnm_skip_whitespace(stbi__context *s, char *c) +static void stbi__pnm_skip_whitespace(stbi__context *s, char *c) { for (;;) { - while (!stbi__at_eof(s) && stbi__pnm_isspace(*c)) - *c = (char) stbi__get8(s); + while (!stbi__at_eof(s) && stbi__pnm_isspace(*c)) + *c = (char) stbi__get8(s); - if (stbi__at_eof(s) || *c != '#') - break; + if (stbi__at_eof(s) || *c != '#') + break; - while (!stbi__at_eof(s) && *c != '\n' && *c != '\r' ) - *c = (char) stbi__get8(s); + while (!stbi__at_eof(s) && *c != '\n' && *c != '\r' ) + *c = (char) stbi__get8(s); } } -static int stbi__pnm_isdigit(char c) +static int stbi__pnm_isdigit(char c) { return c >= '0' && c <= '9'; } -static int stbi__pnm_getinteger(stbi__context *s, char *c) +static int stbi__pnm_getinteger(stbi__context *s, char *c) { int value = 0; while (!stbi__at_eof(s) && stbi__pnm_isdigit(*c)) { - value = value*10 + (*c - '0'); - *c = (char) stbi__get8(s); + value = value*10 + (*c - '0'); + *c = (char) stbi__get8(s); } return value; } -static int stbi__pnm_info(stbi__context *s, int *x, int *y, int *comp) +static int stbi__pnm_info(stbi__context *s, int *x, int *y, int *comp) { int maxv, dummy; char c, p, t; @@ -7083,11 +7083,11 @@ static int stbi__pnm_info(stbi__context *s, int *x, int *y, int *comp) p = (char) stbi__get8(s); t = (char) stbi__get8(s); if (p != 'P' || (t != '5' && t != '6')) { - stbi__rewind(s); - return 0; + stbi__rewind(s); + return 0; } - *comp = (t == '6') ? 3 : 1; // '5' is 1-component .pgm; '6' is 3-component .ppm + *comp = (t == '6') ? 3 : 1; // '5' is 1-component .pgm; '6' is 3-component .ppm c = (char) stbi__get8(s); stbi__pnm_skip_whitespace(s, &c); @@ -7098,12 +7098,12 @@ static int stbi__pnm_info(stbi__context *s, int *x, int *y, int *comp) *y = stbi__pnm_getinteger(s, &c); // read height stbi__pnm_skip_whitespace(s, &c); - maxv = stbi__pnm_getinteger(s, &c); // read max value + maxv = stbi__pnm_getinteger(s, &c); // read max value if (maxv > 255) - return stbi__err("max value > 255", "PPM image not 8-bit"); + return stbi__err("max value > 255", "PPM image not 8-bit"); else - return 1; + return 1; } #endif @@ -7144,7 +7144,7 @@ static int stbi__info_main(stbi__context *s, int *x, int *y, int *comp) // test tga last because it's a crappy test! #ifndef STBI_NO_TGA if (stbi__tga_info(s, x, y, comp)) - return 1; + return 1; #endif return stbi__err("unknown image type", "Image not of any known type, or corrupt"); } @@ -7165,12 +7165,12 @@ static int stbi__is_16_main(stbi__context *s) #ifndef STBI_NO_STDIO STBIDEF int stbi_info(char const *filename, int *x, int *y, int *comp) { - FILE *f = stbi__fopen(filename, "rb"); - int result; - if (!f) return stbi__err("can't fopen", "Unable to open file"); - result = stbi_info_from_file(f, x, y, comp); - fclose(f); - return result; + FILE *f = stbi__fopen(filename, "rb"); + int result; + if (!f) return stbi__err("can't fopen", "Unable to open file"); + result = stbi_info_from_file(f, x, y, comp); + fclose(f); + return result; } STBIDEF int stbi_info_from_file(FILE *f, int *x, int *y, int *comp) @@ -7186,12 +7186,12 @@ STBIDEF int stbi_info_from_file(FILE *f, int *x, int *y, int *comp) STBIDEF int stbi_is_16_bit(char const *filename) { - FILE *f = stbi__fopen(filename, "rb"); - int result; - if (!f) return stbi__err("can't fopen", "Unable to open file"); - result = stbi_is_16_bit_from_file(f); - fclose(f); - return result; + FILE *f = stbi__fopen(filename, "rb"); + int result; + if (!f) return stbi__err("can't fopen", "Unable to open file"); + result = stbi_is_16_bit_from_file(f); + fclose(f); + return result; } STBIDEF int stbi_is_16_bit_from_file(FILE *f) @@ -7238,184 +7238,184 @@ STBIDEF int stbi_is_16_bit_from_callbacks(stbi_io_callbacks const *c, void *user /* revision history: - 2.19 (2018-02-11) fix warning - 2.18 (2018-01-30) fix warnings - 2.17 (2018-01-29) change sbti__shiftsigned to avoid clang -O2 bug - 1-bit BMP - *_is_16_bit api - avoid warnings - 2.16 (2017-07-23) all functions have 16-bit variants; - STBI_NO_STDIO works again; - compilation fixes; - fix rounding in unpremultiply; - optimize vertical flip; - disable raw_len validation; - documentation fixes - 2.15 (2017-03-18) fix png-1,2,4 bug; now all Imagenet JPGs decode; - warning fixes; disable run-time SSE detection on gcc; - uniform handling of optional "return" values; - thread-safe initialization of zlib tables - 2.14 (2017-03-03) remove deprecated STBI_JPEG_OLD; fixes for Imagenet JPGs - 2.13 (2016-11-29) add 16-bit API, only supported for PNG right now - 2.12 (2016-04-02) fix typo in 2.11 PSD fix that caused crashes - 2.11 (2016-04-02) allocate large structures on the stack - remove white matting for transparent PSD - fix reported channel count for PNG & BMP - re-enable SSE2 in non-gcc 64-bit - support RGB-formatted JPEG - read 16-bit PNGs (only as 8-bit) - 2.10 (2016-01-22) avoid warning introduced in 2.09 by STBI_REALLOC_SIZED - 2.09 (2016-01-16) allow comments in PNM files - 16-bit-per-pixel TGA (not bit-per-component) - info() for TGA could break due to .hdr handling - info() for BMP to shares code instead of sloppy parse - can use STBI_REALLOC_SIZED if allocator doesn't support realloc - code cleanup - 2.08 (2015-09-13) fix to 2.07 cleanup, reading RGB PSD as RGBA - 2.07 (2015-09-13) fix compiler warnings - partial animated GIF support - limited 16-bpc PSD support - #ifdef unused functions - bug with < 92 byte PIC,PNM,HDR,TGA - 2.06 (2015-04-19) fix bug where PSD returns wrong '*comp' value - 2.05 (2015-04-19) fix bug in progressive JPEG handling, fix warning - 2.04 (2015-04-15) try to re-enable SIMD on MinGW 64-bit - 2.03 (2015-04-12) extra corruption checking (mmozeiko) - stbi_set_flip_vertically_on_load (nguillemot) - fix NEON support; fix mingw support - 2.02 (2015-01-19) fix incorrect assert, fix warning - 2.01 (2015-01-17) fix various warnings; suppress SIMD on gcc 32-bit without -msse2 - 2.00b (2014-12-25) fix STBI_MALLOC in progressive JPEG - 2.00 (2014-12-25) optimize JPG, including x86 SSE2 & NEON SIMD (ryg) - progressive JPEG (stb) - PGM/PPM support (Ken Miller) - STBI_MALLOC,STBI_REALLOC,STBI_FREE - GIF bugfix -- seemingly never worked - STBI_NO_*, STBI_ONLY_* - 1.48 (2014-12-14) fix incorrectly-named assert() - 1.47 (2014-12-14) 1/2/4-bit PNG support, both direct and paletted (Omar Cornut & stb) - optimize PNG (ryg) - fix bug in interlaced PNG with user-specified channel count (stb) - 1.46 (2014-08-26) - fix broken tRNS chunk (colorkey-style transparency) in non-paletted PNG - 1.45 (2014-08-16) - fix MSVC-ARM internal compiler error by wrapping malloc - 1.44 (2014-08-07) - various warning fixes from Ronny Chevalier - 1.43 (2014-07-15) - fix MSVC-only compiler problem in code changed in 1.42 - 1.42 (2014-07-09) - don't define _CRT_SECURE_NO_WARNINGS (affects user code) - fixes to stbi__cleanup_jpeg path - added STBI_ASSERT to avoid requiring assert.h - 1.41 (2014-06-25) - fix search&replace from 1.36 that messed up comments/error messages - 1.40 (2014-06-22) - fix gcc struct-initialization warning - 1.39 (2014-06-15) - fix to TGA optimization when req_comp != number of components in TGA; - fix to GIF loading because BMP wasn't rewinding (whoops, no GIFs in my test suite) - add support for BMP version 5 (more ignored fields) - 1.38 (2014-06-06) - suppress MSVC warnings on integer casts truncating values - fix accidental rename of 'skip' field of I/O - 1.37 (2014-06-04) - remove duplicate typedef - 1.36 (2014-06-03) - convert to header file single-file library - if de-iphone isn't set, load iphone images color-swapped instead of returning NULL - 1.35 (2014-05-27) - various warnings - fix broken STBI_SIMD path - fix bug where stbi_load_from_file no longer left file pointer in correct place - fix broken non-easy path for 32-bit BMP (possibly never used) - TGA optimization by Arseny Kapoulkine - 1.34 (unknown) - use STBI_NOTUSED in stbi__resample_row_generic(), fix one more leak in tga failure case - 1.33 (2011-07-14) - make stbi_is_hdr work in STBI_NO_HDR (as specified), minor compiler-friendly improvements - 1.32 (2011-07-13) - support for "info" function for all supported filetypes (SpartanJ) - 1.31 (2011-06-20) - a few more leak fixes, bug in PNG handling (SpartanJ) - 1.30 (2011-06-11) - added ability to load files via callbacks to accomidate custom input streams (Ben Wenger) - removed deprecated format-specific test/load functions - removed support for installable file formats (stbi_loader) -- would have been broken for IO callbacks anyway - error cases in bmp and tga give messages and don't leak (Raymond Barbiero, grisha) - fix inefficiency in decoding 32-bit BMP (David Woo) - 1.29 (2010-08-16) - various warning fixes from Aurelien Pocheville - 1.28 (2010-08-01) - fix bug in GIF palette transparency (SpartanJ) - 1.27 (2010-08-01) - cast-to-stbi_uc to fix warnings - 1.26 (2010-07-24) - fix bug in file buffering for PNG reported by SpartanJ - 1.25 (2010-07-17) - refix trans_data warning (Won Chun) - 1.24 (2010-07-12) - perf improvements reading from files on platforms with lock-heavy fgetc() - minor perf improvements for jpeg - deprecated type-specific functions so we'll get feedback if they're needed - attempt to fix trans_data warning (Won Chun) - 1.23 fixed bug in iPhone support - 1.22 (2010-07-10) - removed image *writing* support - stbi_info support from Jetro Lauha - GIF support from Jean-Marc Lienher - iPhone PNG-extensions from James Brown - warning-fixes from Nicolas Schulz and Janez Zemva (i.stbi__err. Janez (U+017D)emva) - 1.21 fix use of 'stbi_uc' in header (reported by jon blow) - 1.20 added support for Softimage PIC, by Tom Seddon - 1.19 bug in interlaced PNG corruption check (found by ryg) - 1.18 (2008-08-02) - fix a threading bug (local mutable static) - 1.17 support interlaced PNG - 1.16 major bugfix - stbi__convert_format converted one too many pixels - 1.15 initialize some fields for thread safety - 1.14 fix threadsafe conversion bug - header-file-only version (#define STBI_HEADER_FILE_ONLY before including) - 1.13 threadsafe - 1.12 const qualifiers in the API - 1.11 Support installable IDCT, colorspace conversion routines - 1.10 Fixes for 64-bit (don't use "unsigned long") - optimized upsampling by Fabian "ryg" Giesen - 1.09 Fix format-conversion for PSD code (bad global variables!) - 1.08 Thatcher Ulrich's PSD code integrated by Nicolas Schulz - 1.07 attempt to fix C++ warning/errors again - 1.06 attempt to fix C++ warning/errors again - 1.05 fix TGA loading to return correct *comp and use good luminance calc - 1.04 default float alpha is 1, not 255; use 'void *' for stbi_image_free - 1.03 bugfixes to STBI_NO_STDIO, STBI_NO_HDR - 1.02 support for (subset of) HDR files, float interface for preferred access to them - 1.01 fix bug: possible bug in handling right-side up bmps... not sure - fix bug: the stbi__bmp_load() and stbi__tga_load() functions didn't work at all - 1.00 interface to zlib that skips zlib header - 0.99 correct handling of alpha in palette - 0.98 TGA loader by lonesock; dynamically add loaders (untested) - 0.97 jpeg errors on too large a file; also catch another malloc failure - 0.96 fix detection of invalid v value - particleman@mollyrocket forum - 0.95 during header scan, seek to markers in case of padding - 0.94 STBI_NO_STDIO to disable stdio usage; rename all #defines the same - 0.93 handle jpegtran output; verbose errors - 0.92 read 4,8,16,24,32-bit BMP files of several formats - 0.91 output 24-bit Windows 3.0 BMP files - 0.90 fix a few more warnings; bump version number to approach 1.0 - 0.61 bugfixes due to Marc LeBlanc, Christopher Lloyd - 0.60 fix compiling as c++ - 0.59 fix warnings: merge Dave Moore's -Wall fixes - 0.58 fix bug: zlib uncompressed mode len/nlen was wrong endian - 0.57 fix bug: jpg last huffman symbol before marker was >9 bits but less than 16 available - 0.56 fix bug: zlib uncompressed mode len vs. nlen - 0.55 fix bug: restart_interval not initialized to 0 - 0.54 allow NULL for 'int *comp' - 0.53 fix bug in png 3->4; speedup png decoding - 0.52 png handles req_comp=3,4 directly; minor cleanup; jpeg comments - 0.51 obey req_comp requests, 1-component jpegs return as 1-component, - on 'test' only check type, not whether we support this variant - 0.50 (2006-11-19) - first released version + 2.19 (2018-02-11) fix warning + 2.18 (2018-01-30) fix warnings + 2.17 (2018-01-29) change sbti__shiftsigned to avoid clang -O2 bug + 1-bit BMP + *_is_16_bit api + avoid warnings + 2.16 (2017-07-23) all functions have 16-bit variants; + STBI_NO_STDIO works again; + compilation fixes; + fix rounding in unpremultiply; + optimize vertical flip; + disable raw_len validation; + documentation fixes + 2.15 (2017-03-18) fix png-1,2,4 bug; now all Imagenet JPGs decode; + warning fixes; disable run-time SSE detection on gcc; + uniform handling of optional "return" values; + thread-safe initialization of zlib tables + 2.14 (2017-03-03) remove deprecated STBI_JPEG_OLD; fixes for Imagenet JPGs + 2.13 (2016-11-29) add 16-bit API, only supported for PNG right now + 2.12 (2016-04-02) fix typo in 2.11 PSD fix that caused crashes + 2.11 (2016-04-02) allocate large structures on the stack + remove white matting for transparent PSD + fix reported channel count for PNG & BMP + re-enable SSE2 in non-gcc 64-bit + support RGB-formatted JPEG + read 16-bit PNGs (only as 8-bit) + 2.10 (2016-01-22) avoid warning introduced in 2.09 by STBI_REALLOC_SIZED + 2.09 (2016-01-16) allow comments in PNM files + 16-bit-per-pixel TGA (not bit-per-component) + info() for TGA could break due to .hdr handling + info() for BMP to shares code instead of sloppy parse + can use STBI_REALLOC_SIZED if allocator doesn't support realloc + code cleanup + 2.08 (2015-09-13) fix to 2.07 cleanup, reading RGB PSD as RGBA + 2.07 (2015-09-13) fix compiler warnings + partial animated GIF support + limited 16-bpc PSD support + #ifdef unused functions + bug with < 92 byte PIC,PNM,HDR,TGA + 2.06 (2015-04-19) fix bug where PSD returns wrong '*comp' value + 2.05 (2015-04-19) fix bug in progressive JPEG handling, fix warning + 2.04 (2015-04-15) try to re-enable SIMD on MinGW 64-bit + 2.03 (2015-04-12) extra corruption checking (mmozeiko) + stbi_set_flip_vertically_on_load (nguillemot) + fix NEON support; fix mingw support + 2.02 (2015-01-19) fix incorrect assert, fix warning + 2.01 (2015-01-17) fix various warnings; suppress SIMD on gcc 32-bit without -msse2 + 2.00b (2014-12-25) fix STBI_MALLOC in progressive JPEG + 2.00 (2014-12-25) optimize JPG, including x86 SSE2 & NEON SIMD (ryg) + progressive JPEG (stb) + PGM/PPM support (Ken Miller) + STBI_MALLOC,STBI_REALLOC,STBI_FREE + GIF bugfix -- seemingly never worked + STBI_NO_*, STBI_ONLY_* + 1.48 (2014-12-14) fix incorrectly-named assert() + 1.47 (2014-12-14) 1/2/4-bit PNG support, both direct and paletted (Omar Cornut & stb) + optimize PNG (ryg) + fix bug in interlaced PNG with user-specified channel count (stb) + 1.46 (2014-08-26) + fix broken tRNS chunk (colorkey-style transparency) in non-paletted PNG + 1.45 (2014-08-16) + fix MSVC-ARM internal compiler error by wrapping malloc + 1.44 (2014-08-07) + various warning fixes from Ronny Chevalier + 1.43 (2014-07-15) + fix MSVC-only compiler problem in code changed in 1.42 + 1.42 (2014-07-09) + don't define _CRT_SECURE_NO_WARNINGS (affects user code) + fixes to stbi__cleanup_jpeg path + added STBI_ASSERT to avoid requiring assert.h + 1.41 (2014-06-25) + fix search&replace from 1.36 that messed up comments/error messages + 1.40 (2014-06-22) + fix gcc struct-initialization warning + 1.39 (2014-06-15) + fix to TGA optimization when req_comp != number of components in TGA; + fix to GIF loading because BMP wasn't rewinding (whoops, no GIFs in my test suite) + add support for BMP version 5 (more ignored fields) + 1.38 (2014-06-06) + suppress MSVC warnings on integer casts truncating values + fix accidental rename of 'skip' field of I/O + 1.37 (2014-06-04) + remove duplicate typedef + 1.36 (2014-06-03) + convert to header file single-file library + if de-iphone isn't set, load iphone images color-swapped instead of returning NULL + 1.35 (2014-05-27) + various warnings + fix broken STBI_SIMD path + fix bug where stbi_load_from_file no longer left file pointer in correct place + fix broken non-easy path for 32-bit BMP (possibly never used) + TGA optimization by Arseny Kapoulkine + 1.34 (unknown) + use STBI_NOTUSED in stbi__resample_row_generic(), fix one more leak in tga failure case + 1.33 (2011-07-14) + make stbi_is_hdr work in STBI_NO_HDR (as specified), minor compiler-friendly improvements + 1.32 (2011-07-13) + support for "info" function for all supported filetypes (SpartanJ) + 1.31 (2011-06-20) + a few more leak fixes, bug in PNG handling (SpartanJ) + 1.30 (2011-06-11) + added ability to load files via callbacks to accomidate custom input streams (Ben Wenger) + removed deprecated format-specific test/load functions + removed support for installable file formats (stbi_loader) -- would have been broken for IO callbacks anyway + error cases in bmp and tga give messages and don't leak (Raymond Barbiero, grisha) + fix inefficiency in decoding 32-bit BMP (David Woo) + 1.29 (2010-08-16) + various warning fixes from Aurelien Pocheville + 1.28 (2010-08-01) + fix bug in GIF palette transparency (SpartanJ) + 1.27 (2010-08-01) + cast-to-stbi_uc to fix warnings + 1.26 (2010-07-24) + fix bug in file buffering for PNG reported by SpartanJ + 1.25 (2010-07-17) + refix trans_data warning (Won Chun) + 1.24 (2010-07-12) + perf improvements reading from files on platforms with lock-heavy fgetc() + minor perf improvements for jpeg + deprecated type-specific functions so we'll get feedback if they're needed + attempt to fix trans_data warning (Won Chun) + 1.23 fixed bug in iPhone support + 1.22 (2010-07-10) + removed image *writing* support + stbi_info support from Jetro Lauha + GIF support from Jean-Marc Lienher + iPhone PNG-extensions from James Brown + warning-fixes from Nicolas Schulz and Janez Zemva (i.stbi__err. Janez (U+017D)emva) + 1.21 fix use of 'stbi_uc' in header (reported by jon blow) + 1.20 added support for Softimage PIC, by Tom Seddon + 1.19 bug in interlaced PNG corruption check (found by ryg) + 1.18 (2008-08-02) + fix a threading bug (local mutable static) + 1.17 support interlaced PNG + 1.16 major bugfix - stbi__convert_format converted one too many pixels + 1.15 initialize some fields for thread safety + 1.14 fix threadsafe conversion bug + header-file-only version (#define STBI_HEADER_FILE_ONLY before including) + 1.13 threadsafe + 1.12 const qualifiers in the API + 1.11 Support installable IDCT, colorspace conversion routines + 1.10 Fixes for 64-bit (don't use "unsigned long") + optimized upsampling by Fabian "ryg" Giesen + 1.09 Fix format-conversion for PSD code (bad global variables!) + 1.08 Thatcher Ulrich's PSD code integrated by Nicolas Schulz + 1.07 attempt to fix C++ warning/errors again + 1.06 attempt to fix C++ warning/errors again + 1.05 fix TGA loading to return correct *comp and use good luminance calc + 1.04 default float alpha is 1, not 255; use 'void *' for stbi_image_free + 1.03 bugfixes to STBI_NO_STDIO, STBI_NO_HDR + 1.02 support for (subset of) HDR files, float interface for preferred access to them + 1.01 fix bug: possible bug in handling right-side up bmps... not sure + fix bug: the stbi__bmp_load() and stbi__tga_load() functions didn't work at all + 1.00 interface to zlib that skips zlib header + 0.99 correct handling of alpha in palette + 0.98 TGA loader by lonesock; dynamically add loaders (untested) + 0.97 jpeg errors on too large a file; also catch another malloc failure + 0.96 fix detection of invalid v value - particleman@mollyrocket forum + 0.95 during header scan, seek to markers in case of padding + 0.94 STBI_NO_STDIO to disable stdio usage; rename all #defines the same + 0.93 handle jpegtran output; verbose errors + 0.92 read 4,8,16,24,32-bit BMP files of several formats + 0.91 output 24-bit Windows 3.0 BMP files + 0.90 fix a few more warnings; bump version number to approach 1.0 + 0.61 bugfixes due to Marc LeBlanc, Christopher Lloyd + 0.60 fix compiling as c++ + 0.59 fix warnings: merge Dave Moore's -Wall fixes + 0.58 fix bug: zlib uncompressed mode len/nlen was wrong endian + 0.57 fix bug: jpg last huffman symbol before marker was >9 bits but less than 16 available + 0.56 fix bug: zlib uncompressed mode len vs. nlen + 0.55 fix bug: restart_interval not initialized to 0 + 0.54 allow NULL for 'int *comp' + 0.53 fix bug in png 3->4; speedup png decoding + 0.52 png handles req_comp=3,4 directly; minor cleanup; jpeg comments + 0.51 obey req_comp requests, 1-component jpegs return as 1-component, + on 'test' only check type, not whether we support this variant + 0.50 (2006-11-19) + first released version */ diff --git a/src/stb_image_write.h b/src/stb_image_write.h index c05e958..1a23f1e 100644 --- a/src/stb_image_write.h +++ b/src/stb_image_write.h @@ -1,10 +1,10 @@ /* stb_image_write - v1.09 - public domain - http://nothings.org/stb/stb_image_write.h writes out PNG/BMP/TGA/JPEG/HDR images to C stdio - Sean Barrett 2010-2015 - no warranty implied; use at your own risk + no warranty implied; use at your own risk Before #including, - #define STB_IMAGE_WRITE_IMPLEMENTATION + #define STB_IMAGE_WRITE_IMPLEMENTATION in the file that you want to have the implementation. @@ -13,7 +13,7 @@ If using a modern Microsoft Compiler, non-safe versions of CRT calls may cause compilation warnings or even errors. To avoid this, also before #including, - #define STBI_MSC_SECURE_CRT + #define STBI_MSC_SECURE_CRT ABOUT: @@ -42,30 +42,30 @@ USAGE: There are five functions, one for each image file format: - int stbi_write_png(char const *filename, int w, int h, int comp, const void *data, int stride_in_bytes); - int stbi_write_bmp(char const *filename, int w, int h, int comp, const void *data); - int stbi_write_tga(char const *filename, int w, int h, int comp, const void *data); - int stbi_write_jpg(char const *filename, int w, int h, int comp, const void *data, int quality); - int stbi_write_hdr(char const *filename, int w, int h, int comp, const float *data); + int stbi_write_png(char const *filename, int w, int h, int comp, const void *data, int stride_in_bytes); + int stbi_write_bmp(char const *filename, int w, int h, int comp, const void *data); + int stbi_write_tga(char const *filename, int w, int h, int comp, const void *data); + int stbi_write_jpg(char const *filename, int w, int h, int comp, const void *data, int quality); + int stbi_write_hdr(char const *filename, int w, int h, int comp, const float *data); - void stbi_flip_vertically_on_write(int flag); // flag is non-zero to flip data vertically + void stbi_flip_vertically_on_write(int flag); // flag is non-zero to flip data vertically There are also five equivalent functions that use an arbitrary write function. You are expected to open/close your file-equivalent before and after calling these: - int stbi_write_png_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void *data, int stride_in_bytes); - int stbi_write_bmp_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void *data); - int stbi_write_tga_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void *data); - int stbi_write_hdr_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const float *data); - int stbi_write_jpg_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data, int quality); + int stbi_write_png_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void *data, int stride_in_bytes); + int stbi_write_bmp_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void *data); + int stbi_write_tga_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void *data); + int stbi_write_hdr_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const float *data); + int stbi_write_jpg_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data, int quality); where the callback is: - void stbi_write_func(void *context, void *data, int size); + void stbi_write_func(void *context, void *data, int size); You can configure it with these global variables: - int stbi_write_tga_with_rle; // defaults to true; set to 0 to disable RLE - int stbi_write_png_compression_level; // defaults to 8; set to higher for more compression - int stbi_write_force_png_filter; // defaults to -1; set to 0..5 to force a filter mode + int stbi_write_tga_with_rle; // defaults to true; set to 0 to disable RLE + int stbi_write_png_compression_level; // defaults to 8; set to higher for more compression + int stbi_write_force_png_filter; // defaults to -1; set to 0..5 to force a filter mode You can define STBI_WRITE_NO_STDIO to disable the file variant of these @@ -111,33 +111,33 @@ USAGE: CREDITS: - Sean Barrett - PNG/BMP/TGA - Baldur Karlsson - HDR - Jean-Sebastien Guay - TGA monochrome - Tim Kelsey - misc enhancements - Alan Hickman - TGA RLE - Emmanuel Julien - initial file IO callback implementation - Jon Olick - original jo_jpeg.cpp code - Daniel Gibson - integrate JPEG, allow external zlib - Aarni Koskela - allow choosing PNG filter + Sean Barrett - PNG/BMP/TGA + Baldur Karlsson - HDR + Jean-Sebastien Guay - TGA monochrome + Tim Kelsey - misc enhancements + Alan Hickman - TGA RLE + Emmanuel Julien - initial file IO callback implementation + Jon Olick - original jo_jpeg.cpp code + Daniel Gibson - integrate JPEG, allow external zlib + Aarni Koskela - allow choosing PNG filter bugfixes: - github:Chribba - Guillaume Chereau - github:jry2 - github:romigrou - Sergio Gonzalez - Jonas Karlsson - Filip Wasil - Thatcher Ulrich - github:poppolopoppo - Patrick Boettcher - github:xeekworx - Cap Petschulat - Simon Rodriguez - Ivan Tikhonov - github:ignotion - Adam Schackart + github:Chribba + Guillaume Chereau + github:jry2 + github:romigrou + Sergio Gonzalez + Jonas Karlsson + Filip Wasil + Thatcher Ulrich + github:poppolopoppo + Patrick Boettcher + github:xeekworx + Cap Petschulat + Simon Rodriguez + Ivan Tikhonov + github:ignotion + Adam Schackart LICENSE @@ -161,7 +161,7 @@ LICENSE #endif #endif -#ifndef STB_IMAGE_WRITE_STATIC // C++ forbids static forward declarations +#ifndef STB_IMAGE_WRITE_STATIC // C++ forbids static forward declarations extern int stbi_write_tga_with_rle; extern int stbi_write_png_compression_level; extern int stbi_write_force_png_filter; @@ -216,9 +216,9 @@ STBIWDEF void stbi_flip_vertically_on_write(int flip_boolean); #endif #ifndef STBIW_MALLOC -#define STBIW_MALLOC(sz) malloc(sz) -#define STBIW_REALLOC(p,newsz) realloc(p,newsz) -#define STBIW_FREE(p) free(p) +#define STBIW_MALLOC(sz) malloc(sz) +#define STBIW_REALLOC(p,newsz) realloc(p,newsz) +#define STBIW_FREE(p) free(p) #endif #ifndef STBIW_REALLOC_SIZED @@ -264,7 +264,7 @@ typedef struct // initialize a callback-based context static void stbi__start_write_callbacks(stbi__write_context *s, stbi_write_func *c, void *context) { - s->func = c; + s->func = c; s->context = context; } @@ -280,7 +280,7 @@ static int stbi__start_write_file(stbi__write_context *s, const char *filename) FILE *f; #ifdef STBI_MSC_SECURE_CRT if (fopen_s(&f, filename, "wb")) - f = NULL; + f = NULL; #else f = fopen(filename, "wb"); #endif @@ -301,29 +301,29 @@ typedef int stb_image_write_test[sizeof(stbiw_uint32)==4 ? 1 : -1]; static void stbiw__writefv(stbi__write_context *s, const char *fmt, va_list v) { while (*fmt) { - switch (*fmt++) { - case ' ': break; - case '1': { unsigned char x = STBIW_UCHAR(va_arg(v, int)); - s->func(s->context,&x,1); - break; } - case '2': { int x = va_arg(v,int); - unsigned char b[2]; - b[0] = STBIW_UCHAR(x); - b[1] = STBIW_UCHAR(x>>8); - s->func(s->context,b,2); - break; } - case '4': { stbiw_uint32 x = va_arg(v,int); - unsigned char b[4]; - b[0]=STBIW_UCHAR(x); - b[1]=STBIW_UCHAR(x>>8); - b[2]=STBIW_UCHAR(x>>16); - b[3]=STBIW_UCHAR(x>>24); - s->func(s->context,b,4); - break; } - default: - STBIW_ASSERT(0); - return; - } + switch (*fmt++) { + case ' ': break; + case '1': { unsigned char x = STBIW_UCHAR(va_arg(v, int)); + s->func(s->context,&x,1); + break; } + case '2': { int x = va_arg(v,int); + unsigned char b[2]; + b[0] = STBIW_UCHAR(x); + b[1] = STBIW_UCHAR(x>>8); + s->func(s->context,b,2); + break; } + case '4': { stbiw_uint32 x = va_arg(v,int); + unsigned char b[4]; + b[0]=STBIW_UCHAR(x); + b[1]=STBIW_UCHAR(x>>8); + b[2]=STBIW_UCHAR(x>>16); + b[3]=STBIW_UCHAR(x>>24); + s->func(s->context,b,4); + break; } + default: + STBIW_ASSERT(0); + return; + } } } @@ -353,31 +353,31 @@ static void stbiw__write_pixel(stbi__write_context *s, int rgb_dir, int comp, in int k; if (write_alpha < 0) - s->func(s->context, &d[comp - 1], 1); + s->func(s->context, &d[comp - 1], 1); switch (comp) { - case 2: // 2 pixels = mono + alpha, alpha is written separately, so same as 1-channel case - case 1: - if (expand_mono) - stbiw__write3(s, d[0], d[0], d[0]); // monochrome bmp - else - s->func(s->context, d, 1); // monochrome TGA - break; - case 4: - if (!write_alpha) { - // composite against pink background - for (k = 0; k < 3; ++k) - px[k] = bg[k] + ((d[k] - bg[k]) * d[3]) / 255; - stbiw__write3(s, px[1 - rgb_dir], px[1], px[1 + rgb_dir]); - break; - } - /* FALLTHROUGH */ - case 3: - stbiw__write3(s, d[1 - rgb_dir], d[1], d[1 + rgb_dir]); - break; + case 2: // 2 pixels = mono + alpha, alpha is written separately, so same as 1-channel case + case 1: + if (expand_mono) + stbiw__write3(s, d[0], d[0], d[0]); // monochrome bmp + else + s->func(s->context, d, 1); // monochrome TGA + break; + case 4: + if (!write_alpha) { + // composite against pink background + for (k = 0; k < 3; ++k) + px[k] = bg[k] + ((d[k] - bg[k]) * d[3]) / 255; + stbiw__write3(s, px[1 - rgb_dir], px[1], px[1 + rgb_dir]); + break; + } + /* FALLTHROUGH */ + case 3: + stbiw__write3(s, d[1 - rgb_dir], d[1], d[1 + rgb_dir]); + break; } if (write_alpha > 0) - s->func(s->context, &d[comp - 1], 1); + s->func(s->context, &d[comp - 1], 1); } static void stbiw__write_pixels(stbi__write_context *s, int rgb_dir, int vdir, int x, int y, int comp, void *data, int write_alpha, int scanline_pad, int expand_mono) @@ -386,36 +386,36 @@ static void stbiw__write_pixels(stbi__write_context *s, int rgb_dir, int vdir, i int i,j, j_end; if (y <= 0) - return; + return; if (stbi__flip_vertically_on_write) - vdir *= -1; + vdir *= -1; if (vdir < 0) - j_end = -1, j = y-1; + j_end = -1, j = y-1; else - j_end = y, j = 0; + j_end = y, j = 0; for (; j != j_end; j += vdir) { - for (i=0; i < x; ++i) { - unsigned char *d = (unsigned char *) data + (j*x+i)*comp; - stbiw__write_pixel(s, rgb_dir, comp, write_alpha, expand_mono, d); - } - s->func(s->context, &zero, scanline_pad); + for (i=0; i < x; ++i) { + unsigned char *d = (unsigned char *) data + (j*x+i)*comp; + stbiw__write_pixel(s, rgb_dir, comp, write_alpha, expand_mono, d); + } + s->func(s->context, &zero, scanline_pad); } } static int stbiw__outfile(stbi__write_context *s, int rgb_dir, int vdir, int x, int y, int comp, int expand_mono, void *data, int alpha, int pad, const char *fmt, ...) { if (y < 0 || x < 0) { - return 0; + return 0; } else { - va_list v; - va_start(v, fmt); - stbiw__writefv(s, fmt, v); - va_end(v); - stbiw__write_pixels(s,rgb_dir,vdir,x,y,comp,data,alpha,pad, expand_mono); - return 1; + va_list v; + va_start(v, fmt); + stbiw__writefv(s, fmt, v); + va_end(v); + stbiw__write_pixels(s,rgb_dir,vdir,x,y,comp,data,alpha,pad, expand_mono); + return 1; } } @@ -423,9 +423,9 @@ static int stbi_write_bmp_core(stbi__write_context *s, int x, int y, int comp, c { int pad = (-x*3) & 3; return stbiw__outfile(s,-1,-1,x,y,comp,1,(void *) data,0,pad, - "11 4 22 4" "4 44 22 444444", - 'B', 'M', 14+40+(x*3+pad)*y, 0,0, 14+40, // file header - 40, x,y, 1,24, 0,0,0,0,0,0); // bitmap header + "11 4 22 4" "4 44 22 444444", + 'B', 'M', 14+40+(x*3+pad)*y, 0,0, 14+40, // file header + 40, x,y, 1,24, 0,0,0,0,0,0); // bitmap header } STBIWDEF int stbi_write_bmp_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data) @@ -440,11 +440,11 @@ STBIWDEF int stbi_write_bmp(char const *filename, int x, int y, int comp, const { stbi__write_context s; if (stbi__start_write_file(&s,filename)) { - int r = stbi_write_bmp_core(&s, x, y, comp, data); - stbi__end_write_file(&s); - return r; + int r = stbi_write_bmp_core(&s, x, y, comp, data); + stbi__end_write_file(&s); + return r; } else - return 0; + return 0; } #endif //!STBI_WRITE_NO_STDIO @@ -455,73 +455,73 @@ static int stbi_write_tga_core(stbi__write_context *s, int x, int y, int comp, v int format = colorbytes < 2 ? 3 : 2; // 3 color channels (RGB/RGBA) = 2, 1 color channel (Y/YA) = 3 if (y < 0 || x < 0) - return 0; + return 0; if (!stbi_write_tga_with_rle) { - return stbiw__outfile(s, -1, -1, x, y, comp, 0, (void *) data, has_alpha, 0, - "111 221 2222 11", 0, 0, format, 0, 0, 0, 0, 0, x, y, (colorbytes + has_alpha) * 8, has_alpha * 8); + return stbiw__outfile(s, -1, -1, x, y, comp, 0, (void *) data, has_alpha, 0, + "111 221 2222 11", 0, 0, format, 0, 0, 0, 0, 0, x, y, (colorbytes + has_alpha) * 8, has_alpha * 8); } else { - int i,j,k; - int jend, jdir; - - stbiw__writef(s, "111 221 2222 11", 0,0,format+8, 0,0,0, 0,0,x,y, (colorbytes + has_alpha) * 8, has_alpha * 8); - - if (stbi__flip_vertically_on_write) { - j = 0; - jend = y; - jdir = 1; - } else { - j = y-1; - jend = -1; - jdir = -1; - } - for (; j != jend; j += jdir) { - unsigned char *row = (unsigned char *) data + j * x * comp; - int len; - - for (i = 0; i < x; i += len) { - unsigned char *begin = row + i * comp; - int diff = 1; - len = 1; - - if (i < x - 1) { - ++len; - diff = memcmp(begin, row + (i + 1) * comp, comp); - if (diff) { - const unsigned char *prev = begin; - for (k = i + 2; k < x && len < 128; ++k) { - if (memcmp(prev, row + k * comp, comp)) { - prev += comp; - ++len; - } else { - --len; - break; - } - } - } else { - for (k = i + 2; k < x && len < 128; ++k) { - if (!memcmp(begin, row + k * comp, comp)) { - ++len; - } else { - break; - } - } - } - } - - if (diff) { - unsigned char header = STBIW_UCHAR(len - 1); - s->func(s->context, &header, 1); - for (k = 0; k < len; ++k) { - stbiw__write_pixel(s, -1, comp, has_alpha, 0, begin + k * comp); - } - } else { - unsigned char header = STBIW_UCHAR(len - 129); - s->func(s->context, &header, 1); - stbiw__write_pixel(s, -1, comp, has_alpha, 0, begin); - } - } - } + int i,j,k; + int jend, jdir; + + stbiw__writef(s, "111 221 2222 11", 0,0,format+8, 0,0,0, 0,0,x,y, (colorbytes + has_alpha) * 8, has_alpha * 8); + + if (stbi__flip_vertically_on_write) { + j = 0; + jend = y; + jdir = 1; + } else { + j = y-1; + jend = -1; + jdir = -1; + } + for (; j != jend; j += jdir) { + unsigned char *row = (unsigned char *) data + j * x * comp; + int len; + + for (i = 0; i < x; i += len) { + unsigned char *begin = row + i * comp; + int diff = 1; + len = 1; + + if (i < x - 1) { + ++len; + diff = memcmp(begin, row + (i + 1) * comp, comp); + if (diff) { + const unsigned char *prev = begin; + for (k = i + 2; k < x && len < 128; ++k) { + if (memcmp(prev, row + k * comp, comp)) { + prev += comp; + ++len; + } else { + --len; + break; + } + } + } else { + for (k = i + 2; k < x && len < 128; ++k) { + if (!memcmp(begin, row + k * comp, comp)) { + ++len; + } else { + break; + } + } + } + } + + if (diff) { + unsigned char header = STBIW_UCHAR(len - 1); + s->func(s->context, &header, 1); + for (k = 0; k < len; ++k) { + stbiw__write_pixel(s, -1, comp, has_alpha, 0, begin + k * comp); + } + } else { + unsigned char header = STBIW_UCHAR(len - 129); + s->func(s->context, &header, 1); + stbiw__write_pixel(s, -1, comp, has_alpha, 0, begin); + } + } + } } return 1; } @@ -538,11 +538,11 @@ STBIWDEF int stbi_write_tga(char const *filename, int x, int y, int comp, const { stbi__write_context s; if (stbi__start_write_file(&s,filename)) { - int r = stbi_write_tga_core(&s, x, y, comp, (void *) data); - stbi__end_write_file(&s); - return r; + int r = stbi_write_tga_core(&s, x, y, comp, (void *) data); + stbi__end_write_file(&s); + return r; } else - return 0; + return 0; } #endif @@ -558,14 +558,14 @@ void stbiw__linear_to_rgbe(unsigned char *rgbe, float *linear) float maxcomp = stbiw__max(linear[0], stbiw__max(linear[1], linear[2])); if (maxcomp < 1e-32f) { - rgbe[0] = rgbe[1] = rgbe[2] = rgbe[3] = 0; + rgbe[0] = rgbe[1] = rgbe[2] = rgbe[3] = 0; } else { - float normalize = (float) frexp(maxcomp, &exponent) * 256.0f/maxcomp; + float normalize = (float) frexp(maxcomp, &exponent) * 256.0f/maxcomp; - rgbe[0] = (unsigned char)(linear[0] * normalize); - rgbe[1] = (unsigned char)(linear[1] * normalize); - rgbe[2] = (unsigned char)(linear[2] * normalize); - rgbe[3] = (unsigned char)(exponent + 128); + rgbe[0] = (unsigned char)(linear[0] * normalize); + rgbe[1] = (unsigned char)(linear[1] * normalize); + rgbe[2] = (unsigned char)(linear[2] * normalize); + rgbe[3] = (unsigned char)(exponent + 128); } } @@ -597,106 +597,106 @@ void stbiw__write_hdr_scanline(stbi__write_context *s, int width, int ncomp, uns /* skip RLE for images too small or large */ if (width < 8 || width >= 32768) { - for (x=0; x < width; x++) { - switch (ncomp) { - case 4: /* fallthrough */ - case 3: linear[2] = scanline[x*ncomp + 2]; - linear[1] = scanline[x*ncomp + 1]; - linear[0] = scanline[x*ncomp + 0]; - break; - default: - linear[0] = linear[1] = linear[2] = scanline[x*ncomp + 0]; - break; - } - stbiw__linear_to_rgbe(rgbe, linear); - s->func(s->context, rgbe, 4); - } + for (x=0; x < width; x++) { + switch (ncomp) { + case 4: /* fallthrough */ + case 3: linear[2] = scanline[x*ncomp + 2]; + linear[1] = scanline[x*ncomp + 1]; + linear[0] = scanline[x*ncomp + 0]; + break; + default: + linear[0] = linear[1] = linear[2] = scanline[x*ncomp + 0]; + break; + } + stbiw__linear_to_rgbe(rgbe, linear); + s->func(s->context, rgbe, 4); + } } else { - int c,r; - /* encode into scratch buffer */ - for (x=0; x < width; x++) { - switch(ncomp) { - case 4: /* fallthrough */ - case 3: linear[2] = scanline[x*ncomp + 2]; - linear[1] = scanline[x*ncomp + 1]; - linear[0] = scanline[x*ncomp + 0]; - break; - default: - linear[0] = linear[1] = linear[2] = scanline[x*ncomp + 0]; - break; - } - stbiw__linear_to_rgbe(rgbe, linear); - scratch[x + width*0] = rgbe[0]; - scratch[x + width*1] = rgbe[1]; - scratch[x + width*2] = rgbe[2]; - scratch[x + width*3] = rgbe[3]; - } - - s->func(s->context, scanlineheader, 4); - - /* RLE each component separately */ - for (c=0; c < 4; c++) { - unsigned char *comp = &scratch[width*c]; - - x = 0; - while (x < width) { - // find first run - r = x; - while (r+2 < width) { - if (comp[r] == comp[r+1] && comp[r] == comp[r+2]) - break; - ++r; - } - if (r+2 >= width) - r = width; - // dump up to first run - while (x < r) { - int len = r-x; - if (len > 128) len = 128; - stbiw__write_dump_data(s, len, &comp[x]); - x += len; - } - // if there's a run, output it - if (r+2 < width) { // same test as what we break out of in search loop, so only true if we break'd - // find next byte after run - while (r < width && comp[r] == comp[x]) - ++r; - // output run up to r - while (x < r) { - int len = r-x; - if (len > 127) len = 127; - stbiw__write_run_data(s, len, comp[x]); - x += len; - } - } - } - } + int c,r; + /* encode into scratch buffer */ + for (x=0; x < width; x++) { + switch(ncomp) { + case 4: /* fallthrough */ + case 3: linear[2] = scanline[x*ncomp + 2]; + linear[1] = scanline[x*ncomp + 1]; + linear[0] = scanline[x*ncomp + 0]; + break; + default: + linear[0] = linear[1] = linear[2] = scanline[x*ncomp + 0]; + break; + } + stbiw__linear_to_rgbe(rgbe, linear); + scratch[x + width*0] = rgbe[0]; + scratch[x + width*1] = rgbe[1]; + scratch[x + width*2] = rgbe[2]; + scratch[x + width*3] = rgbe[3]; + } + + s->func(s->context, scanlineheader, 4); + + /* RLE each component separately */ + for (c=0; c < 4; c++) { + unsigned char *comp = &scratch[width*c]; + + x = 0; + while (x < width) { + // find first run + r = x; + while (r+2 < width) { + if (comp[r] == comp[r+1] && comp[r] == comp[r+2]) + break; + ++r; + } + if (r+2 >= width) + r = width; + // dump up to first run + while (x < r) { + int len = r-x; + if (len > 128) len = 128; + stbiw__write_dump_data(s, len, &comp[x]); + x += len; + } + // if there's a run, output it + if (r+2 < width) { // same test as what we break out of in search loop, so only true if we break'd + // find next byte after run + while (r < width && comp[r] == comp[x]) + ++r; + // output run up to r + while (x < r) { + int len = r-x; + if (len > 127) len = 127; + stbiw__write_run_data(s, len, comp[x]); + x += len; + } + } + } + } } } static int stbi_write_hdr_core(stbi__write_context *s, int x, int y, int comp, float *data) { if (y <= 0 || x <= 0 || data == NULL) - return 0; + return 0; else { - // Each component is stored separately. Allocate scratch space for full output scanline. - unsigned char *scratch = (unsigned char *) STBIW_MALLOC(x*4); - int i, len; - char buffer[128]; - char header[] = "#?RADIANCE\n# Written by stb_image_write.h\nFORMAT=32-bit_rle_rgbe\n"; - s->func(s->context, header, sizeof(header)-1); + // Each component is stored separately. Allocate scratch space for full output scanline. + unsigned char *scratch = (unsigned char *) STBIW_MALLOC(x*4); + int i, len; + char buffer[128]; + char header[] = "#?RADIANCE\n# Written by stb_image_write.h\nFORMAT=32-bit_rle_rgbe\n"; + s->func(s->context, header, sizeof(header)-1); #ifdef STBI_MSC_SECURE_CRT - len = sprintf_s(buffer, "EXPOSURE= 1.0000000000000\n\n-Y %d +X %d\n", y, x); + len = sprintf_s(buffer, "EXPOSURE= 1.0000000000000\n\n-Y %d +X %d\n", y, x); #else - len = sprintf(buffer, "EXPOSURE= 1.0000000000000\n\n-Y %d +X %d\n", y, x); + len = sprintf(buffer, "EXPOSURE= 1.0000000000000\n\n-Y %d +X %d\n", y, x); #endif - s->func(s->context, buffer, len); + s->func(s->context, buffer, len); - for(i=0; i < y; i++) - stbiw__write_hdr_scanline(s, x, comp, scratch, data + comp*x*(stbi__flip_vertically_on_write ? y-1-i : i)*x); - STBIW_FREE(scratch); - return 1; + for(i=0; i < y; i++) + stbiw__write_hdr_scanline(s, x, comp, scratch, data + comp*x*(stbi__flip_vertically_on_write ? y-1-i : i)*x); + STBIW_FREE(scratch); + return 1; } } @@ -712,11 +712,11 @@ STBIWDEF int stbi_write_hdr(char const *filename, int x, int y, int comp, const { stbi__write_context s; if (stbi__start_write_file(&s,filename)) { - int r = stbi_write_hdr_core(&s, x, y, comp, (float *) data); - stbi__end_write_file(&s); - return r; + int r = stbi_write_hdr_core(&s, x, y, comp, (float *) data); + stbi__end_write_file(&s); + return r; } else - return 0; + return 0; } #endif // STBI_WRITE_NO_STDIO @@ -729,16 +729,16 @@ STBIWDEF int stbi_write_hdr(char const *filename, int x, int y, int comp, const #ifndef STBIW_ZLIB_COMPRESS // stretchy buffer; stbiw__sbpush() == vector<>::push_back() -- stbiw__sbcount() == vector<>::size() #define stbiw__sbraw(a) ((int *) (a) - 2) -#define stbiw__sbm(a) stbiw__sbraw(a)[0] -#define stbiw__sbn(a) stbiw__sbraw(a)[1] +#define stbiw__sbm(a) stbiw__sbraw(a)[0] +#define stbiw__sbn(a) stbiw__sbraw(a)[1] -#define stbiw__sbneedgrow(a,n) ((a)==0 || stbiw__sbn(a)+n >= stbiw__sbm(a)) +#define stbiw__sbneedgrow(a,n) ((a)==0 || stbiw__sbn(a)+n >= stbiw__sbm(a)) #define stbiw__sbmaybegrow(a,n) (stbiw__sbneedgrow(a,(n)) ? stbiw__sbgrow(a,n) : 0) -#define stbiw__sbgrow(a,n) stbiw__sbgrowf((void **) &(a), (n), sizeof(*(a))) +#define stbiw__sbgrow(a,n) stbiw__sbgrowf((void **) &(a), (n), sizeof(*(a))) -#define stbiw__sbpush(a, v) (stbiw__sbmaybegrow(a,1), (a)[stbiw__sbn(a)++] = (v)) -#define stbiw__sbcount(a) ((a) ? stbiw__sbn(a) : 0) -#define stbiw__sbfree(a) ((a) ? STBIW_FREE(stbiw__sbraw(a)),0 : 0) +#define stbiw__sbpush(a, v) (stbiw__sbmaybegrow(a,1), (a)[stbiw__sbn(a)++] = (v)) +#define stbiw__sbcount(a) ((a) ? stbiw__sbn(a) : 0) +#define stbiw__sbfree(a) ((a) ? STBIW_FREE(stbiw__sbraw(a)),0 : 0) static void *stbiw__sbgrowf(void **arr, int increment, int itemsize) { @@ -746,9 +746,9 @@ static void *stbiw__sbgrowf(void **arr, int increment, int itemsize) void *p = STBIW_REALLOC_SIZED(*arr ? stbiw__sbraw(*arr) : 0, *arr ? (stbiw__sbm(*arr)*itemsize + sizeof(int)*2) : 0, itemsize * m + sizeof(int)*2); STBIW_ASSERT(p); if (p) { - if (!*arr) ((int *) p)[1] = 0; - *arr = (void *) ((int *) p + 2); - stbiw__sbm(*arr) = m; + if (!*arr) ((int *) p)[1] = 0; + *arr = (void *) ((int *) p + 2); + stbiw__sbm(*arr) = m; } return *arr; } @@ -756,9 +756,9 @@ static void *stbiw__sbgrowf(void **arr, int increment, int itemsize) static unsigned char *stbiw__zlib_flushf(unsigned char *data, unsigned int *bitbuffer, int *bitcount) { while (*bitcount >= 8) { - stbiw__sbpush(data, STBIW_UCHAR(*bitbuffer)); - *bitbuffer >>= 8; - *bitcount -= 8; + stbiw__sbpush(data, STBIW_UCHAR(*bitbuffer)); + *bitbuffer >>= 8; + *bitcount -= 8; } return data; } @@ -767,8 +767,8 @@ static int stbiw__zlib_bitrev(int code, int codebits) { int res=0; while (codebits--) { - res = (res << 1) | (code & 1); - code >>= 1; + res = (res << 1) | (code & 1); + code >>= 1; } return res; } @@ -777,7 +777,7 @@ static unsigned int stbiw__zlib_countm(unsigned char *a, unsigned char *b, int l { int i; for (i=0; i < limit && i < 258; ++i) - if (a[i] != b[i]) break; + if (a[i] != b[i]) break; return i; } @@ -795,14 +795,14 @@ static unsigned int stbiw__zhash(unsigned char *data) #define stbiw__zlib_flush() (out = stbiw__zlib_flushf(out, &bitbuf, &bitcount)) #define stbiw__zlib_add(code,codebits) \ - (bitbuf |= (code) << bitcount, bitcount += (codebits), stbiw__zlib_flush()) -#define stbiw__zlib_huffa(b,c) stbiw__zlib_add(stbiw__zlib_bitrev(b,c),c) + (bitbuf |= (code) << bitcount, bitcount += (codebits), stbiw__zlib_flush()) +#define stbiw__zlib_huffa(b,c) stbiw__zlib_add(stbiw__zlib_bitrev(b,c),c) // default huffman tables #define stbiw__zlib_huff1(n) stbiw__zlib_huffa(0x30 + (n), 8) #define stbiw__zlib_huff2(n) stbiw__zlib_huffa(0x190 + (n)-144, 9) #define stbiw__zlib_huff3(n) stbiw__zlib_huffa(0 + (n)-256,7) #define stbiw__zlib_huff4(n) stbiw__zlib_huffa(0xc0 + (n)-280,8) -#define stbiw__zlib_huff(n) ((n) <= 143 ? stbiw__zlib_huff1(n) : (n) <= 255 ? stbiw__zlib_huff2(n) : (n) <= 279 ? stbiw__zlib_huff3(n) : stbiw__zlib_huff4(n)) +#define stbiw__zlib_huff(n) ((n) <= 143 ? stbiw__zlib_huff1(n) : (n) <= 255 ? stbiw__zlib_huff2(n) : (n) <= 279 ? stbiw__zlib_huff3(n) : stbiw__zlib_huff4(n)) #define stbiw__zlib_huffb(n) ((n) <= 143 ? stbiw__zlib_huff1(n) : stbiw__zlib_huff2(n)) #define stbiw__ZHASH 16384 @@ -816,15 +816,15 @@ unsigned char * stbi_zlib_compress(unsigned char *data, int data_len, int *out_l return STBIW_ZLIB_COMPRESS(data, data_len, out_len, quality); #else // use builtin static unsigned short lengthc[] = { 3,4,5,6,7,8,9,10,11,13,15,17,19,23,27,31,35,43,51,59,67,83,99,115,131,163,195,227,258, 259 }; - static unsigned char lengtheb[]= { 0,0,0,0,0,0,0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0 }; + static unsigned char lengtheb[]= { 0,0,0,0,0,0,0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0 }; static unsigned short distc[] = { 1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193,257,385,513,769,1025,1537,2049,3073,4097,6145,8193,12289,16385,24577, 32768 }; - static unsigned char disteb[] = { 0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13 }; + static unsigned char disteb[] = { 0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13 }; unsigned int bitbuf=0; int i,j, bitcount=0; unsigned char *out = NULL; unsigned char ***hash_table = (unsigned char***) STBIW_MALLOC(stbiw__ZHASH * sizeof(char**)); if (hash_table == NULL) - return NULL; + return NULL; if (quality < 5) quality = 5; stbiw__sbpush(out, 0x78); // DEFLATE 32K window @@ -833,86 +833,86 @@ unsigned char * stbi_zlib_compress(unsigned char *data, int data_len, int *out_l stbiw__zlib_add(1,2); // BTYPE = 1 -- fixed huffman for (i=0; i < stbiw__ZHASH; ++i) - hash_table[i] = NULL; + hash_table[i] = NULL; i=0; while (i < data_len-3) { - // hash next 3 bytes of data to be compressed - int h = stbiw__zhash(data+i)&(stbiw__ZHASH-1), best=3; - unsigned char *bestloc = 0; - unsigned char **hlist = hash_table[h]; - int n = stbiw__sbcount(hlist); - for (j=0; j < n; ++j) { - if (hlist[j]-data > i-32768) { // if entry lies within window - int d = stbiw__zlib_countm(hlist[j], data+i, data_len-i); - if (d >= best) best=d,bestloc=hlist[j]; - } - } - // when hash table entry is too long, delete half the entries - if (hash_table[h] && stbiw__sbn(hash_table[h]) == 2*quality) { - STBIW_MEMMOVE(hash_table[h], hash_table[h]+quality, sizeof(hash_table[h][0])*quality); - stbiw__sbn(hash_table[h]) = quality; - } - stbiw__sbpush(hash_table[h],data+i); - - if (bestloc) { - // "lazy matching" - check match at *next* byte, and if it's better, do cur byte as literal - h = stbiw__zhash(data+i+1)&(stbiw__ZHASH-1); - hlist = hash_table[h]; - n = stbiw__sbcount(hlist); - for (j=0; j < n; ++j) { - if (hlist[j]-data > i-32767) { - int e = stbiw__zlib_countm(hlist[j], data+i+1, data_len-i-1); - if (e > best) { // if next match is better, bail on current match - bestloc = NULL; - break; - } - } - } - } - - if (bestloc) { - int d = (int) (data+i - bestloc); // distance back - STBIW_ASSERT(d <= 32767 && best <= 258); - for (j=0; best > lengthc[j+1]-1; ++j); - stbiw__zlib_huff(j+257); - if (lengtheb[j]) stbiw__zlib_add(best - lengthc[j], lengtheb[j]); - for (j=0; d > distc[j+1]-1; ++j); - stbiw__zlib_add(stbiw__zlib_bitrev(j,5),5); - if (disteb[j]) stbiw__zlib_add(d - distc[j], disteb[j]); - i += best; - } else { - stbiw__zlib_huffb(data[i]); - ++i; - } + // hash next 3 bytes of data to be compressed + int h = stbiw__zhash(data+i)&(stbiw__ZHASH-1), best=3; + unsigned char *bestloc = 0; + unsigned char **hlist = hash_table[h]; + int n = stbiw__sbcount(hlist); + for (j=0; j < n; ++j) { + if (hlist[j]-data > i-32768) { // if entry lies within window + int d = stbiw__zlib_countm(hlist[j], data+i, data_len-i); + if (d >= best) best=d,bestloc=hlist[j]; + } + } + // when hash table entry is too long, delete half the entries + if (hash_table[h] && stbiw__sbn(hash_table[h]) == 2*quality) { + STBIW_MEMMOVE(hash_table[h], hash_table[h]+quality, sizeof(hash_table[h][0])*quality); + stbiw__sbn(hash_table[h]) = quality; + } + stbiw__sbpush(hash_table[h],data+i); + + if (bestloc) { + // "lazy matching" - check match at *next* byte, and if it's better, do cur byte as literal + h = stbiw__zhash(data+i+1)&(stbiw__ZHASH-1); + hlist = hash_table[h]; + n = stbiw__sbcount(hlist); + for (j=0; j < n; ++j) { + if (hlist[j]-data > i-32767) { + int e = stbiw__zlib_countm(hlist[j], data+i+1, data_len-i-1); + if (e > best) { // if next match is better, bail on current match + bestloc = NULL; + break; + } + } + } + } + + if (bestloc) { + int d = (int) (data+i - bestloc); // distance back + STBIW_ASSERT(d <= 32767 && best <= 258); + for (j=0; best > lengthc[j+1]-1; ++j); + stbiw__zlib_huff(j+257); + if (lengtheb[j]) stbiw__zlib_add(best - lengthc[j], lengtheb[j]); + for (j=0; d > distc[j+1]-1; ++j); + stbiw__zlib_add(stbiw__zlib_bitrev(j,5),5); + if (disteb[j]) stbiw__zlib_add(d - distc[j], disteb[j]); + i += best; + } else { + stbiw__zlib_huffb(data[i]); + ++i; + } } // write out final bytes for (;i < data_len; ++i) - stbiw__zlib_huffb(data[i]); + stbiw__zlib_huffb(data[i]); stbiw__zlib_huff(256); // end of block // pad with 0 bits to byte boundary while (bitcount) - stbiw__zlib_add(0,1); + stbiw__zlib_add(0,1); for (i=0; i < stbiw__ZHASH; ++i) - (void) stbiw__sbfree(hash_table[i]); + (void) stbiw__sbfree(hash_table[i]); STBIW_FREE(hash_table); { - // compute adler32 on input - unsigned int s1=1, s2=0; - int blocklen = (int) (data_len % 5552); - j=0; - while (j < data_len) { - for (i=0; i < blocklen; ++i) s1 += data[j+i], s2 += s1; - s1 %= 65521, s2 %= 65521; - j += blocklen; - blocklen = 5552; - } - stbiw__sbpush(out, STBIW_UCHAR(s2 >> 8)); - stbiw__sbpush(out, STBIW_UCHAR(s2)); - stbiw__sbpush(out, STBIW_UCHAR(s1 >> 8)); - stbiw__sbpush(out, STBIW_UCHAR(s1)); + // compute adler32 on input + unsigned int s1=1, s2=0; + int blocklen = (int) (data_len % 5552); + j=0; + while (j < data_len) { + for (i=0; i < blocklen; ++i) s1 += data[j+i], s2 += s1; + s1 %= 65521, s2 %= 65521; + j += blocklen; + blocklen = 5552; + } + stbiw__sbpush(out, STBIW_UCHAR(s2 >> 8)); + stbiw__sbpush(out, STBIW_UCHAR(s2)); + stbiw__sbpush(out, STBIW_UCHAR(s1 >> 8)); + stbiw__sbpush(out, STBIW_UCHAR(s1)); } *out_len = stbiw__sbn(out); // make returned pointer freeable @@ -925,44 +925,44 @@ static unsigned int stbiw__crc32(unsigned char *buffer, int len) { static unsigned int crc_table[256] = { - 0x00000000, 0x77073096, 0xEE0E612C, 0x990951BA, 0x076DC419, 0x706AF48F, 0xE963A535, 0x9E6495A3, - 0x0eDB8832, 0x79DCB8A4, 0xE0D5E91E, 0x97D2D988, 0x09B64C2B, 0x7EB17CBD, 0xE7B82D07, 0x90BF1D91, - 0x1DB71064, 0x6AB020F2, 0xF3B97148, 0x84BE41DE, 0x1ADAD47D, 0x6DDDE4EB, 0xF4D4B551, 0x83D385C7, - 0x136C9856, 0x646BA8C0, 0xFD62F97A, 0x8A65C9EC, 0x14015C4F, 0x63066CD9, 0xFA0F3D63, 0x8D080DF5, - 0x3B6E20C8, 0x4C69105E, 0xD56041E4, 0xA2677172, 0x3C03E4D1, 0x4B04D447, 0xD20D85FD, 0xA50AB56B, - 0x35B5A8FA, 0x42B2986C, 0xDBBBC9D6, 0xACBCF940, 0x32D86CE3, 0x45DF5C75, 0xDCD60DCF, 0xABD13D59, - 0x26D930AC, 0x51DE003A, 0xC8D75180, 0xBFD06116, 0x21B4F4B5, 0x56B3C423, 0xCFBA9599, 0xB8BDA50F, - 0x2802B89E, 0x5F058808, 0xC60CD9B2, 0xB10BE924, 0x2F6F7C87, 0x58684C11, 0xC1611DAB, 0xB6662D3D, - 0x76DC4190, 0x01DB7106, 0x98D220BC, 0xEFD5102A, 0x71B18589, 0x06B6B51F, 0x9FBFE4A5, 0xE8B8D433, - 0x7807C9A2, 0x0F00F934, 0x9609A88E, 0xE10E9818, 0x7F6A0DBB, 0x086D3D2D, 0x91646C97, 0xE6635C01, - 0x6B6B51F4, 0x1C6C6162, 0x856530D8, 0xF262004E, 0x6C0695ED, 0x1B01A57B, 0x8208F4C1, 0xF50FC457, - 0x65B0D9C6, 0x12B7E950, 0x8BBEB8EA, 0xFCB9887C, 0x62DD1DDF, 0x15DA2D49, 0x8CD37CF3, 0xFBD44C65, - 0x4DB26158, 0x3AB551CE, 0xA3BC0074, 0xD4BB30E2, 0x4ADFA541, 0x3DD895D7, 0xA4D1C46D, 0xD3D6F4FB, - 0x4369E96A, 0x346ED9FC, 0xAD678846, 0xDA60B8D0, 0x44042D73, 0x33031DE5, 0xAA0A4C5F, 0xDD0D7CC9, - 0x5005713C, 0x270241AA, 0xBE0B1010, 0xC90C2086, 0x5768B525, 0x206F85B3, 0xB966D409, 0xCE61E49F, - 0x5EDEF90E, 0x29D9C998, 0xB0D09822, 0xC7D7A8B4, 0x59B33D17, 0x2EB40D81, 0xB7BD5C3B, 0xC0BA6CAD, - 0xEDB88320, 0x9ABFB3B6, 0x03B6E20C, 0x74B1D29A, 0xEAD54739, 0x9DD277AF, 0x04DB2615, 0x73DC1683, - 0xE3630B12, 0x94643B84, 0x0D6D6A3E, 0x7A6A5AA8, 0xE40ECF0B, 0x9309FF9D, 0x0A00AE27, 0x7D079EB1, - 0xF00F9344, 0x8708A3D2, 0x1E01F268, 0x6906C2FE, 0xF762575D, 0x806567CB, 0x196C3671, 0x6E6B06E7, - 0xFED41B76, 0x89D32BE0, 0x10DA7A5A, 0x67DD4ACC, 0xF9B9DF6F, 0x8EBEEFF9, 0x17B7BE43, 0x60B08ED5, - 0xD6D6A3E8, 0xA1D1937E, 0x38D8C2C4, 0x4FDFF252, 0xD1BB67F1, 0xA6BC5767, 0x3FB506DD, 0x48B2364B, - 0xD80D2BDA, 0xAF0A1B4C, 0x36034AF6, 0x41047A60, 0xDF60EFC3, 0xA867DF55, 0x316E8EEF, 0x4669BE79, - 0xCB61B38C, 0xBC66831A, 0x256FD2A0, 0x5268E236, 0xCC0C7795, 0xBB0B4703, 0x220216B9, 0x5505262F, - 0xC5BA3BBE, 0xB2BD0B28, 0x2BB45A92, 0x5CB36A04, 0xC2D7FFA7, 0xB5D0CF31, 0x2CD99E8B, 0x5BDEAE1D, - 0x9B64C2B0, 0xEC63F226, 0x756AA39C, 0x026D930A, 0x9C0906A9, 0xEB0E363F, 0x72076785, 0x05005713, - 0x95BF4A82, 0xE2B87A14, 0x7BB12BAE, 0x0CB61B38, 0x92D28E9B, 0xE5D5BE0D, 0x7CDCEFB7, 0x0BDBDF21, - 0x86D3D2D4, 0xF1D4E242, 0x68DDB3F8, 0x1FDA836E, 0x81BE16CD, 0xF6B9265B, 0x6FB077E1, 0x18B74777, - 0x88085AE6, 0xFF0F6A70, 0x66063BCA, 0x11010B5C, 0x8F659EFF, 0xF862AE69, 0x616BFFD3, 0x166CCF45, - 0xA00AE278, 0xD70DD2EE, 0x4E048354, 0x3903B3C2, 0xA7672661, 0xD06016F7, 0x4969474D, 0x3E6E77DB, - 0xAED16A4A, 0xD9D65ADC, 0x40DF0B66, 0x37D83BF0, 0xA9BCAE53, 0xDEBB9EC5, 0x47B2CF7F, 0x30B5FFE9, - 0xBDBDF21C, 0xCABAC28A, 0x53B39330, 0x24B4A3A6, 0xBAD03605, 0xCDD70693, 0x54DE5729, 0x23D967BF, - 0xB3667A2E, 0xC4614AB8, 0x5D681B02, 0x2A6F2B94, 0xB40BBE37, 0xC30C8EA1, 0x5A05DF1B, 0x2D02EF8D + 0x00000000, 0x77073096, 0xEE0E612C, 0x990951BA, 0x076DC419, 0x706AF48F, 0xE963A535, 0x9E6495A3, + 0x0eDB8832, 0x79DCB8A4, 0xE0D5E91E, 0x97D2D988, 0x09B64C2B, 0x7EB17CBD, 0xE7B82D07, 0x90BF1D91, + 0x1DB71064, 0x6AB020F2, 0xF3B97148, 0x84BE41DE, 0x1ADAD47D, 0x6DDDE4EB, 0xF4D4B551, 0x83D385C7, + 0x136C9856, 0x646BA8C0, 0xFD62F97A, 0x8A65C9EC, 0x14015C4F, 0x63066CD9, 0xFA0F3D63, 0x8D080DF5, + 0x3B6E20C8, 0x4C69105E, 0xD56041E4, 0xA2677172, 0x3C03E4D1, 0x4B04D447, 0xD20D85FD, 0xA50AB56B, + 0x35B5A8FA, 0x42B2986C, 0xDBBBC9D6, 0xACBCF940, 0x32D86CE3, 0x45DF5C75, 0xDCD60DCF, 0xABD13D59, + 0x26D930AC, 0x51DE003A, 0xC8D75180, 0xBFD06116, 0x21B4F4B5, 0x56B3C423, 0xCFBA9599, 0xB8BDA50F, + 0x2802B89E, 0x5F058808, 0xC60CD9B2, 0xB10BE924, 0x2F6F7C87, 0x58684C11, 0xC1611DAB, 0xB6662D3D, + 0x76DC4190, 0x01DB7106, 0x98D220BC, 0xEFD5102A, 0x71B18589, 0x06B6B51F, 0x9FBFE4A5, 0xE8B8D433, + 0x7807C9A2, 0x0F00F934, 0x9609A88E, 0xE10E9818, 0x7F6A0DBB, 0x086D3D2D, 0x91646C97, 0xE6635C01, + 0x6B6B51F4, 0x1C6C6162, 0x856530D8, 0xF262004E, 0x6C0695ED, 0x1B01A57B, 0x8208F4C1, 0xF50FC457, + 0x65B0D9C6, 0x12B7E950, 0x8BBEB8EA, 0xFCB9887C, 0x62DD1DDF, 0x15DA2D49, 0x8CD37CF3, 0xFBD44C65, + 0x4DB26158, 0x3AB551CE, 0xA3BC0074, 0xD4BB30E2, 0x4ADFA541, 0x3DD895D7, 0xA4D1C46D, 0xD3D6F4FB, + 0x4369E96A, 0x346ED9FC, 0xAD678846, 0xDA60B8D0, 0x44042D73, 0x33031DE5, 0xAA0A4C5F, 0xDD0D7CC9, + 0x5005713C, 0x270241AA, 0xBE0B1010, 0xC90C2086, 0x5768B525, 0x206F85B3, 0xB966D409, 0xCE61E49F, + 0x5EDEF90E, 0x29D9C998, 0xB0D09822, 0xC7D7A8B4, 0x59B33D17, 0x2EB40D81, 0xB7BD5C3B, 0xC0BA6CAD, + 0xEDB88320, 0x9ABFB3B6, 0x03B6E20C, 0x74B1D29A, 0xEAD54739, 0x9DD277AF, 0x04DB2615, 0x73DC1683, + 0xE3630B12, 0x94643B84, 0x0D6D6A3E, 0x7A6A5AA8, 0xE40ECF0B, 0x9309FF9D, 0x0A00AE27, 0x7D079EB1, + 0xF00F9344, 0x8708A3D2, 0x1E01F268, 0x6906C2FE, 0xF762575D, 0x806567CB, 0x196C3671, 0x6E6B06E7, + 0xFED41B76, 0x89D32BE0, 0x10DA7A5A, 0x67DD4ACC, 0xF9B9DF6F, 0x8EBEEFF9, 0x17B7BE43, 0x60B08ED5, + 0xD6D6A3E8, 0xA1D1937E, 0x38D8C2C4, 0x4FDFF252, 0xD1BB67F1, 0xA6BC5767, 0x3FB506DD, 0x48B2364B, + 0xD80D2BDA, 0xAF0A1B4C, 0x36034AF6, 0x41047A60, 0xDF60EFC3, 0xA867DF55, 0x316E8EEF, 0x4669BE79, + 0xCB61B38C, 0xBC66831A, 0x256FD2A0, 0x5268E236, 0xCC0C7795, 0xBB0B4703, 0x220216B9, 0x5505262F, + 0xC5BA3BBE, 0xB2BD0B28, 0x2BB45A92, 0x5CB36A04, 0xC2D7FFA7, 0xB5D0CF31, 0x2CD99E8B, 0x5BDEAE1D, + 0x9B64C2B0, 0xEC63F226, 0x756AA39C, 0x026D930A, 0x9C0906A9, 0xEB0E363F, 0x72076785, 0x05005713, + 0x95BF4A82, 0xE2B87A14, 0x7BB12BAE, 0x0CB61B38, 0x92D28E9B, 0xE5D5BE0D, 0x7CDCEFB7, 0x0BDBDF21, + 0x86D3D2D4, 0xF1D4E242, 0x68DDB3F8, 0x1FDA836E, 0x81BE16CD, 0xF6B9265B, 0x6FB077E1, 0x18B74777, + 0x88085AE6, 0xFF0F6A70, 0x66063BCA, 0x11010B5C, 0x8F659EFF, 0xF862AE69, 0x616BFFD3, 0x166CCF45, + 0xA00AE278, 0xD70DD2EE, 0x4E048354, 0x3903B3C2, 0xA7672661, 0xD06016F7, 0x4969474D, 0x3E6E77DB, + 0xAED16A4A, 0xD9D65ADC, 0x40DF0B66, 0x37D83BF0, 0xA9BCAE53, 0xDEBB9EC5, 0x47B2CF7F, 0x30B5FFE9, + 0xBDBDF21C, 0xCABAC28A, 0x53B39330, 0x24B4A3A6, 0xBAD03605, 0xCDD70693, 0x54DE5729, 0x23D967BF, + 0xB3667A2E, 0xC4614AB8, 0x5D681B02, 0x2A6F2B94, 0xB40BBE37, 0xC30C8EA1, 0x5A05DF1B, 0x2D02EF8D }; unsigned int crc = ~0u; int i; for (i=0; i < len; ++i) - crc = (crc >> 8) ^ crc_table[buffer[i] ^ (crc & 0xff)]; + crc = (crc >> 8) ^ crc_table[buffer[i] ^ (crc & 0xff)]; return ~crc; } @@ -995,26 +995,26 @@ static void stbiw__encode_png_line(unsigned char *pixels, int stride_bytes, int unsigned char *z = pixels + stride_bytes * (stbi__flip_vertically_on_write ? height-1-y : y); int signed_stride = stbi__flip_vertically_on_write ? -stride_bytes : stride_bytes; for (i = 0; i < n; ++i) { - switch (type) { - case 0: line_buffer[i] = z[i]; break; - case 1: line_buffer[i] = z[i]; break; - case 2: line_buffer[i] = z[i] - z[i-signed_stride]; break; - case 3: line_buffer[i] = z[i] - (z[i-signed_stride]>>1); break; - case 4: line_buffer[i] = (signed char) (z[i] - stbiw__paeth(0,z[i-signed_stride],0)); break; - case 5: line_buffer[i] = z[i]; break; - case 6: line_buffer[i] = z[i]; break; - } + switch (type) { + case 0: line_buffer[i] = z[i]; break; + case 1: line_buffer[i] = z[i]; break; + case 2: line_buffer[i] = z[i] - z[i-signed_stride]; break; + case 3: line_buffer[i] = z[i] - (z[i-signed_stride]>>1); break; + case 4: line_buffer[i] = (signed char) (z[i] - stbiw__paeth(0,z[i-signed_stride],0)); break; + case 5: line_buffer[i] = z[i]; break; + case 6: line_buffer[i] = z[i]; break; + } } for (i=n; i < width*n; ++i) { - switch (type) { - case 0: line_buffer[i] = z[i]; break; - case 1: line_buffer[i] = z[i] - z[i-n]; break; - case 2: line_buffer[i] = z[i] - z[i-signed_stride]; break; - case 3: line_buffer[i] = z[i] - ((z[i-n] + z[i-signed_stride])>>1); break; - case 4: line_buffer[i] = z[i] - stbiw__paeth(z[i-n], z[i-signed_stride], z[i-signed_stride-n]); break; - case 5: line_buffer[i] = z[i] - (z[i-n]>>1); break; - case 6: line_buffer[i] = z[i] - stbiw__paeth(z[i-n], 0,0); break; - } + switch (type) { + case 0: line_buffer[i] = z[i]; break; + case 1: line_buffer[i] = z[i] - z[i-n]; break; + case 2: line_buffer[i] = z[i] - z[i-signed_stride]; break; + case 3: line_buffer[i] = z[i] - ((z[i-n] + z[i-signed_stride])>>1); break; + case 4: line_buffer[i] = z[i] - stbiw__paeth(z[i-n], z[i-signed_stride], z[i-signed_stride-n]); break; + case 5: line_buffer[i] = z[i] - (z[i-n]>>1); break; + case 6: line_buffer[i] = z[i] - stbiw__paeth(z[i-n], 0,0); break; + } } } @@ -1028,42 +1028,42 @@ unsigned char *stbi_write_png_to_mem(unsigned char *pixels, int stride_bytes, in int j,zlen; if (stride_bytes == 0) - stride_bytes = x * n; + stride_bytes = x * n; if (force_filter >= 5) { - force_filter = -1; + force_filter = -1; } filt = (unsigned char *) STBIW_MALLOC((x*n+1) * y); if (!filt) return 0; line_buffer = (signed char *) STBIW_MALLOC(x * n); if (!line_buffer) { STBIW_FREE(filt); return 0; } for (j=0; j < y; ++j) { - int filter_type; - if (force_filter > -1) { - filter_type = force_filter; - stbiw__encode_png_line(pixels, stride_bytes, x, y, j, n, force_filter, line_buffer); - } else { // Estimate the best filter by running through all of them: - int best_filter = 0, best_filter_val = 0x7fffffff, est, i; - for (filter_type = 0; filter_type < 5; filter_type++) { - stbiw__encode_png_line(pixels, stride_bytes, x, y, j, n, filter_type, line_buffer); - - // Estimate the entropy of the line using this filter; the less, the better. - est = 0; - for (i = 0; i < x*n; ++i) { - est += abs((signed char) line_buffer[i]); - } - if (est < best_filter_val) { - best_filter_val = est; - best_filter = filter_type; - } - } - if (filter_type != best_filter) { // If the last iteration already got us the best filter, don't redo it - stbiw__encode_png_line(pixels, stride_bytes, x, y, j, n, best_filter, line_buffer); - filter_type = best_filter; - } - } - // when we get here, filter_type contains the filter type, and line_buffer contains the data - filt[j*(x*n+1)] = (unsigned char) filter_type; - STBIW_MEMMOVE(filt+j*(x*n+1)+1, line_buffer, x*n); + int filter_type; + if (force_filter > -1) { + filter_type = force_filter; + stbiw__encode_png_line(pixels, stride_bytes, x, y, j, n, force_filter, line_buffer); + } else { // Estimate the best filter by running through all of them: + int best_filter = 0, best_filter_val = 0x7fffffff, est, i; + for (filter_type = 0; filter_type < 5; filter_type++) { + stbiw__encode_png_line(pixels, stride_bytes, x, y, j, n, filter_type, line_buffer); + + // Estimate the entropy of the line using this filter; the less, the better. + est = 0; + for (i = 0; i < x*n; ++i) { + est += abs((signed char) line_buffer[i]); + } + if (est < best_filter_val) { + best_filter_val = est; + best_filter = filter_type; + } + } + if (filter_type != best_filter) { // If the last iteration already got us the best filter, don't redo it + stbiw__encode_png_line(pixels, stride_bytes, x, y, j, n, best_filter, line_buffer); + filter_type = best_filter; + } + } + // when we get here, filter_type contains the filter type, and line_buffer contains the data + filt[j*(x*n+1)] = (unsigned char) filter_type; + STBIW_MEMMOVE(filt+j*(x*n+1)+1, line_buffer, x*n); } STBIW_FREE(line_buffer); zlib = stbi_zlib_compress(filt, y*( x*n+1), &zlen, stbi_write_png_compression_level); @@ -1113,7 +1113,7 @@ STBIWDEF int stbi_write_png(char const *filename, int x, int y, int comp, const if (png == NULL) return 0; #ifdef STBI_MSC_SECURE_CRT if (fopen_s(&f, filename, "wb")) - f = NULL; + f = NULL; #else f = fopen(filename, "wb"); #endif @@ -1145,20 +1145,20 @@ STBIWDEF int stbi_write_png_to_func(stbi_write_func *func, void *context, int x, */ static const unsigned char stbiw__jpg_ZigZag[] = { 0,1,5,6,14,15,27,28,2,4,7,13,16,26,29,42,3,8,12,17,25,30,41,43,9,11,18, - 24,31,40,44,53,10,19,23,32,39,45,52,54,20,22,33,38,46,51,55,60,21,34,37,47,50,56,59,61,35,36,48,49,57,58,62,63 }; + 24,31,40,44,53,10,19,23,32,39,45,52,54,20,22,33,38,46,51,55,60,21,34,37,47,50,56,59,61,35,36,48,49,57,58,62,63 }; static void stbiw__jpg_writeBits(stbi__write_context *s, int *bitBufP, int *bitCntP, const unsigned short *bs) { int bitBuf = *bitBufP, bitCnt = *bitCntP; bitCnt += bs[1]; bitBuf |= bs[0] << (24 - bitCnt); while(bitCnt >= 8) { - unsigned char c = (bitBuf >> 16) & 255; - stbiw__putc(s, c); - if(c == 255) { - stbiw__putc(s, 0); - } - bitBuf <<= 8; - bitCnt -= 8; + unsigned char c = (bitBuf >> 16) & 255; + stbiw__putc(s, c); + if(c == 255) { + stbiw__putc(s, 0); + } + bitBuf <<= 8; + bitCnt -= 8; } *bitBufP = bitBuf; *bitCntP = bitCnt; @@ -1178,20 +1178,20 @@ static void stbiw__jpg_DCT(float *d0p, float *d1p, float *d2p, float *d3p, float float tmp4 = d3 - d4; // Even part - float tmp10 = tmp0 + tmp3; // phase 2 + float tmp10 = tmp0 + tmp3; // phase 2 float tmp13 = tmp0 - tmp3; float tmp11 = tmp1 + tmp2; float tmp12 = tmp1 - tmp2; - d0 = tmp10 + tmp11; // phase 3 + d0 = tmp10 + tmp11; // phase 3 d4 = tmp10 - tmp11; z1 = (tmp12 + tmp13) * 0.707106781f; // c4 - d2 = tmp13 + z1; // phase 5 + d2 = tmp13 + z1; // phase 5 d6 = tmp13 - z1; // Odd part - tmp10 = tmp4 + tmp5; // phase 2 + tmp10 = tmp4 + tmp5; // phase 2 tmp11 = tmp5 + tmp6; tmp12 = tmp6 + tmp7; @@ -1201,10 +1201,10 @@ static void stbiw__jpg_DCT(float *d0p, float *d1p, float *d2p, float *d3p, float z4 = tmp12 * 1.306562965f + z5; // c2+c6 z3 = tmp11 * 0.707106781f; // c4 - z11 = tmp7 + z3; // phase 5 + z11 = tmp7 + z3; // phase 5 z13 = tmp7 - z3; - *d5p = z13 + z2; // phase 6 + *d5p = z13 + z2; // phase 6 *d3p = z13 - z2; *d1p = z11 + z4; *d7p = z11 - z4; @@ -1217,7 +1217,7 @@ static void stbiw__jpg_calcBits(int val, unsigned short bits[2]) { val = val < 0 ? val-1 : val; bits[1] = 1; while(tmp1 >>= 1) { - ++bits[1]; + ++bits[1]; } bits[0] = val & ((1<= 16 ) { - int lng = nrzeroes>>4; - int nrmarker; - for (nrmarker=1; nrmarker <= lng; ++nrmarker) - stbiw__jpg_writeBits(s, bitBuf, bitCnt, M16zeroes); - nrzeroes &= 15; - } - stbiw__jpg_calcBits(DU[i], bits); - stbiw__jpg_writeBits(s, bitBuf, bitCnt, HTAC[(nrzeroes<<4)+bits[1]]); - stbiw__jpg_writeBits(s, bitBuf, bitCnt, bits); + int startpos = i; + int nrzeroes; + unsigned short bits[2]; + for (; DU[i]==0 && i<=end0pos; ++i) { + } + nrzeroes = i-startpos; + if ( nrzeroes >= 16 ) { + int lng = nrzeroes>>4; + int nrmarker; + for (nrmarker=1; nrmarker <= lng; ++nrmarker) + stbiw__jpg_writeBits(s, bitBuf, bitCnt, M16zeroes); + nrzeroes &= 15; + } + stbiw__jpg_calcBits(DU[i], bits); + stbiw__jpg_writeBits(s, bitBuf, bitCnt, HTAC[(nrzeroes<<4)+bits[1]]); + stbiw__jpg_writeBits(s, bitBuf, bitCnt, bits); } if(end0pos != 63) { - stbiw__jpg_writeBits(s, bitBuf, bitCnt, EOB); + stbiw__jpg_writeBits(s, bitBuf, bitCnt, EOB); } return DU[0]; } @@ -1293,78 +1293,78 @@ static int stbi_write_jpg_core(stbi__write_context *s, int width, int height, in static const unsigned char std_dc_luminance_values[] = {0,1,2,3,4,5,6,7,8,9,10,11}; static const unsigned char std_ac_luminance_nrcodes[] = {0,0,2,1,3,3,2,4,3,5,5,4,4,0,0,1,0x7d}; static const unsigned char std_ac_luminance_values[] = { - 0x01,0x02,0x03,0x00,0x04,0x11,0x05,0x12,0x21,0x31,0x41,0x06,0x13,0x51,0x61,0x07,0x22,0x71,0x14,0x32,0x81,0x91,0xa1,0x08, - 0x23,0x42,0xb1,0xc1,0x15,0x52,0xd1,0xf0,0x24,0x33,0x62,0x72,0x82,0x09,0x0a,0x16,0x17,0x18,0x19,0x1a,0x25,0x26,0x27,0x28, - 0x29,0x2a,0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4a,0x53,0x54,0x55,0x56,0x57,0x58,0x59, - 0x5a,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x83,0x84,0x85,0x86,0x87,0x88,0x89, - 0x8a,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xb2,0xb3,0xb4,0xb5,0xb6, - 0xb7,0xb8,0xb9,0xba,0xc2,0xc3,0xc4,0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xd9,0xda,0xe1,0xe2, - 0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,0xf9,0xfa + 0x01,0x02,0x03,0x00,0x04,0x11,0x05,0x12,0x21,0x31,0x41,0x06,0x13,0x51,0x61,0x07,0x22,0x71,0x14,0x32,0x81,0x91,0xa1,0x08, + 0x23,0x42,0xb1,0xc1,0x15,0x52,0xd1,0xf0,0x24,0x33,0x62,0x72,0x82,0x09,0x0a,0x16,0x17,0x18,0x19,0x1a,0x25,0x26,0x27,0x28, + 0x29,0x2a,0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4a,0x53,0x54,0x55,0x56,0x57,0x58,0x59, + 0x5a,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x83,0x84,0x85,0x86,0x87,0x88,0x89, + 0x8a,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xb2,0xb3,0xb4,0xb5,0xb6, + 0xb7,0xb8,0xb9,0xba,0xc2,0xc3,0xc4,0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xd9,0xda,0xe1,0xe2, + 0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,0xf9,0xfa }; static const unsigned char std_dc_chrominance_nrcodes[] = {0,0,3,1,1,1,1,1,1,1,1,1,0,0,0,0,0}; static const unsigned char std_dc_chrominance_values[] = {0,1,2,3,4,5,6,7,8,9,10,11}; static const unsigned char std_ac_chrominance_nrcodes[] = {0,0,2,1,2,4,4,3,4,7,5,4,4,0,1,2,0x77}; static const unsigned char std_ac_chrominance_values[] = { - 0x00,0x01,0x02,0x03,0x11,0x04,0x05,0x21,0x31,0x06,0x12,0x41,0x51,0x07,0x61,0x71,0x13,0x22,0x32,0x81,0x08,0x14,0x42,0x91, - 0xa1,0xb1,0xc1,0x09,0x23,0x33,0x52,0xf0,0x15,0x62,0x72,0xd1,0x0a,0x16,0x24,0x34,0xe1,0x25,0xf1,0x17,0x18,0x19,0x1a,0x26, - 0x27,0x28,0x29,0x2a,0x35,0x36,0x37,0x38,0x39,0x3a,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4a,0x53,0x54,0x55,0x56,0x57,0x58, - 0x59,0x5a,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x82,0x83,0x84,0x85,0x86,0x87, - 0x88,0x89,0x8a,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xb2,0xb3,0xb4, - 0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xc2,0xc3,0xc4,0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xd9,0xda, - 0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,0xf9,0xfa + 0x00,0x01,0x02,0x03,0x11,0x04,0x05,0x21,0x31,0x06,0x12,0x41,0x51,0x07,0x61,0x71,0x13,0x22,0x32,0x81,0x08,0x14,0x42,0x91, + 0xa1,0xb1,0xc1,0x09,0x23,0x33,0x52,0xf0,0x15,0x62,0x72,0xd1,0x0a,0x16,0x24,0x34,0xe1,0x25,0xf1,0x17,0x18,0x19,0x1a,0x26, + 0x27,0x28,0x29,0x2a,0x35,0x36,0x37,0x38,0x39,0x3a,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4a,0x53,0x54,0x55,0x56,0x57,0x58, + 0x59,0x5a,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x82,0x83,0x84,0x85,0x86,0x87, + 0x88,0x89,0x8a,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xb2,0xb3,0xb4, + 0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xc2,0xc3,0xc4,0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xd9,0xda, + 0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,0xf9,0xfa }; // Huffman tables static const unsigned short YDC_HT[256][2] = { {0,2},{2,3},{3,3},{4,3},{5,3},{6,3},{14,4},{30,5},{62,6},{126,7},{254,8},{510,9}}; static const unsigned short UVDC_HT[256][2] = { {0,2},{1,2},{2,2},{6,3},{14,4},{30,5},{62,6},{126,7},{254,8},{510,9},{1022,10},{2046,11}}; static const unsigned short YAC_HT[256][2] = { - {10,4},{0,2},{1,2},{4,3},{11,4},{26,5},{120,7},{248,8},{1014,10},{65410,16},{65411,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, - {12,4},{27,5},{121,7},{502,9},{2038,11},{65412,16},{65413,16},{65414,16},{65415,16},{65416,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, - {28,5},{249,8},{1015,10},{4084,12},{65417,16},{65418,16},{65419,16},{65420,16},{65421,16},{65422,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, - {58,6},{503,9},{4085,12},{65423,16},{65424,16},{65425,16},{65426,16},{65427,16},{65428,16},{65429,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, - {59,6},{1016,10},{65430,16},{65431,16},{65432,16},{65433,16},{65434,16},{65435,16},{65436,16},{65437,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, - {122,7},{2039,11},{65438,16},{65439,16},{65440,16},{65441,16},{65442,16},{65443,16},{65444,16},{65445,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, - {123,7},{4086,12},{65446,16},{65447,16},{65448,16},{65449,16},{65450,16},{65451,16},{65452,16},{65453,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, - {250,8},{4087,12},{65454,16},{65455,16},{65456,16},{65457,16},{65458,16},{65459,16},{65460,16},{65461,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, - {504,9},{32704,15},{65462,16},{65463,16},{65464,16},{65465,16},{65466,16},{65467,16},{65468,16},{65469,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, - {505,9},{65470,16},{65471,16},{65472,16},{65473,16},{65474,16},{65475,16},{65476,16},{65477,16},{65478,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, - {506,9},{65479,16},{65480,16},{65481,16},{65482,16},{65483,16},{65484,16},{65485,16},{65486,16},{65487,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, - {1017,10},{65488,16},{65489,16},{65490,16},{65491,16},{65492,16},{65493,16},{65494,16},{65495,16},{65496,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, - {1018,10},{65497,16},{65498,16},{65499,16},{65500,16},{65501,16},{65502,16},{65503,16},{65504,16},{65505,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, - {2040,11},{65506,16},{65507,16},{65508,16},{65509,16},{65510,16},{65511,16},{65512,16},{65513,16},{65514,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, - {65515,16},{65516,16},{65517,16},{65518,16},{65519,16},{65520,16},{65521,16},{65522,16},{65523,16},{65524,16},{0,0},{0,0},{0,0},{0,0},{0,0}, - {2041,11},{65525,16},{65526,16},{65527,16},{65528,16},{65529,16},{65530,16},{65531,16},{65532,16},{65533,16},{65534,16},{0,0},{0,0},{0,0},{0,0},{0,0} + {10,4},{0,2},{1,2},{4,3},{11,4},{26,5},{120,7},{248,8},{1014,10},{65410,16},{65411,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {12,4},{27,5},{121,7},{502,9},{2038,11},{65412,16},{65413,16},{65414,16},{65415,16},{65416,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {28,5},{249,8},{1015,10},{4084,12},{65417,16},{65418,16},{65419,16},{65420,16},{65421,16},{65422,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {58,6},{503,9},{4085,12},{65423,16},{65424,16},{65425,16},{65426,16},{65427,16},{65428,16},{65429,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {59,6},{1016,10},{65430,16},{65431,16},{65432,16},{65433,16},{65434,16},{65435,16},{65436,16},{65437,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {122,7},{2039,11},{65438,16},{65439,16},{65440,16},{65441,16},{65442,16},{65443,16},{65444,16},{65445,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {123,7},{4086,12},{65446,16},{65447,16},{65448,16},{65449,16},{65450,16},{65451,16},{65452,16},{65453,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {250,8},{4087,12},{65454,16},{65455,16},{65456,16},{65457,16},{65458,16},{65459,16},{65460,16},{65461,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {504,9},{32704,15},{65462,16},{65463,16},{65464,16},{65465,16},{65466,16},{65467,16},{65468,16},{65469,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {505,9},{65470,16},{65471,16},{65472,16},{65473,16},{65474,16},{65475,16},{65476,16},{65477,16},{65478,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {506,9},{65479,16},{65480,16},{65481,16},{65482,16},{65483,16},{65484,16},{65485,16},{65486,16},{65487,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {1017,10},{65488,16},{65489,16},{65490,16},{65491,16},{65492,16},{65493,16},{65494,16},{65495,16},{65496,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {1018,10},{65497,16},{65498,16},{65499,16},{65500,16},{65501,16},{65502,16},{65503,16},{65504,16},{65505,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {2040,11},{65506,16},{65507,16},{65508,16},{65509,16},{65510,16},{65511,16},{65512,16},{65513,16},{65514,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {65515,16},{65516,16},{65517,16},{65518,16},{65519,16},{65520,16},{65521,16},{65522,16},{65523,16},{65524,16},{0,0},{0,0},{0,0},{0,0},{0,0}, + {2041,11},{65525,16},{65526,16},{65527,16},{65528,16},{65529,16},{65530,16},{65531,16},{65532,16},{65533,16},{65534,16},{0,0},{0,0},{0,0},{0,0},{0,0} }; static const unsigned short UVAC_HT[256][2] = { - {0,2},{1,2},{4,3},{10,4},{24,5},{25,5},{56,6},{120,7},{500,9},{1014,10},{4084,12},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, - {11,4},{57,6},{246,8},{501,9},{2038,11},{4085,12},{65416,16},{65417,16},{65418,16},{65419,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, - {26,5},{247,8},{1015,10},{4086,12},{32706,15},{65420,16},{65421,16},{65422,16},{65423,16},{65424,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, - {27,5},{248,8},{1016,10},{4087,12},{65425,16},{65426,16},{65427,16},{65428,16},{65429,16},{65430,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, - {58,6},{502,9},{65431,16},{65432,16},{65433,16},{65434,16},{65435,16},{65436,16},{65437,16},{65438,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, - {59,6},{1017,10},{65439,16},{65440,16},{65441,16},{65442,16},{65443,16},{65444,16},{65445,16},{65446,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, - {121,7},{2039,11},{65447,16},{65448,16},{65449,16},{65450,16},{65451,16},{65452,16},{65453,16},{65454,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, - {122,7},{2040,11},{65455,16},{65456,16},{65457,16},{65458,16},{65459,16},{65460,16},{65461,16},{65462,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, - {249,8},{65463,16},{65464,16},{65465,16},{65466,16},{65467,16},{65468,16},{65469,16},{65470,16},{65471,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, - {503,9},{65472,16},{65473,16},{65474,16},{65475,16},{65476,16},{65477,16},{65478,16},{65479,16},{65480,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, - {504,9},{65481,16},{65482,16},{65483,16},{65484,16},{65485,16},{65486,16},{65487,16},{65488,16},{65489,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, - {505,9},{65490,16},{65491,16},{65492,16},{65493,16},{65494,16},{65495,16},{65496,16},{65497,16},{65498,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, - {506,9},{65499,16},{65500,16},{65501,16},{65502,16},{65503,16},{65504,16},{65505,16},{65506,16},{65507,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, - {2041,11},{65508,16},{65509,16},{65510,16},{65511,16},{65512,16},{65513,16},{65514,16},{65515,16},{65516,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, - {16352,14},{65517,16},{65518,16},{65519,16},{65520,16},{65521,16},{65522,16},{65523,16},{65524,16},{65525,16},{0,0},{0,0},{0,0},{0,0},{0,0}, - {1018,10},{32707,15},{65526,16},{65527,16},{65528,16},{65529,16},{65530,16},{65531,16},{65532,16},{65533,16},{65534,16},{0,0},{0,0},{0,0},{0,0},{0,0} + {0,2},{1,2},{4,3},{10,4},{24,5},{25,5},{56,6},{120,7},{500,9},{1014,10},{4084,12},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {11,4},{57,6},{246,8},{501,9},{2038,11},{4085,12},{65416,16},{65417,16},{65418,16},{65419,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {26,5},{247,8},{1015,10},{4086,12},{32706,15},{65420,16},{65421,16},{65422,16},{65423,16},{65424,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {27,5},{248,8},{1016,10},{4087,12},{65425,16},{65426,16},{65427,16},{65428,16},{65429,16},{65430,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {58,6},{502,9},{65431,16},{65432,16},{65433,16},{65434,16},{65435,16},{65436,16},{65437,16},{65438,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {59,6},{1017,10},{65439,16},{65440,16},{65441,16},{65442,16},{65443,16},{65444,16},{65445,16},{65446,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {121,7},{2039,11},{65447,16},{65448,16},{65449,16},{65450,16},{65451,16},{65452,16},{65453,16},{65454,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {122,7},{2040,11},{65455,16},{65456,16},{65457,16},{65458,16},{65459,16},{65460,16},{65461,16},{65462,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {249,8},{65463,16},{65464,16},{65465,16},{65466,16},{65467,16},{65468,16},{65469,16},{65470,16},{65471,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {503,9},{65472,16},{65473,16},{65474,16},{65475,16},{65476,16},{65477,16},{65478,16},{65479,16},{65480,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {504,9},{65481,16},{65482,16},{65483,16},{65484,16},{65485,16},{65486,16},{65487,16},{65488,16},{65489,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {505,9},{65490,16},{65491,16},{65492,16},{65493,16},{65494,16},{65495,16},{65496,16},{65497,16},{65498,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {506,9},{65499,16},{65500,16},{65501,16},{65502,16},{65503,16},{65504,16},{65505,16},{65506,16},{65507,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {2041,11},{65508,16},{65509,16},{65510,16},{65511,16},{65512,16},{65513,16},{65514,16},{65515,16},{65516,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {16352,14},{65517,16},{65518,16},{65519,16},{65520,16},{65521,16},{65522,16},{65523,16},{65524,16},{65525,16},{0,0},{0,0},{0,0},{0,0},{0,0}, + {1018,10},{32707,15},{65526,16},{65527,16},{65528,16},{65529,16},{65530,16},{65531,16},{65532,16},{65533,16},{65534,16},{0,0},{0,0},{0,0},{0,0},{0,0} }; static const int YQT[] = {16,11,10,16,24,40,51,61,12,12,14,19,26,58,60,55,14,13,16,24,40,57,69,56,14,17,22,29,51,87,80,62,18,22, - 37,56,68,109,103,77,24,35,55,64,81,104,113,92,49,64,78,87,103,121,120,101,72,92,95,98,112,100,103,99}; + 37,56,68,109,103,77,24,35,55,64,81,104,113,92,49,64,78,87,103,121,120,101,72,92,95,98,112,100,103,99}; static const int UVQT[] = {17,18,24,47,99,99,99,99,18,21,26,66,99,99,99,99,24,26,56,99,99,99,99,99,47,66,99,99,99,99,99,99, - 99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99}; + 99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99}; static const float aasf[] = { 1.0f * 2.828427125f, 1.387039845f * 2.828427125f, 1.306562965f * 2.828427125f, 1.175875602f * 2.828427125f, - 1.0f * 2.828427125f, 0.785694958f * 2.828427125f, 0.541196100f * 2.828427125f, 0.275899379f * 2.828427125f }; + 1.0f * 2.828427125f, 0.785694958f * 2.828427125f, 0.541196100f * 2.828427125f, 0.275899379f * 2.828427125f }; int row, col, i, k; float fdtbl_Y[64], fdtbl_UV[64]; unsigned char YTable[64], UVTable[64]; if(!data || !width || !height || comp > 4 || comp < 1) { - return 0; + return 0; } quality = quality ? quality : 90; @@ -1372,84 +1372,84 @@ static int stbi_write_jpg_core(stbi__write_context *s, int width, int height, in quality = quality < 50 ? 5000 / quality : 200 - quality * 2; for(i = 0; i < 64; ++i) { - int uvti, yti = (YQT[i]*quality+50)/100; - YTable[stbiw__jpg_ZigZag[i]] = (unsigned char) (yti < 1 ? 1 : yti > 255 ? 255 : yti); - uvti = (UVQT[i]*quality+50)/100; - UVTable[stbiw__jpg_ZigZag[i]] = (unsigned char) (uvti < 1 ? 1 : uvti > 255 ? 255 : uvti); + int uvti, yti = (YQT[i]*quality+50)/100; + YTable[stbiw__jpg_ZigZag[i]] = (unsigned char) (yti < 1 ? 1 : yti > 255 ? 255 : yti); + uvti = (UVQT[i]*quality+50)/100; + UVTable[stbiw__jpg_ZigZag[i]] = (unsigned char) (uvti < 1 ? 1 : uvti > 255 ? 255 : uvti); } for(row = 0, k = 0; row < 8; ++row) { - for(col = 0; col < 8; ++col, ++k) { - fdtbl_Y[k] = 1 / (YTable [stbiw__jpg_ZigZag[k]] * aasf[row] * aasf[col]); - fdtbl_UV[k] = 1 / (UVTable[stbiw__jpg_ZigZag[k]] * aasf[row] * aasf[col]); - } + for(col = 0; col < 8; ++col, ++k) { + fdtbl_Y[k] = 1 / (YTable [stbiw__jpg_ZigZag[k]] * aasf[row] * aasf[col]); + fdtbl_UV[k] = 1 / (UVTable[stbiw__jpg_ZigZag[k]] * aasf[row] * aasf[col]); + } } // Write Headers { - static const unsigned char head0[] = { 0xFF,0xD8,0xFF,0xE0,0,0x10,'J','F','I','F',0,1,1,0,0,1,0,1,0,0,0xFF,0xDB,0,0x84,0 }; - static const unsigned char head2[] = { 0xFF,0xDA,0,0xC,3,1,0,2,0x11,3,0x11,0,0x3F,0 }; - const unsigned char head1[] = { 0xFF,0xC0,0,0x11,8,(unsigned char)(height>>8),STBIW_UCHAR(height),(unsigned char)(width>>8),STBIW_UCHAR(width), - 3,1,0x11,0,2,0x11,1,3,0x11,1,0xFF,0xC4,0x01,0xA2,0 }; - s->func(s->context, (void*)head0, sizeof(head0)); - s->func(s->context, (void*)YTable, sizeof(YTable)); - stbiw__putc(s, 1); - s->func(s->context, UVTable, sizeof(UVTable)); - s->func(s->context, (void*)head1, sizeof(head1)); - s->func(s->context, (void*)(std_dc_luminance_nrcodes+1), sizeof(std_dc_luminance_nrcodes)-1); - s->func(s->context, (void*)std_dc_luminance_values, sizeof(std_dc_luminance_values)); - stbiw__putc(s, 0x10); // HTYACinfo - s->func(s->context, (void*)(std_ac_luminance_nrcodes+1), sizeof(std_ac_luminance_nrcodes)-1); - s->func(s->context, (void*)std_ac_luminance_values, sizeof(std_ac_luminance_values)); - stbiw__putc(s, 1); // HTUDCinfo - s->func(s->context, (void*)(std_dc_chrominance_nrcodes+1), sizeof(std_dc_chrominance_nrcodes)-1); - s->func(s->context, (void*)std_dc_chrominance_values, sizeof(std_dc_chrominance_values)); - stbiw__putc(s, 0x11); // HTUACinfo - s->func(s->context, (void*)(std_ac_chrominance_nrcodes+1), sizeof(std_ac_chrominance_nrcodes)-1); - s->func(s->context, (void*)std_ac_chrominance_values, sizeof(std_ac_chrominance_values)); - s->func(s->context, (void*)head2, sizeof(head2)); + static const unsigned char head0[] = { 0xFF,0xD8,0xFF,0xE0,0,0x10,'J','F','I','F',0,1,1,0,0,1,0,1,0,0,0xFF,0xDB,0,0x84,0 }; + static const unsigned char head2[] = { 0xFF,0xDA,0,0xC,3,1,0,2,0x11,3,0x11,0,0x3F,0 }; + const unsigned char head1[] = { 0xFF,0xC0,0,0x11,8,(unsigned char)(height>>8),STBIW_UCHAR(height),(unsigned char)(width>>8),STBIW_UCHAR(width), + 3,1,0x11,0,2,0x11,1,3,0x11,1,0xFF,0xC4,0x01,0xA2,0 }; + s->func(s->context, (void*)head0, sizeof(head0)); + s->func(s->context, (void*)YTable, sizeof(YTable)); + stbiw__putc(s, 1); + s->func(s->context, UVTable, sizeof(UVTable)); + s->func(s->context, (void*)head1, sizeof(head1)); + s->func(s->context, (void*)(std_dc_luminance_nrcodes+1), sizeof(std_dc_luminance_nrcodes)-1); + s->func(s->context, (void*)std_dc_luminance_values, sizeof(std_dc_luminance_values)); + stbiw__putc(s, 0x10); // HTYACinfo + s->func(s->context, (void*)(std_ac_luminance_nrcodes+1), sizeof(std_ac_luminance_nrcodes)-1); + s->func(s->context, (void*)std_ac_luminance_values, sizeof(std_ac_luminance_values)); + stbiw__putc(s, 1); // HTUDCinfo + s->func(s->context, (void*)(std_dc_chrominance_nrcodes+1), sizeof(std_dc_chrominance_nrcodes)-1); + s->func(s->context, (void*)std_dc_chrominance_values, sizeof(std_dc_chrominance_values)); + stbiw__putc(s, 0x11); // HTUACinfo + s->func(s->context, (void*)(std_ac_chrominance_nrcodes+1), sizeof(std_ac_chrominance_nrcodes)-1); + s->func(s->context, (void*)std_ac_chrominance_values, sizeof(std_ac_chrominance_values)); + s->func(s->context, (void*)head2, sizeof(head2)); } // Encode 8x8 macroblocks { - static const unsigned short fillBits[] = {0x7F, 7}; - const unsigned char *imageData = (const unsigned char *)data; - int DCY=0, DCU=0, DCV=0; - int bitBuf=0, bitCnt=0; - // comp == 2 is grey+alpha (alpha is ignored) - int ofsG = comp > 2 ? 1 : 0, ofsB = comp > 2 ? 2 : 0; - int x, y, pos; - for(y = 0; y < height; y += 8) { - for(x = 0; x < width; x += 8) { - float YDU[64], UDU[64], VDU[64]; - for(row = y, pos = 0; row < y+8; ++row) { - for(col = x; col < x+8; ++col, ++pos) { - int p = (stbi__flip_vertically_on_write ? height-1-row : row)*width*comp + col*comp; - float r, g, b; - if(row >= height) { - p -= width*comp*(row+1 - height); - } - if(col >= width) { - p -= comp*(col+1 - width); - } - - r = imageData[p+0]; - g = imageData[p+ofsG]; - b = imageData[p+ofsB]; - YDU[pos]=+0.29900f*r+0.58700f*g+0.11400f*b-128; - UDU[pos]=-0.16874f*r-0.33126f*g+0.50000f*b; - VDU[pos]=+0.50000f*r-0.41869f*g-0.08131f*b; - } - } - - DCY = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, YDU, fdtbl_Y, DCY, YDC_HT, YAC_HT); - DCU = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, UDU, fdtbl_UV, DCU, UVDC_HT, UVAC_HT); - DCV = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, VDU, fdtbl_UV, DCV, UVDC_HT, UVAC_HT); - } - } - - // Do the bit alignment of the EOI marker - stbiw__jpg_writeBits(s, &bitBuf, &bitCnt, fillBits); + static const unsigned short fillBits[] = {0x7F, 7}; + const unsigned char *imageData = (const unsigned char *)data; + int DCY=0, DCU=0, DCV=0; + int bitBuf=0, bitCnt=0; + // comp == 2 is grey+alpha (alpha is ignored) + int ofsG = comp > 2 ? 1 : 0, ofsB = comp > 2 ? 2 : 0; + int x, y, pos; + for(y = 0; y < height; y += 8) { + for(x = 0; x < width; x += 8) { + float YDU[64], UDU[64], VDU[64]; + for(row = y, pos = 0; row < y+8; ++row) { + for(col = x; col < x+8; ++col, ++pos) { + int p = (stbi__flip_vertically_on_write ? height-1-row : row)*width*comp + col*comp; + float r, g, b; + if(row >= height) { + p -= width*comp*(row+1 - height); + } + if(col >= width) { + p -= comp*(col+1 - width); + } + + r = imageData[p+0]; + g = imageData[p+ofsG]; + b = imageData[p+ofsB]; + YDU[pos]=+0.29900f*r+0.58700f*g+0.11400f*b-128; + UDU[pos]=-0.16874f*r-0.33126f*g+0.50000f*b; + VDU[pos]=+0.50000f*r-0.41869f*g-0.08131f*b; + } + } + + DCY = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, YDU, fdtbl_Y, DCY, YDC_HT, YAC_HT); + DCU = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, UDU, fdtbl_UV, DCU, UVDC_HT, UVAC_HT); + DCV = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, VDU, fdtbl_UV, DCV, UVDC_HT, UVAC_HT); + } + } + + // Do the bit alignment of the EOI marker + stbiw__jpg_writeBits(s, &bitBuf, &bitCnt, fillBits); } // EOI @@ -1472,57 +1472,57 @@ STBIWDEF int stbi_write_jpg(char const *filename, int x, int y, int comp, const { stbi__write_context s; if (stbi__start_write_file(&s,filename)) { - int r = stbi_write_jpg_core(&s, x, y, comp, data, quality); - stbi__end_write_file(&s); - return r; + int r = stbi_write_jpg_core(&s, x, y, comp, data, quality); + stbi__end_write_file(&s); + return r; } else - return 0; + return 0; } #endif #endif // STB_IMAGE_WRITE_IMPLEMENTATION /* Revision history - 1.09 (2018-02-11) - fix typo in zlib quality API, improve STB_I_W_STATIC in C++ - 1.08 (2018-01-29) - add stbi__flip_vertically_on_write, external zlib, zlib quality, choose PNG filter - 1.07 (2017-07-24) - doc fix - 1.06 (2017-07-23) - writing JPEG (using Jon Olick's code) - 1.05 ??? - 1.04 (2017-03-03) - monochrome BMP expansion - 1.03 ??? - 1.02 (2016-04-02) - avoid allocating large structures on the stack - 1.01 (2016-01-16) - STBIW_REALLOC_SIZED: support allocators with no realloc support - avoid race-condition in crc initialization - minor compile issues - 1.00 (2015-09-14) - installable file IO function - 0.99 (2015-09-13) - warning fixes; TGA rle support - 0.98 (2015-04-08) - added STBIW_MALLOC, STBIW_ASSERT etc - 0.97 (2015-01-18) - fixed HDR asserts, rewrote HDR rle logic - 0.96 (2015-01-17) - add HDR output - fix monochrome BMP - 0.95 (2014-08-17) - add monochrome TGA output - 0.94 (2014-05-31) - rename private functions to avoid conflicts with stb_image.h - 0.93 (2014-05-27) - warning fixes - 0.92 (2010-08-01) - casts to unsigned char to fix warnings - 0.91 (2010-07-17) - first public release - 0.90 first internal release + 1.09 (2018-02-11) + fix typo in zlib quality API, improve STB_I_W_STATIC in C++ + 1.08 (2018-01-29) + add stbi__flip_vertically_on_write, external zlib, zlib quality, choose PNG filter + 1.07 (2017-07-24) + doc fix + 1.06 (2017-07-23) + writing JPEG (using Jon Olick's code) + 1.05 ??? + 1.04 (2017-03-03) + monochrome BMP expansion + 1.03 ??? + 1.02 (2016-04-02) + avoid allocating large structures on the stack + 1.01 (2016-01-16) + STBIW_REALLOC_SIZED: support allocators with no realloc support + avoid race-condition in crc initialization + minor compile issues + 1.00 (2015-09-14) + installable file IO function + 0.99 (2015-09-13) + warning fixes; TGA rle support + 0.98 (2015-04-08) + added STBIW_MALLOC, STBIW_ASSERT etc + 0.97 (2015-01-18) + fixed HDR asserts, rewrote HDR rle logic + 0.96 (2015-01-17) + add HDR output + fix monochrome BMP + 0.95 (2014-08-17) + add monochrome TGA output + 0.94 (2014-05-31) + rename private functions to avoid conflicts with stb_image.h + 0.93 (2014-05-27) + warning fixes + 0.92 (2010-08-01) + casts to unsigned char to fix warnings + 0.91 (2010-07-17) + first public release + 0.90 first internal release */ /* diff --git a/src/vkvg_buff.c b/src/vkvg_buff.c index 54b02e0..8216701 100644 --- a/src/vkvg_buff.c +++ b/src/vkvg_buff.c @@ -36,4 +36,3 @@ void vkvg_buffer_create(VkvgDevice pDev, VkBufferUsageFlags usage, VmaMemoryUsag void vkvg_buffer_destroy(vkvg_buff *buff){ vmaDestroyBuffer (buff->pDev->allocator, buff->buffer, buff->alloc); } - diff --git a/src/vkvg_buff.h b/src/vkvg_buff.h index eaba746..387178f 100644 --- a/src/vkvg_buff.h +++ b/src/vkvg_buff.h @@ -27,14 +27,14 @@ #include "vk_mem_alloc.h" typedef struct vkvg_buff_t { - VkvgDevice pDev; - VkBuffer buffer; - VmaAllocation alloc; - VmaAllocationInfo allocInfo; - VkDescriptorBufferInfo descriptor; + VkvgDevice pDev; + VkBuffer buffer; + VmaAllocation alloc; + VmaAllocationInfo allocInfo; + VkDescriptorBufferInfo descriptor; }vkvg_buff; -void vkvg_buffer_create (VkvgDevice pDev, VkBufferUsageFlags usage, - VmaMemoryUsage memoryPropertyFlags, VkDeviceSize size, vkvg_buff *buff); -void vkvg_buffer_destroy (vkvg_buff* buff); +void vkvg_buffer_create (VkvgDevice pDev, VkBufferUsageFlags usage, + VmaMemoryUsage memoryPropertyFlags, VkDeviceSize size, vkvg_buff *buff); +void vkvg_buffer_destroy (vkvg_buff* buff); #endif diff --git a/src/vkvg_context.c b/src/vkvg_context.c index 0ec252e..7632f52 100644 --- a/src/vkvg_context.c +++ b/src/vkvg_context.c @@ -55,14 +55,14 @@ VkvgContext vkvg_create(VkvgSurface surf) return NULL; } - ctx->sizePoints = VKVG_PTS_SIZE; - ctx->sizeVertices = ctx->sizeVBO = VKVG_VBO_SIZE; - ctx->sizeIndices = ctx->sizeIBO = VKVG_IBO_SIZE; - ctx->sizePathes = VKVG_PATHES_SIZE; - ctx->lineWidth = 1; - ctx->curOperator = VKVG_OPERATOR_OVER; - ctx->curFillRule = VKVG_FILL_RULE_NON_ZERO; - ctx->pSurf = surf; + ctx->sizePoints = VKVG_PTS_SIZE; + ctx->sizeVertices = ctx->sizeVBO = VKVG_VBO_SIZE; + ctx->sizeIndices = ctx->sizeIBO = VKVG_IBO_SIZE; + ctx->sizePathes = VKVG_PATHES_SIZE; + ctx->lineWidth = 1; + ctx->curOperator = VKVG_OPERATOR_OVER; + ctx->curFillRule = VKVG_FILL_RULE_NON_ZERO; + ctx->pSurf = surf; ctx->bounds = (VkRect2D) {{0,0},{ctx->pSurf->width,ctx->pSurf->height}}; ctx->pushConsts = (push_constants) { @@ -129,16 +129,16 @@ VkvgContext vkvg_create(VkvgSurface surf) //for context to be thread safe, command pool and descriptor pool have to be created in the thread of the context. ctx->cmdPool = vkh_cmd_pool_create ((VkhDevice)dev, dev->gQueue->familyIndex, VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT); - _create_vertices_buff (ctx); - _create_gradient_buff (ctx); - _create_cmd_buff (ctx); - _createDescriptorPool (ctx); - _init_descriptor_sets (ctx); - _update_descriptor_set (ctx, ctx->pSurf->dev->fontCache->texture, ctx->dsFont); - _update_descriptor_set (ctx, surf->dev->emptyImg, ctx->dsSrc); + _create_vertices_buff (ctx); + _create_gradient_buff (ctx); + _create_cmd_buff (ctx); + _createDescriptorPool (ctx); + _init_descriptor_sets (ctx); + _update_descriptor_set (ctx, ctx->pSurf->dev->fontCache->texture, ctx->dsFont); + _update_descriptor_set (ctx, surf->dev->emptyImg, ctx->dsSrc); _update_gradient_desc_set(ctx); - _clear_path (ctx); + _clear_path (ctx); ctx->cmd = ctx->cmdBuffers[0];//current recording buffer @@ -230,12 +230,12 @@ void vkvg_destroy (VkvgContext ctx) #endif - vkDestroyFence (dev, ctx->flushFence,NULL); + vkDestroyFence (dev, ctx->flushFence,NULL); vkFreeCommandBuffers(dev, ctx->cmdPool, 2, ctx->cmdBuffers); vkDestroyCommandPool(dev, ctx->cmdPool, NULL); VkDescriptorSet dss[] = {ctx->dsFont,ctx->dsSrc, ctx->dsGrad}; - vkFreeDescriptorSets (dev, ctx->descriptorPool, 3, dss); + vkFreeDescriptorSets (dev, ctx->descriptorPool, 3, dss); vkDestroyDescriptorPool (dev, ctx->descriptorPool,NULL); @@ -247,7 +247,7 @@ void vkvg_destroy (VkvgContext ctx) free(ctx->indexCache); //TODO:check this for source counter - //vkh_image_destroy (ctx->source); + //vkh_image_destroy (ctx->source); free(ctx->selectedFontName); free(ctx->pathes); @@ -511,8 +511,8 @@ void vkvg_rectangle (VkvgContext ctx, float x, float y, float w, float h){ vkvg_close_path (ctx); } -static const VkClearAttachment clearStencil = {VK_IMAGE_ASPECT_STENCIL_BIT, 1, {{{0}}}}; -static const VkClearAttachment clearColorAttach = {VK_IMAGE_ASPECT_COLOR_BIT, 0, {{{0}}}}; +static const VkClearAttachment clearStencil = {VK_IMAGE_ASPECT_STENCIL_BIT, 1, {{{0}}}}; +static const VkClearAttachment clearColorAttach = {VK_IMAGE_ASPECT_COLOR_BIT, 0, {{{0}}}}; void vkvg_reset_clip (VkvgContext ctx){ if (ctx->status) @@ -574,18 +574,18 @@ void vkvg_clip_preserve (VkvgContext ctx){ if (ctx->curFillRule == VKVG_FILL_RULE_EVEN_ODD){ _poly_fill (ctx); - CmdBindPipeline (ctx->cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, ctx->pSurf->dev->pipelineClipping); + CmdBindPipeline (ctx->cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, ctx->pSurf->dev->pipelineClipping); }else{ - CmdBindPipeline (ctx->cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, ctx->pSurf->dev->pipelineClipping); - CmdSetStencilReference (ctx->cmd, VK_STENCIL_FRONT_AND_BACK, STENCIL_FILL_BIT); + CmdBindPipeline (ctx->cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, ctx->pSurf->dev->pipelineClipping); + CmdSetStencilReference (ctx->cmd, VK_STENCIL_FRONT_AND_BACK, STENCIL_FILL_BIT); CmdSetStencilCompareMask(ctx->cmd, VK_STENCIL_FRONT_AND_BACK, STENCIL_CLIP_BIT); - CmdSetStencilWriteMask (ctx->cmd, VK_STENCIL_FRONT_AND_BACK, STENCIL_FILL_BIT); + CmdSetStencilWriteMask (ctx->cmd, VK_STENCIL_FRONT_AND_BACK, STENCIL_FILL_BIT); _fill_ec(ctx); _emit_draw_cmd_undrawn_vertices(ctx); } - CmdSetStencilReference (ctx->cmd, VK_STENCIL_FRONT_AND_BACK, STENCIL_CLIP_BIT); + CmdSetStencilReference (ctx->cmd, VK_STENCIL_FRONT_AND_BACK, STENCIL_CLIP_BIT); CmdSetStencilCompareMask(ctx->cmd, VK_STENCIL_FRONT_AND_BACK, STENCIL_FILL_BIT); - CmdSetStencilWriteMask (ctx->cmd, VK_STENCIL_FRONT_AND_BACK, STENCIL_ALL_BIT); + CmdSetStencilWriteMask (ctx->cmd, VK_STENCIL_FRONT_AND_BACK, STENCIL_ALL_BIT); _draw_full_screen_quad (ctx, false); @@ -696,11 +696,11 @@ void _draw_stoke_cap (VkvgContext ctx, float hw, vec2 p0, vec2 n, bool isStart) } } -static bool dashOn = true; -static uint32_t curDash = 0; //current dash index -static float curDashOffset = 0.f; //cur dash offset between defined path point and last dash segment(on/off) start +static bool dashOn = true; +static uint32_t curDash = 0; //current dash index +static float curDashOffset = 0.f; //cur dash offset between defined path point and last dash segment(on/off) start static float totDashLength = 0; //total length of dashes -static vec2 normal = {0}; +static vec2 normal = {0}; float _draw_dashed_segment (VkvgContext ctx, float hw, vec2 pL, vec2 p, vec2 pR, bool isCurve) { if (!dashOn)//we test in fact the next dash start, if dashOn = true => next segment is a void. @@ -766,7 +766,7 @@ void vkvg_stroke_preserve (VkvgContext ctx) if (ctx->dashCount > 0) { //init dash stroke dashOn = true; - curDash = 0; //current dash index + curDash = 0; //current dash index //limit offset to total length of dashes totDashLength = 0; @@ -776,7 +776,7 @@ void vkvg_stroke_preserve (VkvgContext ctx) ctx->status = VKVG_STATUS_INVALID_DASH; return; } - curDashOffset = fmodf(ctx->dashOffset, totDashLength); //cur dash offset between defined path point and last dash segment(on/off) start + curDashOffset = fmodf(ctx->dashOffset, totDashLength); //cur dash offset between defined path point and last dash segment(on/off) start iL = lastPathPointIdx; } else if (_path_is_closed(ctx,ptrPath)){ iL = lastPathPointIdx; @@ -890,7 +890,7 @@ void vkvg_set_source (VkvgContext ctx, VkvgPattern pat){ if (ctx->status) return; _update_cur_pattern (ctx, pat); - vkvg_pattern_reference (pat); + vkvg_pattern_reference (pat); } void vkvg_set_line_width (VkvgContext ctx, float width){ ctx->lineWidth = width; @@ -1068,7 +1068,7 @@ void vkvg_save (VkvgContext ctx){ .extent = {ctx->pSurf->width,ctx->pSurf->height,1}}; vkCmdCopyImage(ctx->cmd, vkh_image_get_vkimage (ctx->pSurf->stencil),VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, - vkh_image_get_vkimage (savStencil), VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + vkh_image_get_vkimage (savStencil), VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &cregion); vkh_image_set_layout (ctx->cmd, ctx->pSurf->stencil, VK_IMAGE_ASPECT_STENCIL_BIT, @@ -1091,11 +1091,11 @@ void vkvg_save (VkvgContext ctx){ vkh_cmd_label_start(ctx->cmd, "save rp", DBG_LAB_COLOR_SAV); #endif - CmdBindPipeline (ctx->cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, ctx->pSurf->dev->pipelineClipping); + CmdBindPipeline (ctx->cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, ctx->pSurf->dev->pipelineClipping); - CmdSetStencilReference (ctx->cmd, VK_STENCIL_FRONT_AND_BACK, STENCIL_CLIP_BIT|curSaveBit); + CmdSetStencilReference (ctx->cmd, VK_STENCIL_FRONT_AND_BACK, STENCIL_CLIP_BIT|curSaveBit); CmdSetStencilCompareMask(ctx->cmd, VK_STENCIL_FRONT_AND_BACK, STENCIL_CLIP_BIT); - CmdSetStencilWriteMask (ctx->cmd, VK_STENCIL_FRONT_AND_BACK, curSaveBit); + CmdSetStencilWriteMask (ctx->cmd, VK_STENCIL_FRONT_AND_BACK, curSaveBit); _draw_full_screen_quad (ctx, false); @@ -1107,15 +1107,15 @@ void vkvg_save (VkvgContext ctx){ #endif sav->dashOffset = ctx->dashOffset; - sav->dashCount = ctx->dashCount; + sav->dashCount = ctx->dashCount; if (ctx->dashCount > 0) { sav->dashes = (float*)malloc (sizeof(float) * ctx->dashCount); memcpy (sav->dashes, ctx->dashes, sizeof(float) * ctx->dashCount); } - sav->lineWidth = ctx->lineWidth; + sav->lineWidth = ctx->lineWidth; sav->curOperator= ctx->curOperator; - sav->lineCap = ctx->lineCap; - sav->lineWidth = ctx->lineWidth; + sav->lineCap = ctx->lineCap; + sav->lineWidth = ctx->lineWidth; sav->curFillRule= ctx->curFillRule; sav->selectedCharSize = ctx->selectedCharSize; @@ -1124,10 +1124,10 @@ void vkvg_save (VkvgContext ctx){ sav->currentFont = ctx->currentFont; sav->textDirection= ctx->textDirection; - sav->pushConsts = ctx->pushConsts; - //sav->pattern = ctx->pattern;//TODO:pattern sav must be imutable (copy?) + sav->pushConsts = ctx->pushConsts; + //sav->pattern = ctx->pattern;//TODO:pattern sav must be imutable (copy?) - sav->pNext = ctx->pSavedCtxs; + sav->pNext = ctx->pSavedCtxs; ctx->pSavedCtxs = sav; ctx->curSavBit++; @@ -1151,7 +1151,7 @@ void vkvg_restore (VkvgContext ctx){ vkvg_context_save_t* sav = ctx->pSavedCtxs; ctx->pSavedCtxs = sav->pNext; - ctx->pushConsts = sav->pushConsts; + ctx->pushConsts = sav->pushConsts; /*if (sav->pattern) _update_cur_pattern (ctx, sav->pattern);*/ @@ -1166,11 +1166,11 @@ void vkvg_restore (VkvgContext ctx){ vkh_cmd_label_start(ctx->cmd, "restore rp", DBG_LAB_COLOR_SAV); #endif - CmdBindPipeline (ctx->cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, ctx->pSurf->dev->pipelineClipping); + CmdBindPipeline (ctx->cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, ctx->pSurf->dev->pipelineClipping); - CmdSetStencilReference (ctx->cmd, VK_STENCIL_FRONT_AND_BACK, STENCIL_CLIP_BIT|curSaveBit); + CmdSetStencilReference (ctx->cmd, VK_STENCIL_FRONT_AND_BACK, STENCIL_CLIP_BIT|curSaveBit); CmdSetStencilCompareMask(ctx->cmd, VK_STENCIL_FRONT_AND_BACK, curSaveBit); - CmdSetStencilWriteMask (ctx->cmd, VK_STENCIL_FRONT_AND_BACK, STENCIL_CLIP_BIT); + CmdSetStencilWriteMask (ctx->cmd, VK_STENCIL_FRONT_AND_BACK, STENCIL_CLIP_BIT); _draw_full_screen_quad (ctx, false); @@ -1206,7 +1206,7 @@ void vkvg_restore (VkvgContext ctx){ .dstSubresource = {VK_IMAGE_ASPECT_STENCIL_BIT, 0, 0, 1}, .extent = {ctx->pSurf->width,ctx->pSurf->height,1}}; vkCmdCopyImage(ctx->cmd, - vkh_image_get_vkimage (savStencil), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, + vkh_image_get_vkimage (savStencil), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, vkh_image_get_vkimage (ctx->pSurf->stencil),VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &cregion); vkh_image_set_layout (ctx->cmd, ctx->pSurf->stencil, VK_IMAGE_ASPECT_STENCIL_BIT, @@ -1226,16 +1226,16 @@ void vkvg_restore (VkvgContext ctx){ ctx->dashOffset = sav->dashOffset; if (ctx->dashCount > 0) free (ctx->dashes); - ctx->dashCount = sav->dashCount; + ctx->dashCount = sav->dashCount; if (ctx->dashCount > 0) { ctx->dashes = (float*)malloc (sizeof(float) * ctx->dashCount); memcpy (ctx->dashes, sav->dashes, sizeof(float) * ctx->dashCount); } - ctx->lineWidth = sav->lineWidth; + ctx->lineWidth = sav->lineWidth; ctx->curOperator= sav->curOperator; - ctx->lineCap = sav->lineCap; - ctx->lineJoin = sav->lineJoint; + ctx->lineCap = sav->lineCap; + ctx->lineJoin = sav->lineJoint; ctx->curFillRule= sav->curFillRule; ctx->selectedCharSize = sav->selectedCharSize; diff --git a/src/vkvg_context_internal.c b/src/vkvg_context_internal.c index 384805a..c005f7b 100644 --- a/src/vkvg_context_internal.c +++ b/src/vkvg_context_internal.c @@ -21,12 +21,12 @@ */ //credits for bezier algorithms to: -// Anti-Grain Geometry (AGG) - Version 2.5 -// A high quality rendering engine for C++ -// Copyright (C) 2002-2006 Maxim Shemanarev -// Contact: mcseem@antigrain.com -// mcseemagg@yahoo.com -// http://antigrain.com +// Anti-Grain Geometry (AGG) - Version 2.5 +// A high quality rendering engine for C++ +// Copyright (C) 2002-2006 Maxim Shemanarev +// Contact: mcseem@antigrain.com +// mcseemagg@yahoo.com +// http://antigrain.com #include "vkvg_surface_internal.h" @@ -315,9 +315,9 @@ void _add_triangle_indices(VkvgContext ctx, VKVG_IBO_INDEX_TYPE i0, VKVG_IBO_IND void _vao_add_rectangle (VkvgContext ctx, float x, float y, float width, float height){ Vertex v[4] = { - {{x,y}, ctx->curColor, {0,0,-1}}, - {{x,y+height}, ctx->curColor, {0,0,-1}}, - {{x+width,y}, ctx->curColor, {0,0,-1}}, + {{x,y}, ctx->curColor, {0,0,-1}}, + {{x,y+height}, ctx->curColor, {0,0,-1}}, + {{x+width,y}, ctx->curColor, {0,0,-1}}, {{x+width,y+height},ctx->curColor, {0,0,-1}} }; VKVG_IBO_INDEX_TYPE firstIdx = (VKVG_IBO_INDEX_TYPE)(ctx->vertCount - ctx->curVertOffset); @@ -422,7 +422,7 @@ void _flush_vertices_caches (VkvgContext ctx) { //this func expect cmdStarted to be true void _end_render_pass (VkvgContext ctx) { LOG(VKVG_LOG_INFO, "END RENDER PASS: ctx = %p;\n", ctx); - CmdEndRenderPass (ctx->cmd); + CmdEndRenderPass (ctx->cmd); #if defined(DEBUG) && defined (VKVG_DBG_UTILS) vkh_cmd_label_end (ctx->cmd); #endif @@ -482,7 +482,7 @@ void _flush_cmd_buff (VkvgContext ctx){ return; _end_render_pass (ctx); LOG(VKVG_LOG_INFO, "FLUSH CTX: ctx = %p; vertices = %d; indices = %d\n", ctx, ctx->vertCount, ctx->indCount); - _flush_vertices_caches (ctx); + _flush_vertices_caches (ctx); vkh_cmd_end (ctx->cmd); _wait_and_submit_cmd (ctx); @@ -544,7 +544,7 @@ void _start_cmd_for_render_pass (VkvgContext ctx) { CmdBindVertexBuffers(ctx->cmd, 0, 1, &ctx->vertices.buffer, offsets); CmdBindIndexBuffer(ctx->cmd, ctx->indices.buffer, 0, VKVG_VK_INDEX_TYPE); - _update_push_constants (ctx); + _update_push_constants (ctx); _bind_draw_pipeline (ctx); CmdSetStencilCompareMask(ctx->cmd, VK_STENCIL_FRONT_AND_BACK, STENCIL_CLIP_BIT); @@ -574,12 +574,12 @@ void _update_cur_pattern (VkvgContext ctx, VkvgPattern pat) { }else newPatternType = pat->type; - switch (newPatternType) { + switch (newPatternType) { case VKVG_PATTERN_TYPE_SOLID: if (lastPat->type == VKVG_PATTERN_TYPE_SURFACE){ //unbind current source surface by replacing it with empty texture - _flush_cmd_buff (ctx); - _update_descriptor_set (ctx, ctx->pSurf->dev->emptyImg, ctx->dsSrc); + _flush_cmd_buff (ctx); + _update_descriptor_set (ctx, ctx->pSurf->dev->emptyImg, ctx->dsSrc); } break; case VKVG_PATTERN_TYPE_SURFACE: @@ -697,7 +697,7 @@ void _update_gradient_desc_set (VkvgContext ctx){ /*void _reset_src_descriptor_set (VkvgContext ctx){ VkvgDevice dev = ctx->pSurf->dev; //VkDescriptorSet dss[] = {ctx->dsSrc}; - vkFreeDescriptorSets (dev->vkDev, ctx->descriptorPool, 1, &ctx->dsSrc); + vkFreeDescriptorSets (dev->vkDev, ctx->descriptorPool, 1, &ctx->dsSrc); VkDescriptorSetAllocateInfo descriptorSetAllocateInfo = { .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, .descriptorPool = ctx->descriptorPool, @@ -736,14 +736,14 @@ void _init_descriptor_sets (VkvgContext ctx){ float _build_vb_step (vkvg_context* ctx, float hw, vec2 pL, vec2 p0, vec2 pR, bool isCurve){ Vertex v = {{0},ctx->curColor, {0,0,-1}}; - vec2 v0 = vec2_sub(p0, pL); - vec2 v1 = vec2_sub(pR, p0); - float length_v0 = vec2_length(v0); - float length_v1 = vec2_length(v1); - if (length_v0 < FLT_EPSILON || length_v1 < FLT_EPSILON) - return 0; - vec2 v0n = vec2_div (v0, length_v0); - vec2 v1n = vec2_div (v1, length_v1); + vec2 v0 = vec2_sub(p0, pL); + vec2 v1 = vec2_sub(pR, p0); + float length_v0 = vec2_length(v0); + float length_v1 = vec2_length(v1); + if (length_v0 < FLT_EPSILON || length_v1 < FLT_EPSILON) + return 0; + vec2 v0n = vec2_div (v0, length_v0); + vec2 v1n = vec2_div (v1, length_v1); vec2 bisec = vec2_norm(vec2_add(v0n,v1n)); @@ -757,8 +757,8 @@ float _build_vb_step (vkvg_context* ctx, float hw, vec2 pL, vec2 p0, vec2 pR, bo float lh = hw / cosf(alpha); bisec = vec2_perp(bisec); - //limit bisectrice lenght, may be improved but ok for perf - lh=fminf (lh, fminf (sqrtf(length_v0*length_v0+hw*hw), sqrtf(length_v1*length_v1+hw*hw))); + //limit bisectrice lenght, may be improved but ok for perf + lh=fminf (lh, fminf (sqrtf(length_v0*length_v0+hw*hw), sqrtf(length_v1*length_v1+hw*hw))); bisec = vec2_mult(bisec,lh); @@ -767,9 +767,9 @@ float _build_vb_step (vkvg_context* ctx, float hw, vec2 pL, vec2 p0, vec2 pR, bo if (ctx->lineJoin == VKVG_LINE_JOIN_MITER || isCurve){ v.pos = vec2_add(p0, bisec); _add_vertex(ctx, v); - v.pos = vec2_sub(p0, bisec); - _add_vertex(ctx, v); - _add_tri_indices_for_rect(ctx, idx); + v.pos = vec2_sub(p0, bisec); + _add_vertex(ctx, v); + _add_tri_indices_for_rect(ctx, idx); }else{ vec2 vp = vec2_perp(v0n); if (cross<0){ @@ -780,7 +780,7 @@ float _build_vb_step (vkvg_context* ctx, float hw, vec2 pL, vec2 p0, vec2 pR, bo v.pos = vec2_add (p0, vec2_mult (vp, hw)); _add_vertex(ctx, v); v.pos = vec2_sub (p0, bisec); - } + } _add_vertex(ctx, v); if (ctx->lineJoin == VKVG_LINE_JOIN_BEVEL){ @@ -876,11 +876,11 @@ void _free_ctx_save (vkvg_context_save_t* sav){ } -#define M_APPROXIMATION_SCALE 1.0 -#define M_ANGLE_TOLERANCE 0.01 -#define M_DISTANCE_TOLERANCE 1.0 -#define M_CUSP_LIMIT 0.01 -#define CURVE_RECURSION_LIMIT 10 +#define M_APPROXIMATION_SCALE 1.0 +#define M_ANGLE_TOLERANCE 0.01 +#define M_DISTANCE_TOLERANCE 1.0 +#define M_CUSP_LIMIT 0.01 +#define CURVE_RECURSION_LIMIT 10 #define CURVE_COLLINEARITY_EPSILON 1.7 #define CURVE_ANGLE_TOLERANCE_EPSILON 0.001 //no floating point arithmetic operation allowed in macro. @@ -896,16 +896,16 @@ void _recursive_bezier (VkvgContext ctx, // Calculate all the mid-points of the line segments //---------------------- - float x12 = (x1 + x2) / 2; - float y12 = (y1 + y2) / 2; - float x23 = (x2 + x3) / 2; - float y23 = (y2 + y3) / 2; - float x34 = (x3 + x4) / 2; - float y34 = (y3 + y4) / 2; - float x123 = (x12 + x23) / 2; - float y123 = (y12 + y23) / 2; - float x234 = (x23 + x34) / 2; - float y234 = (y23 + y34) / 2; + float x12 = (x1 + x2) / 2; + float y12 = (y1 + y2) / 2; + float x23 = (x2 + x3) / 2; + float y23 = (y2 + y3) / 2; + float x34 = (x3 + x4) / 2; + float y34 = (y3 + y4) / 2; + float x123 = (x12 + x23) / 2; + float y123 = (y12 + y23) / 2; + float x234 = (x23 + x34) / 2; + float y234 = (y23 + y34) / 2; float x1234 = (x123 + x234) / 2; float y1234 = (y123 + y234) / 2; diff --git a/src/vkvg_context_internal.h b/src/vkvg_context_internal.h index 592bc1f..ec5ea29 100644 --- a/src/vkvg_context_internal.h +++ b/src/vkvg_context_internal.h @@ -65,123 +65,123 @@ typedef struct{ }Vertex; typedef struct { - vec4 source; - vec2 size; - uint32_t patternType; - uint32_t fullScreenQuad; - vkvg_matrix_t mat; - vkvg_matrix_t matInv; + vec4 source; + vec2 size; + uint32_t patternType; + uint32_t fullScreenQuad; + vkvg_matrix_t mat; + vkvg_matrix_t matInv; }push_constants; typedef struct _vkvg_context_save_t{ struct _vkvg_context_save_t* pNext; - float lineWidth; - uint32_t dashCount; //value count in dash array, 0 if dash not set. - float dashOffset; //an offset for dash - float* dashes; //an array of alternate lengths of on and off stroke. + float lineWidth; + uint32_t dashCount; //value count in dash array, 0 if dash not set. + float dashOffset; //an offset for dash + float* dashes; //an array of alternate lengths of on and off stroke. - vkvg_operator_t curOperator; - vkvg_line_cap_t lineCap; - vkvg_line_join_t lineJoint; - vkvg_fill_rule_t curFillRule; + vkvg_operator_t curOperator; + vkvg_line_cap_t lineCap; + vkvg_line_join_t lineJoint; + vkvg_fill_rule_t curFillRule; FT_F26Dot6 selectedCharSize; /* Font size*/ char* selectedFontName; - _vkvg_font_identity_t selectedFont; //hold current face and size before cache addition - _vkvg_font_identity_t* currentFont; //font ready for lookup - vkvg_direction_t textDirection; - push_constants pushConsts; - VkvgPattern pattern; + _vkvg_font_identity_t selectedFont; //hold current face and size before cache addition + _vkvg_font_identity_t* currentFont; //font ready for lookup + vkvg_direction_t textDirection; + push_constants pushConsts; + VkvgPattern pattern; }vkvg_context_save_t; typedef struct _vkvg_context_t { - VkvgContext pPrev; //double linked list of contexts - VkvgContext pNext; - uint32_t references; //reference count - - VkvgSurface pSurf; //surface bound to context, set on creation of ctx - VkFence flushFence; //context fence - VkhImage source; //source of painting operation - - VkCommandPool cmdPool; //local pools ensure thread safety - VkCommandBuffer cmdBuffers[2];//double cmd buff for context operations - VkCommandBuffer cmd; //current recording buffer - bool cmdStarted; //prevent flushing empty renderpass - bool pushCstDirty;//prevent pushing to gpu if not requested + VkvgContext pPrev; //double linked list of contexts + VkvgContext pNext; + uint32_t references; //reference count + + VkvgSurface pSurf; //surface bound to context, set on creation of ctx + VkFence flushFence; //context fence + VkhImage source; //source of painting operation + + VkCommandPool cmdPool; //local pools ensure thread safety + VkCommandBuffer cmdBuffers[2];//double cmd buff for context operations + VkCommandBuffer cmd; //current recording buffer + bool cmdStarted; //prevent flushing empty renderpass + bool pushCstDirty;//prevent pushing to gpu if not requested VkDescriptorPool descriptorPool;//one pool per thread - VkDescriptorSet dsFont; //fonts glyphs texture atlas descriptor (local for thread safety) - VkDescriptorSet dsSrc; //source ds - VkDescriptorSet dsGrad; //gradient uniform buffer + VkDescriptorSet dsFont; //fonts glyphs texture atlas descriptor (local for thread safety) + VkDescriptorSet dsSrc; //source ds + VkDescriptorSet dsGrad; //gradient uniform buffer - VkRect2D bounds; + VkRect2D bounds; - uint32_t curColor; + uint32_t curColor; float xMin; float xMax; float yMin; float yMax; - vkvg_buff uboGrad; //uniform buff obj holdings gradient infos + vkvg_buff uboGrad; //uniform buff obj holdings gradient infos //vk buffers, holds data until flush - vkvg_buff indices; //index buffer with persistent map memory - uint32_t sizeIBO; //size of vk ibo - uint32_t sizeIndices; //reserved size - uint32_t indCount; //current indice count + vkvg_buff indices; //index buffer with persistent map memory + uint32_t sizeIBO; //size of vk ibo + uint32_t sizeIndices; //reserved size + uint32_t indCount; //current indice count - uint32_t curIndStart; //last index recorded in cmd buff - uint32_t curVertOffset; //vertex offset in draw indexed command + uint32_t curIndStart; //last index recorded in cmd buff + uint32_t curVertOffset; //vertex offset in draw indexed command - vkvg_buff vertices; //vertex buffer with persistent mapped memory - uint32_t sizeVBO; //size of vk vbo size - uint32_t sizeVertices; //reserved size - uint32_t vertCount; //effective vertices count + vkvg_buff vertices; //vertex buffer with persistent mapped memory + uint32_t sizeVBO; //size of vk vbo size + uint32_t sizeVertices; //reserved size + uint32_t vertCount; //effective vertices count Vertex* vertexCache; VKVG_IBO_INDEX_TYPE* indexCache; //pathes, exists until stroke of fill - vec2* points; //points array - uint32_t sizePoints; //reserved size - uint32_t pointCount; //effective points count + vec2* points; //points array + uint32_t sizePoints; //reserved size + uint32_t pointCount; //effective points count //pathes array is a list of point count per segment - uint32_t pathPtr; //pointer in the path array + uint32_t pathPtr; //pointer in the path array uint32_t* pathes; uint32_t sizePathes; - uint32_t segmentPtr; //current segment count in current path having curves + uint32_t segmentPtr; //current segment count in current path having curves float lineWidth; - uint32_t dashCount; //value count in dash array, 0 if dash not set. - float dashOffset; //an offset for dash - float* dashes; //an array of alternate lengths of on and off stroke. + uint32_t dashCount; //value count in dash array, 0 if dash not set. + float dashOffset; //an offset for dash + float* dashes; //an array of alternate lengths of on and off stroke. - vkvg_operator_t curOperator; - vkvg_line_cap_t lineCap; - vkvg_line_join_t lineJoin; - vkvg_fill_rule_t curFillRule; + vkvg_operator_t curOperator; + vkvg_line_cap_t lineCap; + vkvg_line_join_t lineJoin; + vkvg_fill_rule_t curFillRule; FT_F26Dot6 selectedCharSize; /* Font size*/ char* selectedFontName; - //_vkvg_font_t selectedFont; //hold current face and size before cache addition - _vkvg_font_identity_t* currentFont; //font pointing to cached fonts identity + //_vkvg_font_t selectedFont; //hold current face and size before cache addition + _vkvg_font_identity_t* currentFont; //font pointing to cached fonts identity _vkvg_font_t* currentFontSize; //font structure by size ready for lookup - vkvg_direction_t textDirection; + vkvg_direction_t textDirection; - push_constants pushConsts; - VkvgPattern pattern; - vkvg_status_t status; + push_constants pushConsts; + VkvgPattern pattern; + vkvg_status_t status; - vkvg_context_save_t* pSavedCtxs; //last ctx saved ptr - uint8_t curSavBit; //current stencil bit used to save context, 6 bits used by stencil for save/restore - VkhImage* savedStencils; //additional image for saving contexes once more than 6 save/restore are reached + vkvg_context_save_t* pSavedCtxs; //last ctx saved ptr + uint8_t curSavBit; //current stencil bit used to save context, 6 bits used by stencil for save/restore + VkhImage* savedStencils; //additional image for saving contexes once more than 6 save/restore are reached - VkClearRect clearRect; + VkClearRect clearRect; VkRenderPassBeginInfo renderPassBeginInfo; }vkvg_context; @@ -201,52 +201,52 @@ void _finish_path (VkvgContext ctx); void _clear_path (VkvgContext ctx); void _remove_last_point (VkvgContext ctx); bool _path_is_closed (VkvgContext ctx, uint32_t ptrPath); -void _set_curve_start (VkvgContext ctx); -void _set_curve_end (VkvgContext ctx); -bool _path_has_curves (VkvgContext ctx, uint32_t ptrPath); +void _set_curve_start (VkvgContext ctx); +void _set_curve_end (VkvgContext ctx); +bool _path_has_curves (VkvgContext ctx, uint32_t ptrPath); -float _normalizeAngle (float a); -float _get_arc_step (VkvgContext ctx, float radius); +float _normalizeAngle (float a); +float _get_arc_step (VkvgContext ctx, float radius); -vec2 _get_current_position (VkvgContext ctx); -void _add_point (VkvgContext ctx, float x, float y); +vec2 _get_current_position (VkvgContext ctx); +void _add_point (VkvgContext ctx, float x, float y); -void _resetMinMax (VkvgContext ctx); +void _resetMinMax (VkvgContext ctx); -void _poly_fill (VkvgContext ctx); -void _fill_ec (VkvgContext ctx);//earclipping fill +void _poly_fill (VkvgContext ctx); +void _fill_ec (VkvgContext ctx);//earclipping fill void _draw_full_screen_quad (VkvgContext ctx, bool useScissor); -void _create_gradient_buff (VkvgContext ctx); +void _create_gradient_buff (VkvgContext ctx); void _create_vertices_buff (VkvgContext ctx); void _add_vertex (VkvgContext ctx, Vertex v); -void _add_vertexf (VkvgContext ctx, float x, float y); +void _add_vertexf (VkvgContext ctx, float x, float y); void _set_vertex (VkvgContext ctx, uint32_t idx, Vertex v); void _add_triangle_indices (VkvgContext ctx, VKVG_IBO_INDEX_TYPE i0, VKVG_IBO_INDEX_TYPE i1, VKVG_IBO_INDEX_TYPE i2); void _add_tri_indices_for_rect (VkvgContext ctx, VKVG_IBO_INDEX_TYPE i); -float _build_vb_step (vkvg_context* ctx, float hw, vec2 pL, vec2 p0, vec2 pR, bool isCurve); -void _vao_add_rectangle (VkvgContext ctx, float x, float y, float width, float height); +float _build_vb_step (vkvg_context* ctx, float hw, vec2 pL, vec2 p0, vec2 pR, bool isCurve); +void _vao_add_rectangle (VkvgContext ctx, float x, float y, float width, float height); -void _bind_draw_pipeline (VkvgContext ctx); +void _bind_draw_pipeline (VkvgContext ctx); void _create_cmd_buff (VkvgContext ctx); void _check_vao_size (VkvgContext ctx); -void _ensure_renderpass_is_started (VkvgContext ctx); +void _ensure_renderpass_is_started (VkvgContext ctx); void _flush_cmd_buff (VkvgContext ctx); void _emit_draw_cmd_undrawn_vertices(VkvgContext ctx); void _flush_cmd_until_vx_base (VkvgContext ctx); -void _wait_flush_fence (VkvgContext ctx); -void _reset_flush_fence (VkvgContext ctx); -void _wait_and_submit_cmd (VkvgContext ctx); +void _wait_flush_fence (VkvgContext ctx); +void _reset_flush_fence (VkvgContext ctx); +void _wait_and_submit_cmd (VkvgContext ctx); void _update_push_constants (VkvgContext ctx); -void _update_cur_pattern (VkvgContext ctx, VkvgPattern pat); +void _update_cur_pattern (VkvgContext ctx, VkvgPattern pat); void _set_mat_inv_and_vkCmdPush (VkvgContext ctx); void _start_cmd_for_render_pass (VkvgContext ctx); -void _createDescriptorPool (VkvgContext ctx); -void _init_descriptor_sets (VkvgContext ctx); +void _createDescriptorPool (VkvgContext ctx); +void _init_descriptor_sets (VkvgContext ctx); void _update_descriptor_set (VkvgContext ctx, VkhImage img, VkDescriptorSet ds); void _update_gradient_desc_set(VkvgContext ctx); -void _free_ctx_save (vkvg_context_save_t* sav); +void _free_ctx_save (vkvg_context_save_t* sav); static inline float vec2_zcross (vec2 v1, vec2 v2){ return v1.x*v2.y-v1.y*v2.x; diff --git a/src/vkvg_device.c b/src/vkvg_device.c index 00de9d5..251e5bf 100644 --- a/src/vkvg_device.c +++ b/src/vkvg_device.c @@ -36,12 +36,12 @@ VkvgDevice vkvg_device_create_multisample(VkInstance inst, VkPhysicalDevice phy, VkvgDevice dev = (vkvg_device*)calloc(1,sizeof(vkvg_device)); dev->instance = inst; - dev->hdpi = 96; - dev->vdpi = 96; + dev->hdpi = 96; + dev->vdpi = 96; dev->samples= samples; dev->deferredResolve = deferredResolve; - dev->vkDev = vkdev; - dev->phy = phy; + dev->vkDev = vkdev; + dev->phy = phy; #if VKVG_DBG_STATS dev->debug_stats = (vkvg_debug_stats_t) {0}; @@ -74,12 +74,12 @@ VkvgDevice vkvg_device_create_multisample(VkInstance inst, VkPhysicalDevice phy, dev->lastCtx= NULL; - dev->cmdPool= vkh_cmd_pool_create ((VkhDevice)dev, dev->gQueue->familyIndex, VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT); - dev->cmd = vkh_cmd_buff_create ((VkhDevice)dev, dev->cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY); - dev->fence = vkh_fence_create_signaled ((VkhDevice)dev); + dev->cmdPool= vkh_cmd_pool_create ((VkhDevice)dev, dev->gQueue->familyIndex, VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT); + dev->cmd = vkh_cmd_buff_create ((VkhDevice)dev, dev->cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY); + dev->fence = vkh_fence_create_signaled ((VkhDevice)dev); - _create_pipeline_cache (dev); - _init_fonts_cache (dev); + _create_pipeline_cache (dev); + _init_fonts_cache (dev); if (dev->deferredResolve || dev->samples == VK_SAMPLE_COUNT_1_BIT){ dev->renderPass = _createRenderPassNoResolve (dev, VK_ATTACHMENT_LOAD_OP_LOAD, VK_ATTACHMENT_LOAD_OP_LOAD); dev->renderPass_ClearStencil = _createRenderPassNoResolve (dev, VK_ATTACHMENT_LOAD_OP_LOAD, VK_ATTACHMENT_LOAD_OP_CLEAR); @@ -89,10 +89,10 @@ VkvgDevice vkvg_device_create_multisample(VkInstance inst, VkPhysicalDevice phy, dev->renderPass_ClearStencil = _createRenderPassMS (dev, VK_ATTACHMENT_LOAD_OP_LOAD, VK_ATTACHMENT_LOAD_OP_CLEAR); dev->renderPass_ClearAll = _createRenderPassMS (dev, VK_ATTACHMENT_LOAD_OP_CLEAR, VK_ATTACHMENT_LOAD_OP_CLEAR); } - _createDescriptorSetLayout (dev); - _setupPipelines (dev); + _createDescriptorSetLayout (dev); + _setupPipelines (dev); - _create_empty_texture (dev, format, dev->supportedTiling); + _create_empty_texture (dev, format, dev->supportedTiling); dev->references = 1; @@ -131,35 +131,35 @@ void vkvg_device_destroy (VkvgDevice dev) LOG(VKVG_LOG_INFO, "DESTROY Device\n"); - vkh_image_destroy (dev->emptyImg); + vkh_image_destroy (dev->emptyImg); - vkDestroyDescriptorSetLayout (dev->vkDev, dev->dslGrad,NULL); - vkDestroyDescriptorSetLayout (dev->vkDev, dev->dslFont,NULL); - vkDestroyDescriptorSetLayout (dev->vkDev, dev->dslSrc, NULL); + vkDestroyDescriptorSetLayout (dev->vkDev, dev->dslGrad,NULL); + vkDestroyDescriptorSetLayout (dev->vkDev, dev->dslFont,NULL); + vkDestroyDescriptorSetLayout (dev->vkDev, dev->dslSrc, NULL); - vkDestroyPipeline (dev->vkDev, dev->pipelinePolyFill, NULL); - vkDestroyPipeline (dev->vkDev, dev->pipelineClipping, NULL); + vkDestroyPipeline (dev->vkDev, dev->pipelinePolyFill, NULL); + vkDestroyPipeline (dev->vkDev, dev->pipelineClipping, NULL); - vkDestroyPipeline (dev->vkDev, dev->pipe_OVER, NULL); - vkDestroyPipeline (dev->vkDev, dev->pipe_SUB, NULL); - vkDestroyPipeline (dev->vkDev, dev->pipe_CLEAR, NULL); + vkDestroyPipeline (dev->vkDev, dev->pipe_OVER, NULL); + vkDestroyPipeline (dev->vkDev, dev->pipe_SUB, NULL); + vkDestroyPipeline (dev->vkDev, dev->pipe_CLEAR, NULL); #ifdef VKVG_WIRED_DEBUG - vkDestroyPipeline (dev->vkDev, dev->pipelineWired, NULL); - vkDestroyPipeline (dev->vkDev, dev->pipelineLineList, NULL); + vkDestroyPipeline (dev->vkDev, dev->pipelineWired, NULL); + vkDestroyPipeline (dev->vkDev, dev->pipelineLineList, NULL); #endif - vkDestroyPipelineLayout (dev->vkDev, dev->pipelineLayout, NULL); - vkDestroyPipelineCache (dev->vkDev, dev->pipelineCache, NULL); - vkDestroyRenderPass (dev->vkDev, dev->renderPass, NULL); - vkDestroyRenderPass (dev->vkDev, dev->renderPass_ClearStencil, NULL); - vkDestroyRenderPass (dev->vkDev, dev->renderPass_ClearAll, NULL); + vkDestroyPipelineLayout (dev->vkDev, dev->pipelineLayout, NULL); + vkDestroyPipelineCache (dev->vkDev, dev->pipelineCache, NULL); + vkDestroyRenderPass (dev->vkDev, dev->renderPass, NULL); + vkDestroyRenderPass (dev->vkDev, dev->renderPass_ClearStencil, NULL); + vkDestroyRenderPass (dev->vkDev, dev->renderPass_ClearAll, NULL); - vkWaitForFences (dev->vkDev, 1, &dev->fence, VK_TRUE, UINT64_MAX); + vkWaitForFences (dev->vkDev, 1, &dev->fence, VK_TRUE, UINT64_MAX); - vkDestroyFence (dev->vkDev, dev->fence,NULL); - vkFreeCommandBuffers (dev->vkDev, dev->cmdPool, 1, &dev->cmd); - vkDestroyCommandPool (dev->vkDev, dev->cmdPool, NULL); + vkDestroyFence (dev->vkDev, dev->fence,NULL); + vkFreeCommandBuffers (dev->vkDev, dev->cmdPool, 1, &dev->cmd); + vkDestroyCommandPool (dev->vkDev, dev->cmdPool, NULL); vkh_queue_destroy(dev->gQueue); diff --git a/src/vkvg_device_internal.c b/src/vkvg_device_internal.c index f99b6a1..39e2d12 100644 --- a/src/vkvg_device_internal.c +++ b/src/vkvg_device_internal.c @@ -30,23 +30,23 @@ uint8_t vkvg_log_level = VKVG_LOG_ERR; -PFN_vkCmdBindPipeline CmdBindPipeline; -PFN_vkCmdBindDescriptorSets CmdBindDescriptorSets; -PFN_vkCmdBindIndexBuffer CmdBindIndexBuffer; -PFN_vkCmdBindVertexBuffers CmdBindVertexBuffers; +PFN_vkCmdBindPipeline CmdBindPipeline; +PFN_vkCmdBindDescriptorSets CmdBindDescriptorSets; +PFN_vkCmdBindIndexBuffer CmdBindIndexBuffer; +PFN_vkCmdBindVertexBuffers CmdBindVertexBuffers; -PFN_vkCmdDrawIndexed CmdDrawIndexed; -PFN_vkCmdDraw CmdDraw; +PFN_vkCmdDrawIndexed CmdDrawIndexed; +PFN_vkCmdDraw CmdDraw; -PFN_vkCmdSetStencilCompareMask CmdSetStencilCompareMask; -PFN_vkCmdSetStencilReference CmdSetStencilReference; -PFN_vkCmdSetStencilWriteMask CmdSetStencilWriteMask; -PFN_vkCmdBeginRenderPass CmdBeginRenderPass; -PFN_vkCmdEndRenderPass CmdEndRenderPass; -PFN_vkCmdSetViewport CmdSetViewport; -PFN_vkCmdSetScissor CmdSetScissor; +PFN_vkCmdSetStencilCompareMask CmdSetStencilCompareMask; +PFN_vkCmdSetStencilReference CmdSetStencilReference; +PFN_vkCmdSetStencilWriteMask CmdSetStencilWriteMask; +PFN_vkCmdBeginRenderPass CmdBeginRenderPass; +PFN_vkCmdEndRenderPass CmdEndRenderPass; +PFN_vkCmdSetViewport CmdSetViewport; +PFN_vkCmdSetScissor CmdSetScissor; -PFN_vkCmdPushConstants CmdPushConstants; +PFN_vkCmdPushConstants CmdPushConstants; void _flush_all_contexes (VkvgDevice dev){ VkvgContext ctx = dev->lastCtx; @@ -86,12 +86,12 @@ VkRenderPass _createRenderPassNoResolve(VkvgDevice dev, VkAttachmentLoadOp loadO .finalLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL }; VkAttachmentDescription attachments[] = {attColor,attDS}; - VkAttachmentReference colorRef = {0, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL}; - VkAttachmentReference dsRef = {1, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL}; + VkAttachmentReference colorRef = {0, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL}; + VkAttachmentReference dsRef = {1, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL}; VkSubpassDescription subpassDescription = { .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, - .colorAttachmentCount = 1, - .pColorAttachments = &colorRef, + .colorAttachmentCount = 1, + .pColorAttachments = &colorRef, .pDepthStencilAttachment= &dsRef}; VkSubpassDependency dependencies[] = @@ -150,13 +150,13 @@ VkRenderPass _createRenderPassMS(VkvgDevice dev, VkAttachmentLoadOp loadOp, VkAt VkAttachmentDescription attachments[] = {attColorResolve,attDS,attColor}; VkAttachmentReference resolveRef= {0, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL}; - VkAttachmentReference dsRef = {1, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL}; - VkAttachmentReference colorRef = {2, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL}; + VkAttachmentReference dsRef = {1, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL}; + VkAttachmentReference colorRef = {2, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL}; VkSubpassDescription subpassDescription = { .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, - .colorAttachmentCount = 1, - .pColorAttachments = &colorRef, - .pResolveAttachments = &resolveRef, + .colorAttachmentCount = 1, + .pColorAttachments = &colorRef, + .pResolveAttachments = &resolveRef, .pDepthStencilAttachment= &dsRef}; VkSubpassDependency dependencies[] = @@ -424,20 +424,20 @@ bool _init_function_pointers (VkvgDevice dev) { } vkh_device_init_debug_utils ((VkhDevice)dev); #endif - CmdBindPipeline = GetVkProcAddress(dev->vkDev, dev->instance, vkCmdBindPipeline); - CmdBindDescriptorSets = GetVkProcAddress(dev->vkDev, dev->instance, vkCmdBindDescriptorSets); - CmdBindIndexBuffer = GetVkProcAddress(dev->vkDev, dev->instance, vkCmdBindIndexBuffer); - CmdBindVertexBuffers = GetVkProcAddress(dev->vkDev, dev->instance, vkCmdBindVertexBuffers); - CmdDrawIndexed = GetVkProcAddress(dev->vkDev, dev->instance, vkCmdDrawIndexed); - CmdDraw = GetVkProcAddress(dev->vkDev, dev->instance, vkCmdDraw); + CmdBindPipeline = GetVkProcAddress(dev->vkDev, dev->instance, vkCmdBindPipeline); + CmdBindDescriptorSets = GetVkProcAddress(dev->vkDev, dev->instance, vkCmdBindDescriptorSets); + CmdBindIndexBuffer = GetVkProcAddress(dev->vkDev, dev->instance, vkCmdBindIndexBuffer); + CmdBindVertexBuffers = GetVkProcAddress(dev->vkDev, dev->instance, vkCmdBindVertexBuffers); + CmdDrawIndexed = GetVkProcAddress(dev->vkDev, dev->instance, vkCmdDrawIndexed); + CmdDraw = GetVkProcAddress(dev->vkDev, dev->instance, vkCmdDraw); CmdSetStencilCompareMask= GetVkProcAddress(dev->vkDev, dev->instance, vkCmdSetStencilCompareMask); - CmdSetStencilReference = GetVkProcAddress(dev->vkDev, dev->instance, vkCmdSetStencilReference); - CmdSetStencilWriteMask = GetVkProcAddress(dev->vkDev, dev->instance, vkCmdSetStencilWriteMask); - CmdBeginRenderPass = GetVkProcAddress(dev->vkDev, dev->instance, vkCmdBeginRenderPass); - CmdEndRenderPass = GetVkProcAddress(dev->vkDev, dev->instance, vkCmdEndRenderPass); - CmdSetViewport = GetVkProcAddress(dev->vkDev, dev->instance, vkCmdSetViewport); - CmdSetScissor = GetVkProcAddress(dev->vkDev, dev->instance, vkCmdSetScissor); - CmdPushConstants = GetVkProcAddress(dev->vkDev, dev->instance, vkCmdPushConstants); + CmdSetStencilReference = GetVkProcAddress(dev->vkDev, dev->instance, vkCmdSetStencilReference); + CmdSetStencilWriteMask = GetVkProcAddress(dev->vkDev, dev->instance, vkCmdSetStencilWriteMask); + CmdBeginRenderPass = GetVkProcAddress(dev->vkDev, dev->instance, vkCmdBeginRenderPass); + CmdEndRenderPass = GetVkProcAddress(dev->vkDev, dev->instance, vkCmdEndRenderPass); + CmdSetViewport = GetVkProcAddress(dev->vkDev, dev->instance, vkCmdSetViewport); + CmdSetScissor = GetVkProcAddress(dev->vkDev, dev->instance, vkCmdSetScissor); + CmdPushConstants = GetVkProcAddress(dev->vkDev, dev->instance, vkCmdPushConstants); return true; } @@ -498,11 +498,11 @@ void _dump_image_format_properties (VkvgDevice dev, VkFormat format) { format, VK_IMAGE_TYPE_2D, VKVG_TILING, VK_IMAGE_USAGE_SAMPLED_BIT|VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT|VK_IMAGE_USAGE_TRANSFER_SRC_BIT|VK_IMAGE_USAGE_TRANSFER_DST_BIT, 0, &imgProps)); - printf ("tiling = %d\n", VKVG_TILING); - printf ("max extend = (%d, %d, %d)\n", imgProps.maxExtent.width, imgProps.maxExtent.height, imgProps.maxExtent.depth); - printf ("max mip levels = %d\n", imgProps.maxMipLevels); + printf ("tiling = %d\n", VKVG_TILING); + printf ("max extend = (%d, %d, %d)\n", imgProps.maxExtent.width, imgProps.maxExtent.height, imgProps.maxExtent.depth); + printf ("max mip levels = %d\n", imgProps.maxMipLevels); printf ("max array layers = %d\n", imgProps.maxArrayLayers); - printf ("sample counts = "); + printf ("sample counts = "); if (imgProps.sampleCounts & VK_SAMPLE_COUNT_1_BIT) printf ("1,"); if (imgProps.sampleCounts & VK_SAMPLE_COUNT_2_BIT) diff --git a/src/vkvg_device_internal.h b/src/vkvg_device_internal.h index 05fc5e6..202a0e2 100644 --- a/src/vkvg_device_internal.h +++ b/src/vkvg_device_internal.h @@ -26,55 +26,55 @@ #include "vkvg.h" #include "vkvg_fonts.h" -#define STENCIL_FILL_BIT 0x1 -#define STENCIL_CLIP_BIT 0x2 -#define STENCIL_ALL_BIT 0x3 +#define STENCIL_FILL_BIT 0x1 +#define STENCIL_CLIP_BIT 0x2 +#define STENCIL_ALL_BIT 0x3 -extern PFN_vkCmdBindPipeline CmdBindPipeline; -extern PFN_vkCmdBindDescriptorSets CmdBindDescriptorSets; -extern PFN_vkCmdBindIndexBuffer CmdBindIndexBuffer; -extern PFN_vkCmdBindVertexBuffers CmdBindVertexBuffers; +extern PFN_vkCmdBindPipeline CmdBindPipeline; +extern PFN_vkCmdBindDescriptorSets CmdBindDescriptorSets; +extern PFN_vkCmdBindIndexBuffer CmdBindIndexBuffer; +extern PFN_vkCmdBindVertexBuffers CmdBindVertexBuffers; -extern PFN_vkCmdDrawIndexed CmdDrawIndexed; -extern PFN_vkCmdDraw CmdDraw; +extern PFN_vkCmdDrawIndexed CmdDrawIndexed; +extern PFN_vkCmdDraw CmdDraw; extern PFN_vkCmdSetStencilCompareMask CmdSetStencilCompareMask; -extern PFN_vkCmdSetStencilReference CmdSetStencilReference; -extern PFN_vkCmdSetStencilWriteMask CmdSetStencilWriteMask; -extern PFN_vkCmdBeginRenderPass CmdBeginRenderPass; -extern PFN_vkCmdEndRenderPass CmdEndRenderPass; -extern PFN_vkCmdSetViewport CmdSetViewport; -extern PFN_vkCmdSetScissor CmdSetScissor; +extern PFN_vkCmdSetStencilReference CmdSetStencilReference; +extern PFN_vkCmdSetStencilWriteMask CmdSetStencilWriteMask; +extern PFN_vkCmdBeginRenderPass CmdBeginRenderPass; +extern PFN_vkCmdEndRenderPass CmdEndRenderPass; +extern PFN_vkCmdSetViewport CmdSetViewport; +extern PFN_vkCmdSetScissor CmdSetScissor; -extern PFN_vkCmdPushConstants CmdPushConstants; +extern PFN_vkCmdPushConstants CmdPushConstants; typedef struct _vkvg_device_t{ - VkDevice vkDev; /**< Vulkan Logical Device */ - VkPhysicalDeviceMemoryProperties phyMemProps; /**< Vulkan Physical device memory properties */ - VkPhysicalDevice phy; /**< Vulkan Physical device */ - VmaAllocator allocator; /**< Vulkan Memory allocator */ - VkInstance instance; /**< Vulkan instance */ + VkDevice vkDev; /**< Vulkan Logical Device */ + VkPhysicalDeviceMemoryProperties phyMemProps; /**< Vulkan Physical device memory properties */ + VkPhysicalDevice phy; /**< Vulkan Physical device */ + VmaAllocator allocator; /**< Vulkan Memory allocator */ + VkInstance instance; /**< Vulkan instance */ VkImageTiling supportedTiling; /**< Supported image tiling for surface, 0xFF=no support */ VkFormat stencilFormat; /**< Supported vulkan image format for stencil */ - VkhQueue gQueue; /**< Vulkan Queue with Graphic flag */ - MUTEX gQMutex; /**< queue submission has to be externally syncronized */ - VkRenderPass renderPass; /**< Vulkan render pass, common for all surfaces */ + VkhQueue gQueue; /**< Vulkan Queue with Graphic flag */ + MUTEX gQMutex; /**< queue submission has to be externally syncronized */ + VkRenderPass renderPass; /**< Vulkan render pass, common for all surfaces */ VkRenderPass renderPass_ClearStencil;/**< Vulkan render pass for first draw with context, stencil has to be cleared */ - VkRenderPass renderPass_ClearAll; /**< Vulkan render pass for new surface, clear all attacments*/ + VkRenderPass renderPass_ClearAll; /**< Vulkan render pass for new surface, clear all attacments*/ - uint32_t references; /**< Reference count, prevent destroying device if still in use */ - VkCommandPool cmdPool; /**< Global command pool for processing on surfaces without context */ - VkCommandBuffer cmd; /**< Global command buffer */ - VkFence fence; /**< this fence is kept signaled when idle, wait and reset are called before each recording. */ + uint32_t references; /**< Reference count, prevent destroying device if still in use */ + VkCommandPool cmdPool; /**< Global command pool for processing on surfaces without context */ + VkCommandBuffer cmd; /**< Global command buffer */ + VkFence fence; /**< this fence is kept signaled when idle, wait and reset are called before each recording. */ - VkPipeline pipe_OVER; /**< default operator */ + VkPipeline pipe_OVER; /**< default operator */ VkPipeline pipe_SUB; - VkPipeline pipe_CLEAR; /**< clear operator */ + VkPipeline pipe_CLEAR; /**< clear operator */ - VkPipeline pipelinePolyFill; /**< even-odd polygon filling first step */ - VkPipeline pipelineClipping; /**< draw on stencil to update clipping regions */ + VkPipeline pipelinePolyFill; /**< even-odd polygon filling first step */ + VkPipeline pipelineClipping; /**< draw on stencil to update clipping regions */ #ifdef VKVG_WIRED_DEBUG VkPipeline pipelineWired; @@ -83,35 +83,35 @@ typedef struct _vkvg_device_t{ #if VKVG_DBG_STATS vkvg_debug_stats_t debug_stats; /**< debug statistics on memory usage and vulkan ressources */ #endif - VkPipelineCache pipelineCache; /**< speed up startup by caching configured pipelines on disk */ - VkPipelineLayout pipelineLayout; /**< layout common to all pipelines */ - VkDescriptorSetLayout dslFont; /**< font cache descriptors layout */ - VkDescriptorSetLayout dslSrc; /**< context source surface descriptors layout */ - VkDescriptorSetLayout dslGrad; /**< context gradient descriptors layout */ + VkPipelineCache pipelineCache; /**< speed up startup by caching configured pipelines on disk */ + VkPipelineLayout pipelineLayout; /**< layout common to all pipelines */ + VkDescriptorSetLayout dslFont; /**< font cache descriptors layout */ + VkDescriptorSetLayout dslSrc; /**< context source surface descriptors layout */ + VkDescriptorSetLayout dslGrad; /**< context gradient descriptors layout */ - int hdpi, /**< only used for FreeType fonts and svg loading */ + int hdpi, /**< only used for FreeType fonts and svg loading */ vdpi; - VkhImage emptyImg; /**< prevent unbound descriptor to trigger Validation error 61 */ - VkSampleCountFlags samples; /**< samples count common to all surfaces */ - bool deferredResolve; /**< if true, resolve only on context destruction and set as source */ - vkvg_status_t status; /**< Current status of device, affected by last operation */ + VkhImage emptyImg; /**< prevent unbound descriptor to trigger Validation error 61 */ + VkSampleCountFlags samples; /**< samples count common to all surfaces */ + bool deferredResolve; /**< if true, resolve only on context destruction and set as source */ + vkvg_status_t status; /**< Current status of device, affected by last operation */ - _font_cache_t* fontCache; /**< Store everything relative to common font caching system */ - VkvgContext lastCtx; /**< last element of double linked list of context, used to trigger font caching system update on all contexts*/ + _font_cache_t* fontCache; /**< Store everything relative to common font caching system */ + VkvgContext lastCtx; /**< last element of double linked list of context, used to trigger font caching system update on all contexts*/ }vkvg_device; bool _init_function_pointers (VkvgDevice dev); -void _create_empty_texture (VkvgDevice dev, VkFormat format, VkImageTiling tiling); +void _create_empty_texture (VkvgDevice dev, VkFormat format, VkImageTiling tiling); void _get_best_image_tiling (VkvgDevice dev, VkFormat format, VkImageTiling* pTiling); void _check_best_image_tiling (VkvgDevice dev, VkFormat format); -void _create_pipeline_cache (VkvgDevice dev); +void _create_pipeline_cache (VkvgDevice dev); VkRenderPass _createRenderPassMS(VkvgDevice dev, VkAttachmentLoadOp loadOp, VkAttachmentLoadOp stencilLoadOp); VkRenderPass _createRenderPassNoResolve(VkvgDevice dev, VkAttachmentLoadOp loadOp, VkAttachmentLoadOp stencilLoadOp); -void _setupPipelines (VkvgDevice dev); +void _setupPipelines (VkvgDevice dev); void _createDescriptorSetLayout (VkvgDevice dev); -void _flush_all_contexes (VkvgDevice dev); -void _wait_idle (VkvgDevice dev); +void _flush_all_contexes (VkvgDevice dev); +void _wait_idle (VkvgDevice dev); void _wait_and_reset_device_fence (VkvgDevice dev); -void _submit_cmd (VkvgDevice dev, VkCommandBuffer* cmd, VkFence fence); +void _submit_cmd (VkvgDevice dev, VkCommandBuffer* cmd, VkFence fence); #endif diff --git a/src/vkvg_fonts.c b/src/vkvg_fonts.c index 96fed15..fc6abf0 100644 --- a/src/vkvg_fonts.c +++ b/src/vkvg_fonts.c @@ -70,7 +70,7 @@ void _init_fonts_cache (VkvgDevice dev){ cache->cmd = vkh_cmd_buff_create((VkhDevice)dev,dev->cmdPool,VK_COMMAND_BUFFER_LEVEL_PRIMARY); //Set texture cache initial layout to shaderReadOnly to prevent error msg if cache is not fill - VkImageSubresourceRange subres = {VK_IMAGE_ASPECT_COLOR_BIT,0,1,0,cache->texLength}; + VkImageSubresourceRange subres = {VK_IMAGE_ASPECT_COLOR_BIT,0,1,0,cache->texLength}; vkh_cmd_begin (cache->cmd,VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT); vkh_image_set_layout_subres(cache->cmd, cache->texture, subres, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, @@ -91,9 +91,9 @@ void _increase_font_tex_array (VkvgDevice dev){ _font_cache_t* cache = dev->fontCache; - vkWaitForFences (dev->vkDev, 1, &cache->uploadFence, VK_TRUE, UINT64_MAX); + vkWaitForFences (dev->vkDev, 1, &cache->uploadFence, VK_TRUE, UINT64_MAX); vkResetCommandBuffer(cache->cmd, 0); - vkResetFences (dev->vkDev, 1, &cache->uploadFence); + vkResetFences (dev->vkDev, 1, &cache->uploadFence); uint8_t newSize = cache->texLength + FONT_CACHE_INIT_LAYERS; VkhImage newImg = vkh_tex2d_array_create ((VkhDevice)dev, cache->texFormat, FONT_PAGE_SIZE, FONT_PAGE_SIZE, @@ -102,8 +102,8 @@ void _increase_font_tex_array (VkvgDevice dev){ vkh_image_create_descriptor (newImg, VK_IMAGE_VIEW_TYPE_2D_ARRAY, VK_IMAGE_ASPECT_COLOR_BIT, VK_FILTER_NEAREST, VK_FILTER_NEAREST, VK_SAMPLER_MIPMAP_MODE_NEAREST,VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER); - VkImageSubresourceRange subresNew = {VK_IMAGE_ASPECT_COLOR_BIT,0,1,0,newSize}; - VkImageSubresourceRange subres = {VK_IMAGE_ASPECT_COLOR_BIT,0,1,0,cache->texLength}; + VkImageSubresourceRange subresNew = {VK_IMAGE_ASPECT_COLOR_BIT,0,1,0,newSize}; + VkImageSubresourceRange subres = {VK_IMAGE_ASPECT_COLOR_BIT,0,1,0,cache->texLength}; vkh_cmd_begin (cache->cmd,VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT); @@ -127,19 +127,19 @@ void _increase_font_tex_array (VkvgDevice dev){ VK_CHECK_RESULT(vkEndCommandBuffer(cache->cmd)); - _submit_cmd (dev, &cache->cmd, cache->uploadFence); - vkWaitForFences (dev->vkDev, 1, &cache->uploadFence, VK_TRUE, UINT64_MAX); + _submit_cmd (dev, &cache->cmd, cache->uploadFence); + vkWaitForFences (dev->vkDev, 1, &cache->uploadFence, VK_TRUE, UINT64_MAX); cache->pensY = (int*)realloc(cache->pensY, newSize * sizeof(int)); void* tmp = memset (&cache->pensY[cache->texLength],0,FONT_CACHE_INIT_LAYERS*sizeof(int)); - vkh_image_destroy (cache->texture); + vkh_image_destroy (cache->texture); cache->texLength = newSize; - cache->texture = newImg; + cache->texture = newImg; VkvgContext next = dev->lastCtx; while (next != NULL){ - _update_descriptor_set (next, cache->texture, next->dsFont); + _update_descriptor_set (next, cache->texture, next->dsFont); next = next->pPrev; } _wait_idle(dev); @@ -157,8 +157,8 @@ void _init_next_line_in_tex_cache (VkvgDevice dev, _vkvg_font_t* f){ cache->pensY[i] += f->curLine.height; return; } - _flush_chars_to_tex (dev, f); - _increase_font_tex_array (dev); + _flush_chars_to_tex (dev, f); + _increase_font_tex_array (dev); _init_next_line_in_tex_cache(dev, f); } void _destroy_font_cache (VkvgDevice dev){ @@ -192,9 +192,9 @@ void _destroy_font_cache (VkvgDevice dev){ vkvg_buffer_destroy (&cache->buff); - vkh_image_destroy (cache->texture); + vkh_image_destroy (cache->texture); //vkFreeCommandBuffers(dev->vkDev,dev->cmdPool, 1, &cache->cmd); - vkDestroyFence (dev->vkDev,cache->uploadFence,NULL); + vkDestroyFence (dev->vkDev,cache->uploadFence,NULL); FT_Done_FreeType(cache->library); FcConfigDestroy(cache->config); @@ -207,7 +207,7 @@ void _destroy_font_cache (VkvgDevice dev){ #ifdef DEBUG //helper function void _dump_glyphs (FT_Face face){ - FT_GlyphSlot slot; + FT_GlyphSlot slot; char gname[256]; for (int i = 0; i < face->num_glyphs; ++i) { @@ -230,15 +230,15 @@ void _flush_chars_to_tex (VkvgDevice dev, _vkvg_font_t* f) { return; LOG(VKVG_LOG_INFO, "_flush_chars_to_tex pen(%d, %d)\n",f->curLine.penX, f->curLine.penY); - vkWaitForFences (dev->vkDev,1,&cache->uploadFence,VK_TRUE,UINT64_MAX); + vkWaitForFences (dev->vkDev,1,&cache->uploadFence,VK_TRUE,UINT64_MAX); vkResetCommandBuffer(cache->cmd,0); - vkResetFences (dev->vkDev,1,&cache->uploadFence); + vkResetFences (dev->vkDev,1,&cache->uploadFence); memcpy(cache->buff.allocInfo.pMappedData, cache->hostBuff, (uint64_t)f->curLine.height * FONT_PAGE_SIZE * cache->texPixelSize); vkh_cmd_begin (cache->cmd,VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT); - VkImageSubresourceRange subres = {VK_IMAGE_ASPECT_COLOR_BIT,0,1,f->curLine.pageIdx,1}; + VkImageSubresourceRange subres = {VK_IMAGE_ASPECT_COLOR_BIT,0,1,f->curLine.pageIdx,1}; vkh_image_set_layout_subres(cache->cmd, cache->texture, subres, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT); @@ -274,10 +274,10 @@ _char_ref* _prepare_char (VkvgDevice dev, _vkvg_font_t* f, FT_UInt gindex){ FT_CHECK_RESULT(FT_Load_Glyph(f->face, gindex, FT_LOAD_RENDER)); #endif - FT_GlyphSlot slot = f->face->glyph; - FT_Bitmap bmp = slot->bitmap; - uint8_t* data = dev->fontCache->hostBuff; - uint32_t bmpWidth= bmp.width; //real width in pixel of char bitmap + FT_GlyphSlot slot = f->face->glyph; + FT_Bitmap bmp = slot->bitmap; + uint8_t* data = dev->fontCache->hostBuff; + uint32_t bmpWidth= bmp.width; //real width in pixel of char bitmap #if defined(VKVG_LCD_FONT_FILTER) && defined(FT_CONFIG_OPTION_SUBPIXEL_RENDERING) bmpWidth /= 3; @@ -323,7 +323,7 @@ _char_ref* _prepare_char (VkvgDevice dev, _vkvg_font_t* f, FT_UInt gindex){ return cr; } void _select_font_path (VkvgContext ctx, const char* fontFile){ - ctx->currentFont = NULL; + ctx->currentFont = NULL; } //select current font for context void _select_font_face (VkvgContext ctx, const char* name){ @@ -347,7 +347,7 @@ _vkvg_font_t* _find_or_create_font_size (VkvgContext ctx, _vkvg_font_identity_t* return &font->sizes[i]; } //if not found, create a new font size structure - _font_cache_t* cache = (_font_cache_t*)ctx->pSurf->dev->fontCache; + _font_cache_t* cache = (_font_cache_t*)ctx->pSurf->dev->fontCache; VkvgDevice dev = ctx->pSurf->dev; if (++font->sizeCount == 1) @@ -375,7 +375,7 @@ _vkvg_font_t* _find_or_create_font_size (VkvgContext ctx, _vkvg_font_identity_t* //try find font already resolved with fontconfig by font name _vkvg_font_identity_t* _tryFindFontByName (VkvgContext ctx, const char* fontName){ - _font_cache_t* cache = (_font_cache_t*)ctx->pSurf->dev->fontCache; + _font_cache_t* cache = (_font_cache_t*)ctx->pSurf->dev->fontCache; for (int i = 0; i < cache->fontsCount; ++i) { for (uint32_t j = 0; j < cache->fonts[i].fcNamesCount; j++) { if (strcmp (cache->fonts[i].fcNames[j], fontName) == 0) @@ -386,7 +386,7 @@ _vkvg_font_identity_t* _tryFindFontByName (VkvgContext ctx, const char* fontName } _vkvg_font_identity_t* _tryResolveFontNameWithFontConfig (VkvgContext ctx, const char* fontName) { _vkvg_font_identity_t* resolvedFont = NULL; - _font_cache_t* cache = (_font_cache_t*)ctx->pSurf->dev->fontCache; + _font_cache_t* cache = (_font_cache_t*)ctx->pSurf->dev->fontCache; FcPattern* pat = FcNameParse((const FcChar8*)ctx->selectedFontName); FcConfigSubstitute(cache->config, pat, FcMatchPattern); FcDefaultSubstitute(pat); @@ -450,16 +450,16 @@ void _update_current_font (VkvgContext ctx) { hb_buffer_t * _get_hb_buffer (_vkvg_font_t* font, const char* text) { hb_buffer_t *buf = hb_buffer_create(); - const char *lng = "fr"; + const char *lng = "fr"; hb_script_t script = HB_SCRIPT_LATIN; script = hb_script_from_string (text, (int)strlen (text)); hb_direction_t dir = hb_script_get_horizontal_direction(script); //dir = HB_DIRECTION_TTB; hb_buffer_set_direction (buf, dir); - hb_buffer_set_script (buf, script); - hb_buffer_set_language (buf, hb_language_from_string (lng, (int)strlen(lng))); - hb_buffer_add_utf8 (buf, text, (int)strlen(text), 0, (int)strlen(text)); + hb_buffer_set_script (buf, script); + hb_buffer_set_language (buf, hb_language_from_string (lng, (int)strlen(lng))); + hb_buffer_add_utf8 (buf, text, (int)strlen(text), 0, (int)strlen(text)); hb_shape (font->hb_font, buf, NULL, 0); return buf; @@ -507,7 +507,7 @@ void _create_text_run (VkvgContext ctx, const char* text, VkvgText textRun) { textRun->font = ctx->currentFontSize; textRun->dev = ctx->pSurf->dev; - textRun->glyph_pos = hb_buffer_get_glyph_positions (textRun->hbBuf, &textRun->glyph_count); + textRun->glyph_pos = hb_buffer_get_glyph_positions (textRun->hbBuf, &textRun->glyph_count); unsigned int string_width_in_pixels = 0; for (uint32_t i=0; i < textRun->glyph_count; ++i) @@ -520,7 +520,7 @@ void _create_text_run (VkvgContext ctx, const char* text, VkvgText textRun) { textRun->extents.y_bearing = -(float)(textRun->glyph_pos[0].y_offset >> 6); textRun->extents.height = (float)(FT_MulFix(ctx->currentFontSize->face->height, metrics->y_scale) >> 6);// (metrics->ascender + metrics->descender) >> 6; - textRun->extents.width = textRun->extents.x_advance; + textRun->extents.width = textRun->extents.x_advance; } void _destroy_text_run (VkvgText textRun) { hb_buffer_destroy (textRun->hbBuf); @@ -586,9 +586,9 @@ void _show_text_run (VkvgContext ctx, VkvgText tr) { #ifdef DEBUG void _show_texture (vkvg_context* ctx){ Vertex vs[] = { - {{0,0}, 0, {0,0,0}}, - {{0,FONT_PAGE_SIZE}, 0, {0,1,0}}, - {{FONT_PAGE_SIZE,0}, 0, {1,0,0}}, + {{0,0}, 0, {0,0,0}}, + {{0,FONT_PAGE_SIZE}, 0, {0,1,0}}, + {{FONT_PAGE_SIZE,0}, 0, {1,0,0}}, {{FONT_PAGE_SIZE,FONT_PAGE_SIZE}, 0, {1,1,0}} }; @@ -620,9 +620,9 @@ void _show_text (VkvgContext ctx, const char* text){ /*void testfonts(){ - FT_Library library; - FT_Face face; - FT_GlyphSlot slot; + FT_Library library; + FT_Face face; + FT_GlyphSlot slot; assert(!FT_Init_FreeType(&library)); assert(!FT_New_Face(library, "/usr/share/fonts/truetype/dejavu/DejaVuSansMono.ttf", 0, &face)); @@ -634,14 +634,14 @@ void _show_text (VkvgContext ctx, const char* text){ hb_buffer_t *buf = hb_buffer_create(); const char *text = "Ленивый рыжий кот"; - const char *lng = "en"; + const char *lng = "en"; //"كسول الزنجبيل القط","懶惰的姜貓", hb_buffer_set_direction (buf, HB_DIRECTION_LTR); - hb_buffer_set_script (buf, HB_SCRIPT_LATIN); - hb_buffer_set_language (buf, hb_language_from_string(lng,strlen(lng))); - hb_buffer_add_utf8 (buf, text, strlen(text), 0, strlen(text)); + hb_buffer_set_script (buf, HB_SCRIPT_LATIN); + hb_buffer_set_language (buf, hb_language_from_string(lng,strlen(lng))); + hb_buffer_add_utf8 (buf, text, strlen(text), 0, strlen(text)); hb_unicode_funcs_t * unifc = hb_unicode_funcs_get_default(); hb_script_t sc = hb_buffer_get_script(buf); @@ -653,7 +653,6 @@ void _show_text (VkvgContext ctx, const char* text){ //hb_script_to_iso15924_tag() - FT_Done_Face ( face ); + FT_Done_Face ( face ); FT_Done_FreeType( library ); }*/ - diff --git a/src/vkvg_fonts.h b/src/vkvg_fonts.h index ce06786..528139a 100644 --- a/src/vkvg_fonts.h +++ b/src/vkvg_fonts.h @@ -37,8 +37,8 @@ #include -#define FONT_PAGE_SIZE 1024 -#define FONT_CACHE_INIT_LAYERS 1 +#define FONT_PAGE_SIZE 1024 +#define FONT_CACHE_INIT_LAYERS 1 #define FONT_FILE_NAME_MAX_SIZE 1024 #define FONT_NAME_MAX_SIZE 128 @@ -47,38 +47,38 @@ #include "vkvg_buff.h" #include "vkh.h" -#define FT_CHECK_RESULT(f) \ +#define FT_CHECK_RESULT(f) \ { \ FT_Error res = (f); \ if (res != 0) \ { \ - fprintf(stderr,"Fatal : FreeType error is %d in %s at line %d\n", res, __FILE__, __LINE__); \ + fprintf(stderr,"Fatal : FreeType error is %d in %s at line %d\n", res, __FILE__, __LINE__); \ assert(res == 0); \ } \ } //texture coordinates of one character in font cache array texture. typedef struct { - vec4 bounds; /* normalized float bounds of character bitmap in font cache texture. */ - vec2i16 bmpDiff; /* Difference in pixel between char bitmap top left corner and char glyph*/ - uint8_t pageIdx; /* Page index in font cache texture array */ + vec4 bounds; /* normalized float bounds of character bitmap in font cache texture. */ + vec2i16 bmpDiff; /* Difference in pixel between char bitmap top left corner and char glyph*/ + uint8_t pageIdx; /* Page index in font cache texture array */ }_char_ref; // Current location in font cache texture array for new character addition. Each font holds such structure to locate // where to upload new chars. typedef struct { - uint8_t pageIdx; /* Current page number in font cache */ - int penX; /* Current X in cache for next char addition */ - int penY; /* Current Y in cache for next char addition */ - int height; /* Height of current line pointed by this structure */ + uint8_t pageIdx; /* Current page number in font cache */ + int penX; /* Current X in cache for next char addition */ + int penY; /* Current Y in cache for next char addition */ + int height; /* Height of current line pointed by this structure */ }_tex_ref_t; // Loaded font structure, one per size, holds informations for glyphes upload in cache and the lookup table of characters. typedef struct { - FT_F26Dot6 charSize; /* Font size*/ - FT_Face face; /* FreeType face*/ - hb_font_t* hb_font; /* HarfBuzz font instance*/ - _char_ref** charLookup; /* Lookup table of characteres in cache, if not found, upload is queued*/ + FT_F26Dot6 charSize; /* Font size*/ + FT_Face face; /* FreeType face*/ + hb_font_t* hb_font; /* HarfBuzz font instance*/ + _char_ref** charLookup; /* Lookup table of characteres in cache, if not found, upload is queued*/ - _tex_ref_t curLine; /* tex coord where to add new char bmp's */ + _tex_ref_t curLine; /* tex coord where to add new char bmp's */ }_vkvg_font_t; /* Font identification structure */ typedef struct { @@ -86,41 +86,41 @@ typedef struct { uint32_t fcNamesCount; /* Count of resolved names by fontConfig */ char* fontFile; /* Font file full path*/ uint32_t sizeCount; /* available font size loaded */ - _vkvg_font_t* sizes /* loaded font size array */ + _vkvg_font_t* sizes; /* loaded font size array */ }_vkvg_font_identity_t; // Font cache global structure, entry point for all font related operations. typedef struct { - FT_Library library; /* FreeType library*/ - FcConfig* config; /* Font config, used to find font files by font names*/ + FT_Library library; /* FreeType library*/ + FcConfig* config; /* Font config, used to find font files by font names*/ - int stagingX; /* x pen in host buffer */ - uint8_t* hostBuff; /* host memory where bitmaps are first loaded */ + int stagingX; /* x pen in host buffer */ + uint8_t* hostBuff; /* host memory where bitmaps are first loaded */ - VkCommandBuffer cmd; /* vulkan command buffer for font textures upload */ - vkvg_buff buff; /* stagin buffer */ - VkhImage texture; /* 2d array texture used by contexts to draw characteres */ - VkFormat texFormat; /* Format of the fonts texture array */ - uint8_t texPixelSize; /* Size in byte of a single pixel in a font texture */ - uint8_t texLength; /* layer count of 2d array texture, starts with FONT_CACHE_INIT_LAYERS count and increased when needed */ - int* pensY; /* array of current y pen positions for each texture in cache 2d array */ - VkFence uploadFence; /* Signaled when upload is finished */ + VkCommandBuffer cmd; /* vulkan command buffer for font textures upload */ + vkvg_buff buff; /* stagin buffer */ + VkhImage texture; /* 2d array texture used by contexts to draw characteres */ + VkFormat texFormat; /* Format of the fonts texture array */ + uint8_t texPixelSize; /* Size in byte of a single pixel in a font texture */ + uint8_t texLength; /* layer count of 2d array texture, starts with FONT_CACHE_INIT_LAYERS count and increased when needed */ + int* pensY; /* array of current y pen positions for each texture in cache 2d array */ + VkFence uploadFence; /* Signaled when upload is finished */ - _vkvg_font_identity_t* fonts; /* Loaded fonts structure array */ - int32_t fontsCount; /* Loaded fonts array count*/ + _vkvg_font_identity_t* fonts; /* Loaded fonts structure array */ + int32_t fontsCount; /* Loaded fonts array count*/ }_font_cache_t; // Precompute everything necessary to draw one line of text, usefull to draw the same text multiple times. typedef struct _vkvg_text_run_t { - hb_buffer_t* hbBuf; /* HarfBuzz buffer of text */ - _vkvg_font_t* font; /* vkvg font structure pointer */ - VkvgDevice dev; /* vkvg device associated with this text run */ - vkvg_text_extents_t extents; /* store computed text extends */ - const char* text; /* utf8 char array of text*/ + hb_buffer_t* hbBuf; /* HarfBuzz buffer of text */ + _vkvg_font_t* font; /* vkvg font structure pointer */ + VkvgDevice dev; /* vkvg device associated with this text run */ + vkvg_text_extents_t extents; /* store computed text extends */ + const char* text; /* utf8 char array of text*/ unsigned int glyph_count;/* Total glyph count */ hb_glyph_position_t *glyph_pos; /* HarfBuzz computed glyph positions array */ } vkvg_text_run_t; //Create font cache. -void _init_fonts_cache (VkvgDevice dev); +void _init_fonts_cache (VkvgDevice dev); //Release all ressources of font cache. void _destroy_font_cache (VkvgDevice dev); //Select current font for context from font name, create new font entry in cache if required @@ -129,15 +129,15 @@ void _select_font_path (VkvgContext ctx, const char* fontFile); //Draw text void _show_text (VkvgContext ctx, const char* text); //Get text dimmensions -void _text_extents (VkvgContext ctx, const char* text, vkvg_text_extents_t *extents); +void _text_extents (VkvgContext ctx, const char* text, vkvg_text_extents_t *extents); //Get font global dimmensions -void _font_extents (VkvgContext ctx, vkvg_font_extents_t* extents); +void _font_extents (VkvgContext ctx, vkvg_font_extents_t* extents); //Create text object that could be drawn multiple times minimizing harfbuzz and compute processing. -void _create_text_run (VkvgContext ctx, const char* text, VkvgText textRun); +void _create_text_run (VkvgContext ctx, const char* text, VkvgText textRun); //Release ressources held by a text run. -void _destroy_text_run (VkvgText textRun); +void _destroy_text_run (VkvgText textRun); //Draw text run -void _show_text_run (VkvgContext ctx, VkvgText tr); +void _show_text_run (VkvgContext ctx, VkvgText tr); //Trigger stagging buffer to be uploaded in font cache. Groupping upload improve performances. -void _flush_chars_to_tex (VkvgDevice dev, _vkvg_font_t* f); +void _flush_chars_to_tex (VkvgDevice dev, _vkvg_font_t* f); #endif diff --git a/src/vkvg_internal.h b/src/vkvg_internal.h index f49e389..ab1c3ee 100644 --- a/src/vkvg_internal.h +++ b/src/vkvg_internal.h @@ -35,9 +35,9 @@ #define _USE_MATH_DEFINES #include -#define M_PIF 3.14159265358979323846f /* float pi */ -#define M_PIF_2 1.57079632679489661923f -#define M_2_PIF 0.63661977236758134308f // 2/pi +#define M_PIF 3.14159265358979323846f /* float pi */ +#define M_PIF_2 1.57079632679489661923f +#define M_2_PIF 0.63661977236758134308f // 2/pi /*#ifndef M_2_PI #define M_2_PI 0.63661977236758134308 // 2/pi #endif*/ @@ -48,11 +48,11 @@ #define LOG #endif -#define PATH_CLOSED_BIT 0x80000000 /* most significant bit of path elmts is closed/open path state */ -#define PATH_HAS_CURVES_BIT 0x40000000 /* 2rd most significant bit of path elmts is curved status - * for main path, this indicate that curve datas are present. - * For segments, this indicate that the segment is curved or not */ -#define PATH_ELT_MASK 0x3FFFFFFF /* Bit mask for fetching path element value */ +#define PATH_CLOSED_BIT 0x80000000 /* most significant bit of path elmts is closed/open path state */ +#define PATH_HAS_CURVES_BIT 0x40000000 /* 2rd most significant bit of path elmts is curved status + * for main path, this indicate that curve datas are present. + * For segments, this indicate that the segment is curved or not */ +#define PATH_ELT_MASK 0x3FFFFFFF /* Bit mask for fetching path element value */ #define ROUNDF(f, c) (((float)((int)((f) * (c))) / (c))) #define ROUND_DOWN(v,p) (floorf(v * p) / p) @@ -68,6 +68,6 @@ //used to store clipping bit on context saving. 8 bit stencil will allow 6 save/restore layer #define FB_COLOR_FORMAT VK_FORMAT_B8G8R8A8_UNORM #define VKVG_SURFACE_IMGS_REQUIREMENTS VK_IMAGE_USAGE_SAMPLED_BIT|VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT|\ - VK_IMAGE_USAGE_TRANSFER_DST_BIT|VK_IMAGE_USAGE_TRANSFER_SRC_BIT|VK_FORMAT_FEATURE_BLIT_SRC_BIT + VK_IMAGE_USAGE_TRANSFER_DST_BIT|VK_IMAGE_USAGE_TRANSFER_SRC_BIT|VK_FORMAT_FEATURE_BLIT_SRC_BIT #define VKVG_FENCE_TIMEOUT UINT64_MAX #endif diff --git a/src/vkvg_matrix.c b/src/vkvg_matrix.c index 6094efc..a76ab36 100644 --- a/src/vkvg_matrix.c +++ b/src/vkvg_matrix.c @@ -79,11 +79,11 @@ void _vkvg_matrix_get_affine (const vkvg_matrix_t *matrix, float *xy, float *yy, float *x0, float *y0) { - *xx = matrix->xx; - *yx = matrix->yx; + *xx = matrix->xx; + *yx = matrix->yx; - *xy = matrix->xy; - *yy = matrix->yy; + *xy = matrix->xy; + *yy = matrix->yy; if (x0) *x0 = matrix->x0; @@ -184,7 +184,7 @@ void vkvg_matrix_init_translate (vkvg_matrix_t *matrix, float tx, float ty) void vkvg_matrix_init_scale (vkvg_matrix_t *matrix, float sx, float sy) { vkvg_matrix_init (matrix, - sx, 0, + sx, 0, 0, sy, 0, 0); } diff --git a/src/vkvg_pattern.c b/src/vkvg_pattern.c index 76dc30e..73239e9 100644 --- a/src/vkvg_pattern.c +++ b/src/vkvg_pattern.c @@ -119,4 +119,3 @@ void vkvg_pattern_destroy(VkvgPattern pat) free(pat); } - diff --git a/src/vkvg_pattern.h b/src/vkvg_pattern.h index 7aff2e0..76c5006 100644 --- a/src/vkvg_pattern.h +++ b/src/vkvg_pattern.h @@ -27,18 +27,18 @@ #include "vkh.h" typedef struct _vkvg_pattern_t { - vkvg_pattern_type_t type; - vkvg_extend_t extend; - vkvg_filter_t filter; - uint32_t references; - void* data; + vkvg_pattern_type_t type; + vkvg_extend_t extend; + vkvg_filter_t filter; + uint32_t references; + void* data; }vkvg_pattern_t; typedef struct _vkvg_gradient_t { - vec4 cp[3];//two first are normal cp, third are radiuses for radial - vkvg_color_t colors[16]; - vec4 stops[16]; - uint32_t count; + vec4 cp[3];//two first are normal cp, third are radiuses for radial + vkvg_color_t colors[16]; + vec4 stops[16]; + uint32_t count; }vkvg_gradient_t; #endif diff --git a/src/vkvg_surface.c b/src/vkvg_surface.c index 802f75e..34a89b8 100644 --- a/src/vkvg_surface.c +++ b/src/vkvg_surface.c @@ -63,8 +63,8 @@ VkvgSurface vkvg_surface_create_for_VkhImage (VkvgDevice dev, void* vkhImg) { VK_SAMPLER_MIPMAP_MODE_NEAREST,VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE); _create_surface_secondary_images (surf); - _create_framebuffer (surf); - _clear_surface (surf, VK_IMAGE_ASPECT_STENCIL_BIT); + _create_framebuffer (surf); + _clear_surface (surf, VK_IMAGE_ASPECT_STENCIL_BIT); surf->references = 1; vkvg_device_reference (surf->dev); @@ -129,7 +129,7 @@ VkvgSurface vkvg_surface_create_from_bitmap (VkvgDevice dev, unsigned char* img, .dstSubresource = imgSubResLayers, .dstOffsets[1] = {(int32_t)surf->width, (int32_t)surf->height, 1}, }; - vkCmdBlitImage (cmd, + vkCmdBlitImage (cmd, vkh_image_get_vkimage (stagImg), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, vkh_image_get_vkimage (tmpImg), VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &blit, VK_FILTER_LINEAR); @@ -137,21 +137,21 @@ VkvgSurface vkvg_surface_create_from_bitmap (VkvgDevice dev, unsigned char* img, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT); - vkh_cmd_end (cmd); - _submit_cmd (dev, &cmd, dev->fence); + vkh_cmd_end (cmd); + _submit_cmd (dev, &cmd, dev->fence); //don't reset fence after completion as this is the last cmd. (signaled idle fence) vkWaitForFences (dev->vkDev, 1, &dev->fence, VK_TRUE, UINT64_MAX); vkvg_buffer_destroy (&buff); - vkh_image_destroy (stagImg); + vkh_image_destroy (stagImg); surf->new = false; //create tmp context with rendering pipeline to create the multisample img VkvgContext ctx = vkvg_create (surf); -/* VkClearAttachment ca = {VK_IMAGE_ASPECT_COLOR_BIT,0, { 0.0f, 0.0f, 0.0f, 0.0f }}; +/* VkClearAttachment ca = {VK_IMAGE_ASPECT_COLOR_BIT,0, { 0.0f, 0.0f, 0.0f, 0.0f }}; VkClearRect cr = {{{0,0},{surf->width,surf->height}},0,1}; vkCmdClearAttachments(ctx->cmd, 1, &ca, 1, &cr);*/ @@ -163,10 +163,10 @@ VkvgSurface vkvg_surface_create_from_bitmap (VkvgDevice dev, unsigned char* img, _update_descriptor_set (ctx, tmpImg, ctx->dsSrc); _ensure_renderpass_is_started (ctx); - vkvg_paint (ctx); - vkvg_destroy (ctx); + vkvg_paint (ctx); + vkvg_destroy (ctx); - vkh_image_destroy (tmpImg); + vkh_image_destroy (tmpImg); surf->references = 1; vkvg_device_reference (surf->dev); @@ -236,8 +236,7 @@ uint32_t vkvg_surface_get_height (VkvgSurface surf) { return surf->height; } -void vkvg_surface_write_to_png (VkvgSurface surf, const char* path){ - uint32_t stride = surf->width * 4; +void vkvg_surface_write_to_png (VkvgSurface surf, const char* path){ VkImageSubresourceLayers imgSubResLayers = {VK_IMAGE_ASPECT_COLOR_BIT,0,0,1}; VkvgDevice dev = surf->dev; @@ -263,16 +262,18 @@ void vkvg_surface_write_to_png (VkvgSurface surf, const char* path){ .dstSubresource = imgSubResLayers, .dstOffsets[1] = {(int32_t)surf->width, (int32_t)surf->height, 1}, }; - vkCmdBlitImage (cmd, + vkCmdBlitImage (cmd, vkh_image_get_vkimage (surf->img), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, vkh_image_get_vkimage (stagImg), VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &blit, VK_FILTER_NEAREST); - vkh_cmd_end (cmd); - _submit_cmd (dev, &cmd, dev->fence); + vkh_cmd_end (cmd); + _submit_cmd (dev, &cmd, dev->fence); vkWaitForFences (dev->vkDev, 1, &dev->fence, VK_TRUE, UINT64_MAX); void* img = vkh_image_map (stagImg); + uint64_t stride = vkh_image_get_stride(stagImg); + stbi_write_png (path, (int32_t)surf->width, (int32_t)surf->height, 4, img, (int32_t)stride); vkh_image_unmap (stagImg); @@ -280,12 +281,11 @@ void vkvg_surface_write_to_png (VkvgSurface surf, const char* path){ } void vkvg_surface_write_to_memory (VkvgSurface surf, unsigned char* const bitmap){ - uint32_t stride = surf->width * 4; VkImageSubresourceLayers imgSubResLayers = {VK_IMAGE_ASPECT_COLOR_BIT,0,0,1}; VkvgDevice dev = surf->dev; //RGBA to blit to, surf img is bgra - VkhImage stagImg= vkh_image_create ((VkhDevice)surf->dev,VK_FORMAT_R8G8B8A8_UNORM,surf->width,surf->height,VK_IMAGE_TILING_LINEAR, + VkhImage stagImg= vkh_image_create ((VkhDevice)surf->dev,VK_FORMAT_B8G8R8A8_UNORM ,surf->width,surf->height,VK_IMAGE_TILING_LINEAR, VMA_MEMORY_USAGE_GPU_TO_CPU, VK_IMAGE_USAGE_TRANSFER_SRC_BIT|VK_IMAGE_USAGE_TRANSFER_DST_BIT); @@ -306,16 +306,25 @@ void vkvg_surface_write_to_memory (VkvgSurface surf, unsigned char* const bitmap .dstSubresource = imgSubResLayers, .dstOffsets[1] = {(int32_t)surf->width, (int32_t)surf->height, 1}, }; - vkCmdBlitImage (cmd, + vkCmdBlitImage (cmd, vkh_image_get_vkimage (surf->img), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, vkh_image_get_vkimage (stagImg), VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &blit, VK_FILTER_NEAREST); - vkh_cmd_end (cmd); - _submit_cmd (dev, &cmd, dev->fence); + vkh_cmd_end (cmd); + _submit_cmd (dev, &cmd, dev->fence); vkWaitForFences (dev->vkDev, 1, &dev->fence, VK_TRUE, UINT64_MAX); + uint64_t stride = vkh_image_get_stride(stagImg); + uint32_t dest_stride = surf->width * 4; + void* img = vkh_image_map (stagImg); - memcpy(bitmap, img, surf->height * stride); + void* row = bitmap; + for (uint32_t y = 0; y < surf->height; y++) { + memcpy(row, img, dest_stride); + row += dest_stride; + img += stride; + } + vkh_image_unmap (stagImg); vkh_image_destroy (stagImg); } diff --git a/src/vkvg_surface_internal.c b/src/vkvg_surface_internal.c index 6f8a651..470965e 100644 --- a/src/vkvg_surface_internal.c +++ b/src/vkvg_surface_internal.c @@ -25,7 +25,7 @@ #include "vkh_image.h" void _explicit_ms_resolve (VkvgSurface surf){ - VkvgDevice dev = surf->dev; + VkvgDevice dev = surf->dev; VkCommandBuffer cmd = dev->cmd; _wait_and_reset_device_fence (dev); @@ -58,7 +58,7 @@ void _explicit_ms_resolve (VkvgSurface surf){ void _clear_surface (VkvgSurface surf, VkImageAspectFlags aspect) { - VkvgDevice dev = surf->dev; + VkvgDevice dev = surf->dev; VkCommandBuffer cmd = dev->cmd; _wait_and_reset_device_fence (dev); @@ -127,7 +127,7 @@ void _create_surface_secondary_images (VkvgSurface surf) { vkh_device_set_object_name((VkhDevice)surf->dev, VK_OBJECT_TYPE_SAMPLER, (uint64_t)vkh_image_get_sampler(surf->imgMS), "SURF MS color SAMPLER"); #endif } - surf->stencil = vkh_image_ms_create((VkhDevice)surf->dev,surf->dev->stencilFormat,surf->dev->samples,surf->width,surf->height,VMA_MEMORY_USAGE_GPU_ONLY, VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT|VK_IMAGE_USAGE_TRANSFER_DST_BIT|VK_IMAGE_USAGE_TRANSFER_SRC_BIT); + surf->stencil = vkh_image_ms_create((VkhDevice)surf->dev,surf->dev->stencilFormat,surf->dev->samples,surf->width,surf->height,VMA_MEMORY_USAGE_GPU_ONLY, VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT|VK_IMAGE_USAGE_TRANSFER_DST_BIT|VK_IMAGE_USAGE_TRANSFER_SRC_BIT); vkh_image_create_descriptor(surf->stencil, VK_IMAGE_VIEW_TYPE_2D, VK_IMAGE_ASPECT_STENCIL_BIT, VK_FILTER_NEAREST, VK_FILTER_NEAREST, VK_SAMPLER_MIPMAP_MODE_NEAREST,VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE); #if defined(DEBUG) && defined (VKVG_DBG_UTILS) @@ -162,11 +162,11 @@ void _create_framebuffer (VkvgSurface surf) { } void _create_surface_images (VkvgSurface surf) { - _create_surface_main_image (surf); + _create_surface_main_image (surf); _create_surface_secondary_images(surf); - _create_framebuffer (surf); + _create_framebuffer (surf); - _clear_surface (surf, VK_IMAGE_ASPECT_STENCIL_BIT); + _clear_surface (surf, VK_IMAGE_ASPECT_STENCIL_BIT); #if defined(DEBUG) && defined(ENABLE_VALIDATION) vkh_image_set_name(surf->img, "surfImg"); vkh_image_set_name(surf->imgMS, "surfImgMS"); diff --git a/src/vkvg_surface_internal.h b/src/vkvg_surface_internal.h index 5919cbc..e6b474c 100644 --- a/src/vkvg_surface_internal.h +++ b/src/vkvg_surface_internal.h @@ -30,13 +30,13 @@ typedef struct _vkvg_surface_t { VkvgDevice dev; uint32_t width; uint32_t height; - VkFormat format; + VkFormat format; VkFramebuffer fb; VkhImage img; VkhImage imgMS; VkhImage stencil; - uint32_t references; - bool new; + uint32_t references; + bool new; }vkvg_surface; void _explicit_ms_resolve (VkvgSurface surf); diff --git a/vkh b/vkh index 403f694..dde09d8 160000 --- a/vkh +++ b/vkh @@ -1 +1 @@ -Subproject commit 403f694bf90455f13b115ea7cf58495f58bde6cc +Subproject commit dde09d88d7c6554b39d46bccaf8b392e2a42d937 -- 2.47.3