Improve performance of Unicode conversions (#891)
This commit is contained in:
parent
9195f84f60
commit
2f92a262dc
|
@ -97,7 +97,7 @@ size_t calc_utf8_missing( _Inout_ sqlsrv_stmt* stmt, _In_reads_(buffer_end) cons
|
|||
bool check_for_next_stream_parameter( _Inout_ sqlsrv_stmt* stmt TSRMLS_DC );
|
||||
bool convert_input_param_to_utf16( _In_ zval* input_param_z, _Inout_ zval* convert_param_z );
|
||||
void core_get_field_common(_Inout_ sqlsrv_stmt* stmt, _In_ SQLUSMALLINT field_index, _Inout_ sqlsrv_phptype
|
||||
sqlsrv_php_type, _Inout_updates_bytes_(*field_len) void*& field_value, _Inout_ SQLLEN* field_len TSRMLS_DC);
|
||||
sqlsrv_php_type, _Inout_updates_bytes_(*field_len) void*& field_value, _Inout_ SQLLEN* field_len TSRMLS_DC);
|
||||
// returns the ODBC C type constant that matches the PHP type and encoding given
|
||||
SQLSMALLINT default_c_type( _Inout_ sqlsrv_stmt* stmt, _In_opt_ SQLULEN paramno, _In_ zval const* param_z, _In_ SQLSRV_ENCODING encoding TSRMLS_DC );
|
||||
void default_sql_size_and_scale( _Inout_ sqlsrv_stmt* stmt, _In_opt_ unsigned int paramno, _In_ zval* param_z, _In_ SQLSRV_ENCODING encoding,
|
||||
|
@ -110,7 +110,7 @@ void field_cache_dtor( _Inout_ zval* data_z );
|
|||
void format_decimal_numbers(_In_ SQLSMALLINT decimals_places, _In_ SQLSMALLINT field_scale, _Inout_updates_bytes_(*field_len) char*& field_value, _Inout_ SQLLEN* field_len);
|
||||
void finalize_output_parameters( _Inout_ sqlsrv_stmt* stmt TSRMLS_DC );
|
||||
void get_field_as_string( _Inout_ sqlsrv_stmt* stmt, _In_ SQLUSMALLINT field_index, _Inout_ sqlsrv_phptype sqlsrv_php_type,
|
||||
_Inout_updates_bytes_(*field_len) void*& field_value, _Inout_ SQLLEN* field_len TSRMLS_DC );
|
||||
_Inout_updates_bytes_(*field_len) void*& field_value, _Inout_ SQLLEN* field_len TSRMLS_DC );
|
||||
stmt_option const* get_stmt_option( sqlsrv_conn const* conn, _In_ zend_ulong key, _In_ const stmt_option stmt_opts[] TSRMLS_DC );
|
||||
bool is_valid_sqlsrv_phptype( _In_ sqlsrv_phptype type );
|
||||
// assure there is enough space for the output parameter string
|
||||
|
@ -150,7 +150,7 @@ sqlsrv_stmt::sqlsrv_stmt( _In_ sqlsrv_conn* c, _In_ SQLHANDLE handle, _In_ error
|
|||
current_stream( NULL, SQLSRV_ENCODING_DEFAULT ),
|
||||
current_stream_read( 0 )
|
||||
{
|
||||
ZVAL_UNDEF( &active_stream );
|
||||
ZVAL_UNDEF( &active_stream );
|
||||
// initialize the input string parameters array (which holds zvals)
|
||||
core::sqlsrv_array_init( *conn, ¶m_input_strings TSRMLS_CC );
|
||||
|
||||
|
@ -262,7 +262,7 @@ void sqlsrv_stmt::new_result_set( TSRMLS_D )
|
|||
sqlsrv_stmt* core_sqlsrv_create_stmt( _Inout_ sqlsrv_conn* conn, _In_ driver_stmt_factory stmt_factory, _In_opt_ HashTable* options_ht,
|
||||
_In_opt_ const stmt_option valid_stmt_opts[], _In_ error_callback const err, _In_opt_ void* driver TSRMLS_DC )
|
||||
{
|
||||
sqlsrv_malloc_auto_ptr<sqlsrv_stmt> stmt;
|
||||
sqlsrv_malloc_auto_ptr<sqlsrv_stmt> stmt;
|
||||
SQLHANDLE stmt_h = SQL_NULL_HANDLE;
|
||||
sqlsrv_stmt* return_stmt = NULL;
|
||||
|
||||
|
@ -280,26 +280,26 @@ sqlsrv_stmt* core_sqlsrv_create_stmt( _Inout_ sqlsrv_conn* conn, _In_ driver_stm
|
|||
|
||||
// process the options array given to core_sqlsrv_prepare.
|
||||
if( options_ht && zend_hash_num_elements( options_ht ) > 0 && valid_stmt_opts ) {
|
||||
zend_ulong index = -1;
|
||||
zend_string *key = NULL;
|
||||
zval* value_z = NULL;
|
||||
zend_ulong index = -1;
|
||||
zend_string *key = NULL;
|
||||
zval* value_z = NULL;
|
||||
|
||||
ZEND_HASH_FOREACH_KEY_VAL( options_ht, index, key, value_z ) {
|
||||
ZEND_HASH_FOREACH_KEY_VAL( options_ht, index, key, value_z ) {
|
||||
|
||||
int type = key ? HASH_KEY_IS_STRING : HASH_KEY_IS_LONG;
|
||||
int type = key ? HASH_KEY_IS_STRING : HASH_KEY_IS_LONG;
|
||||
|
||||
// The driver layer should ensure a valid key.
|
||||
DEBUG_SQLSRV_ASSERT(( type == HASH_KEY_IS_LONG ), "allocate_stmt: Invalid statment option key provided." );
|
||||
// The driver layer should ensure a valid key.
|
||||
DEBUG_SQLSRV_ASSERT(( type == HASH_KEY_IS_LONG ), "allocate_stmt: Invalid statment option key provided." );
|
||||
|
||||
const stmt_option* stmt_opt = get_stmt_option( stmt->conn, index, valid_stmt_opts TSRMLS_CC );
|
||||
const stmt_option* stmt_opt = get_stmt_option( stmt->conn, index, valid_stmt_opts TSRMLS_CC );
|
||||
|
||||
// if the key didn't match, then return the error to the script.
|
||||
// The driver layer should ensure that the key is valid.
|
||||
DEBUG_SQLSRV_ASSERT( stmt_opt != NULL, "allocate_stmt: unexpected null value for statement option." );
|
||||
// if the key didn't match, then return the error to the script.
|
||||
// The driver layer should ensure that the key is valid.
|
||||
DEBUG_SQLSRV_ASSERT( stmt_opt != NULL, "allocate_stmt: unexpected null value for statement option." );
|
||||
|
||||
// perform the actions the statement option needs done.
|
||||
(*stmt_opt->func)( stmt, stmt_opt, value_z TSRMLS_CC );
|
||||
} ZEND_HASH_FOREACH_END();
|
||||
// perform the actions the statement option needs done.
|
||||
(*stmt_opt->func)( stmt, stmt_opt, value_z TSRMLS_CC );
|
||||
} ZEND_HASH_FOREACH_END();
|
||||
}
|
||||
|
||||
return_stmt = stmt;
|
||||
|
@ -495,7 +495,7 @@ void core_sqlsrv_bind_param( _Inout_ sqlsrv_stmt* stmt, _In_ SQLUSMALLINT param_
|
|||
ind_ptr = buffer_len;
|
||||
if( direction != SQL_PARAM_INPUT ){
|
||||
// save the parameter so that 1) the buffer doesn't go away, and 2) we can set it to NULL if returned
|
||||
sqlsrv_output_param output_param( param_ref, static_cast<int>( param_num ), zval_was_bool, php_out_type);
|
||||
sqlsrv_output_param output_param( param_ref, static_cast<int>( param_num ), zval_was_bool, php_out_type);
|
||||
save_output_param_for_later( stmt, output_param TSRMLS_CC );
|
||||
}
|
||||
}
|
||||
|
@ -503,11 +503,11 @@ void core_sqlsrv_bind_param( _Inout_ sqlsrv_stmt* stmt, _In_ SQLUSMALLINT param_
|
|||
case IS_DOUBLE:
|
||||
{
|
||||
buffer = ¶m_z->value;
|
||||
buffer_len = sizeof( Z_DVAL_P( param_z ));
|
||||
buffer_len = sizeof( Z_DVAL_P( param_z ));
|
||||
ind_ptr = buffer_len;
|
||||
if( direction != SQL_PARAM_INPUT ){
|
||||
// save the parameter so that 1) the buffer doesn't go away, and 2) we can set it to NULL if returned
|
||||
sqlsrv_output_param output_param( param_ref, static_cast<int>( param_num ), zval_was_bool, php_out_type);
|
||||
sqlsrv_output_param output_param( param_ref, static_cast<int>( param_num ), zval_was_bool, php_out_type);
|
||||
save_output_param_for_later( stmt, output_param TSRMLS_CC );
|
||||
}
|
||||
}
|
||||
|
@ -621,10 +621,10 @@ void core_sqlsrv_bind_param( _Inout_ sqlsrv_stmt* stmt, _In_ SQLUSMALLINT param_
|
|||
zval buffer_z;
|
||||
zval format_z;
|
||||
zval params[1];
|
||||
ZVAL_UNDEF( &function_z );
|
||||
ZVAL_UNDEF( &buffer_z );
|
||||
ZVAL_UNDEF( &format_z );
|
||||
ZVAL_UNDEF( params );
|
||||
ZVAL_UNDEF( &function_z );
|
||||
ZVAL_UNDEF( &buffer_z );
|
||||
ZVAL_UNDEF( &format_z );
|
||||
ZVAL_UNDEF( params );
|
||||
|
||||
bool valid_class_name_found = false;
|
||||
|
||||
|
@ -653,23 +653,23 @@ void core_sqlsrv_bind_param( _Inout_ sqlsrv_stmt* stmt, _In_ SQLUSMALLINT param_
|
|||
// meaning there is too much information in the character string. If the user specifies the 'datetimeoffset'
|
||||
// sql type, it lacks the timezone.
|
||||
if( sql_type == SQL_SS_TIMESTAMPOFFSET ){
|
||||
core::sqlsrv_zval_stringl( &format_z, const_cast<char*>( DateTime::DATETIMEOFFSET_FORMAT ),
|
||||
core::sqlsrv_zval_stringl( &format_z, const_cast<char*>( DateTime::DATETIMEOFFSET_FORMAT ),
|
||||
DateTime::DATETIMEOFFSET_FORMAT_LEN );
|
||||
}
|
||||
else if( sql_type == SQL_TYPE_DATE ){
|
||||
core::sqlsrv_zval_stringl( &format_z, const_cast<char*>( DateTime::DATE_FORMAT ), DateTime::DATE_FORMAT_LEN );
|
||||
core::sqlsrv_zval_stringl( &format_z, const_cast<char*>( DateTime::DATE_FORMAT ), DateTime::DATE_FORMAT_LEN );
|
||||
}
|
||||
else{
|
||||
core::sqlsrv_zval_stringl( &format_z, const_cast<char*>( DateTime::DATETIME_FORMAT ), DateTime::DATETIME_FORMAT_LEN );
|
||||
core::sqlsrv_zval_stringl( &format_z, const_cast<char*>( DateTime::DATETIME_FORMAT ), DateTime::DATETIME_FORMAT_LEN );
|
||||
}
|
||||
// call the DateTime::format member function to convert the object to a string that SQL Server understands
|
||||
core::sqlsrv_zval_stringl( &function_z, "format", sizeof( "format" ) - 1 );
|
||||
core::sqlsrv_zval_stringl( &function_z, "format", sizeof( "format" ) - 1 );
|
||||
params[0] = format_z;
|
||||
// This is equivalent to the PHP code: $param_z->format( $format_z ); where param_z is the
|
||||
// DateTime object and $format_z is the format string.
|
||||
int zr = call_user_function( EG( function_table ), param_z, &function_z, &buffer_z, 1, params TSRMLS_CC );
|
||||
zend_string_release( Z_STR( format_z ));
|
||||
zend_string_release( Z_STR( function_z ));
|
||||
zend_string_release( Z_STR( format_z ));
|
||||
zend_string_release( Z_STR( function_z ));
|
||||
CHECK_CUSTOM_ERROR( zr == FAILURE, stmt, SQLSRV_ERROR_INVALID_PARAMETER_PHPTYPE, param_num + 1 ){
|
||||
throw core::CoreException();
|
||||
}
|
||||
|
@ -696,7 +696,7 @@ void core_sqlsrv_bind_param( _Inout_ sqlsrv_stmt* stmt, _In_ SQLUSMALLINT param_
|
|||
}
|
||||
|
||||
core::SQLBindParameter( stmt, param_num + 1, direction,
|
||||
c_type, sql_type, column_size, decimal_digits, buffer, buffer_len, &ind_ptr TSRMLS_CC );
|
||||
c_type, sql_type, column_size, decimal_digits, buffer, buffer_len, &ind_ptr TSRMLS_CC );
|
||||
if ( stmt->conn->ce_option.enabled && sql_type == SQL_TYPE_TIMESTAMP )
|
||||
{
|
||||
if( decimal_digits == 3 )
|
||||
|
@ -885,14 +885,14 @@ field_meta_data* core_sqlsrv_field_metadata( _Inout_ sqlsrv_stmt* stmt, _In_ SQL
|
|||
meta_data = new ( sqlsrv_malloc( sizeof( field_meta_data ))) field_meta_data();
|
||||
field_name_temp = static_cast<SQLWCHAR*>( sqlsrv_malloc( ( SS_MAXCOLNAMELEN + 1 ) * sizeof( SQLWCHAR ) ));
|
||||
SQLSRV_ENCODING encoding = ( (stmt->encoding() == SQLSRV_ENCODING_DEFAULT ) ? stmt->conn->encoding() : stmt->encoding());
|
||||
try{
|
||||
try{
|
||||
core::SQLDescribeColW( stmt, colno + 1, field_name_temp, SS_MAXCOLNAMELEN + 1, &field_len_temp,
|
||||
&( meta_data->field_type ), & ( meta_data->field_size ), & ( meta_data->field_scale ),
|
||||
&( meta_data->field_is_nullable ) TSRMLS_CC );
|
||||
}
|
||||
catch ( core::CoreException& e ) {
|
||||
throw e;
|
||||
}
|
||||
}
|
||||
catch ( core::CoreException& e ) {
|
||||
throw e;
|
||||
}
|
||||
|
||||
bool converted = convert_string_from_utf16( encoding, field_name_temp, field_len_temp, ( char** ) &( meta_data->field_name ), field_name_len );
|
||||
|
||||
|
@ -960,50 +960,50 @@ field_meta_data* core_sqlsrv_field_metadata( _Inout_ sqlsrv_stmt* stmt, _In_ SQL
|
|||
// Nothing, exception thrown if an error occurs
|
||||
|
||||
void core_sqlsrv_get_field( _Inout_ sqlsrv_stmt* stmt, _In_ SQLUSMALLINT field_index, _In_ sqlsrv_phptype sqlsrv_php_type_in, _In_ bool prefer_string,
|
||||
_Outref_result_bytebuffer_maybenull_(*field_len) void*& field_value, _Inout_ SQLLEN* field_len, _In_ bool cache_field,
|
||||
_Out_ SQLSRV_PHPTYPE *sqlsrv_php_type_out TSRMLS_DC)
|
||||
_Outref_result_bytebuffer_maybenull_(*field_len) void*& field_value, _Inout_ SQLLEN* field_len, _In_ bool cache_field,
|
||||
_Out_ SQLSRV_PHPTYPE *sqlsrv_php_type_out TSRMLS_DC)
|
||||
{
|
||||
try {
|
||||
try {
|
||||
|
||||
// close the stream to release the resource
|
||||
close_active_stream(stmt TSRMLS_CC);
|
||||
// close the stream to release the resource
|
||||
close_active_stream(stmt TSRMLS_CC);
|
||||
|
||||
// if the field has been retrieved before, return the previous result
|
||||
field_cache* cached = NULL;
|
||||
if (NULL != ( cached = static_cast<field_cache*>( zend_hash_index_find_ptr( Z_ARRVAL( stmt->field_cache ), static_cast<zend_ulong>( field_index ))))) {
|
||||
// the field value is NULL
|
||||
if( cached->value == NULL ) {
|
||||
field_value = NULL;
|
||||
*field_len = 0;
|
||||
if( sqlsrv_php_type_out ) { *sqlsrv_php_type_out = SQLSRV_PHPTYPE_NULL; }
|
||||
}
|
||||
else {
|
||||
// if the field has been retrieved before, return the previous result
|
||||
field_cache* cached = NULL;
|
||||
if (NULL != ( cached = static_cast<field_cache*>( zend_hash_index_find_ptr( Z_ARRVAL( stmt->field_cache ), static_cast<zend_ulong>( field_index ))))) {
|
||||
// the field value is NULL
|
||||
if( cached->value == NULL ) {
|
||||
field_value = NULL;
|
||||
*field_len = 0;
|
||||
if( sqlsrv_php_type_out ) { *sqlsrv_php_type_out = SQLSRV_PHPTYPE_NULL; }
|
||||
}
|
||||
else {
|
||||
|
||||
field_value = sqlsrv_malloc( cached->len, sizeof( char ), 1 );
|
||||
memcpy_s( field_value, ( cached->len * sizeof( char )), cached->value, cached->len );
|
||||
if( cached->type.typeinfo.type == SQLSRV_PHPTYPE_STRING) {
|
||||
// prevent the 'string not null terminated' warning
|
||||
reinterpret_cast<char*>( field_value )[cached->len] = '\0';
|
||||
}
|
||||
*field_len = cached->len;
|
||||
if( sqlsrv_php_type_out) { *sqlsrv_php_type_out = static_cast<SQLSRV_PHPTYPE>(cached->type.typeinfo.type); }
|
||||
}
|
||||
return;
|
||||
}
|
||||
field_value = sqlsrv_malloc( cached->len, sizeof( char ), 1 );
|
||||
memcpy_s( field_value, ( cached->len * sizeof( char )), cached->value, cached->len );
|
||||
if( cached->type.typeinfo.type == SQLSRV_PHPTYPE_STRING) {
|
||||
// prevent the 'string not null terminated' warning
|
||||
reinterpret_cast<char*>( field_value )[cached->len] = '\0';
|
||||
}
|
||||
*field_len = cached->len;
|
||||
if( sqlsrv_php_type_out) { *sqlsrv_php_type_out = static_cast<SQLSRV_PHPTYPE>(cached->type.typeinfo.type); }
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
sqlsrv_phptype sqlsrv_php_type = sqlsrv_php_type_in;
|
||||
sqlsrv_phptype sqlsrv_php_type = sqlsrv_php_type_in;
|
||||
|
||||
SQLLEN sql_field_type = 0;
|
||||
SQLLEN sql_field_len = 0;
|
||||
SQLLEN sql_field_type = 0;
|
||||
SQLLEN sql_field_len = 0;
|
||||
|
||||
// Make sure that the statement was executed and not just prepared.
|
||||
CHECK_CUSTOM_ERROR( !stmt->executed, stmt, SQLSRV_ERROR_STATEMENT_NOT_EXECUTED ) {
|
||||
throw core::CoreException();
|
||||
}
|
||||
// Make sure that the statement was executed and not just prepared.
|
||||
CHECK_CUSTOM_ERROR( !stmt->executed, stmt, SQLSRV_ERROR_STATEMENT_NOT_EXECUTED ) {
|
||||
throw core::CoreException();
|
||||
}
|
||||
|
||||
// if the field is to be cached, and this field is being retrieved out of order, cache prior fields so they
|
||||
// may also be retrieved.
|
||||
if( cache_field && (field_index - stmt->last_field_index ) >= 2 ) {
|
||||
// if the field is to be cached, and this field is being retrieved out of order, cache prior fields so they
|
||||
// may also be retrieved.
|
||||
if( cache_field && (field_index - stmt->last_field_index ) >= 2 ) {
|
||||
sqlsrv_phptype invalid;
|
||||
invalid.typeinfo.type = SQLSRV_PHPTYPE_INVALID;
|
||||
for( int i = stmt->last_field_index + 1; i < field_index; ++i ) {
|
||||
|
@ -1033,27 +1033,27 @@ void core_sqlsrv_get_field( _Inout_ sqlsrv_stmt* stmt, _In_ SQLUSMALLINT field_i
|
|||
sqlsrv_php_type = stmt->sql_type_to_php_type(static_cast<SQLINTEGER>(sql_field_type), static_cast<SQLUINTEGER>(sql_field_len), prefer_string);
|
||||
}
|
||||
|
||||
// Verify that we have an acceptable type to convert.
|
||||
CHECK_CUSTOM_ERROR( !is_valid_sqlsrv_phptype( sqlsrv_php_type ), stmt, SQLSRV_ERROR_INVALID_TYPE ) {
|
||||
throw core::CoreException();
|
||||
}
|
||||
// Verify that we have an acceptable type to convert.
|
||||
CHECK_CUSTOM_ERROR( !is_valid_sqlsrv_phptype( sqlsrv_php_type ), stmt, SQLSRV_ERROR_INVALID_TYPE ) {
|
||||
throw core::CoreException();
|
||||
}
|
||||
|
||||
if( sqlsrv_php_type_out != NULL )
|
||||
*sqlsrv_php_type_out = static_cast<SQLSRV_PHPTYPE>( sqlsrv_php_type.typeinfo.type );
|
||||
if( sqlsrv_php_type_out != NULL )
|
||||
*sqlsrv_php_type_out = static_cast<SQLSRV_PHPTYPE>( sqlsrv_php_type.typeinfo.type );
|
||||
|
||||
// Retrieve the data
|
||||
core_get_field_common( stmt, field_index, sqlsrv_php_type, field_value, field_len TSRMLS_CC );
|
||||
// Retrieve the data
|
||||
core_get_field_common( stmt, field_index, sqlsrv_php_type, field_value, field_len TSRMLS_CC );
|
||||
|
||||
// if the user wants us to cache the field, we'll do it
|
||||
if( cache_field ) {
|
||||
field_cache cache( field_value, *field_len, sqlsrv_php_type );
|
||||
core::sqlsrv_zend_hash_index_update_mem( *stmt, Z_ARRVAL( stmt->field_cache ), field_index, &cache, sizeof(field_cache) TSRMLS_CC );
|
||||
}
|
||||
}
|
||||
// if the user wants us to cache the field, we'll do it
|
||||
if( cache_field ) {
|
||||
field_cache cache( field_value, *field_len, sqlsrv_php_type );
|
||||
core::sqlsrv_zend_hash_index_update_mem( *stmt, Z_ARRVAL( stmt->field_cache ), field_index, &cache, sizeof(field_cache) TSRMLS_CC );
|
||||
}
|
||||
}
|
||||
|
||||
catch( core::CoreException& e ) {
|
||||
throw e;
|
||||
}
|
||||
catch( core::CoreException& e ) {
|
||||
throw e;
|
||||
}
|
||||
}
|
||||
|
||||
// core_sqlsrv_has_any_result
|
||||
|
@ -1347,14 +1347,14 @@ bool core_sqlsrv_send_stream_packet( _Inout_ sqlsrv_stmt* stmt TSRMLS_DC )
|
|||
// read the data from the stream, send it via SQLPutData and track how much we've sent.
|
||||
else {
|
||||
char buffer[PHP_STREAM_BUFFER_SIZE + 1] = {'\0'};
|
||||
std::size_t buffer_size = sizeof( buffer ) - 3; // -3 to preserve enough space for a cut off UTF-8 character
|
||||
std::size_t buffer_size = sizeof( buffer ) - 3; // -3 to preserve enough space for a cut off UTF-8 character
|
||||
std::size_t read = php_stream_read( param_stream, buffer, buffer_size );
|
||||
|
||||
if (read > UINT_MAX)
|
||||
{
|
||||
LOG(SEV_ERROR, "PHP stream: buffer length exceeded.");
|
||||
throw core::CoreException();
|
||||
}
|
||||
if (read > UINT_MAX)
|
||||
{
|
||||
LOG(SEV_ERROR, "PHP stream: buffer length exceeded.");
|
||||
throw core::CoreException();
|
||||
}
|
||||
|
||||
stmt->current_stream_read += static_cast<unsigned int>( read );
|
||||
if (read == 0) {
|
||||
|
@ -1374,8 +1374,8 @@ bool core_sqlsrv_send_stream_packet( _Inout_ sqlsrv_stmt* stmt TSRMLS_DC )
|
|||
// expansion of 2x the UTF-8 size.
|
||||
SQLWCHAR wbuffer[PHP_STREAM_BUFFER_SIZE + 1] = {L'\0'};
|
||||
int wbuffer_size = static_cast<int>( sizeof( wbuffer ) / sizeof( SQLWCHAR ));
|
||||
DWORD last_error_code = ERROR_SUCCESS;
|
||||
// buffer_size is the # of wchars. Since it is set to stmt->param_buffer_size / 2, this is accurate
|
||||
DWORD last_error_code = ERROR_SUCCESS;
|
||||
// buffer_size is the # of wchars. Since it is set to stmt->param_buffer_size / 2, this is accurate
|
||||
#ifndef _WIN32
|
||||
int wsize = SystemLocale::ToUtf16Strict( stmt->current_stream.encoding, buffer, static_cast<int>(read), wbuffer, wbuffer_size, &last_error_code );
|
||||
#else
|
||||
|
@ -1383,7 +1383,7 @@ bool core_sqlsrv_send_stream_packet( _Inout_ sqlsrv_stmt* stmt TSRMLS_DC )
|
|||
last_error_code = GetLastError();
|
||||
#endif // !_WIN32
|
||||
|
||||
if( wsize == 0 && last_error_code == ERROR_NO_UNICODE_TRANSLATION ) {
|
||||
if( wsize == 0 && last_error_code == ERROR_NO_UNICODE_TRANSLATION ) {
|
||||
|
||||
// this will calculate how many bytes were cut off from the last UTF-8 character and read that many more
|
||||
// in, then reattempt the conversion. If it fails the second time, then an error is returned.
|
||||
|
@ -1873,23 +1873,26 @@ bool convert_input_param_to_utf16( _In_ zval* input_param_z, _Inout_ zval* conve
|
|||
std::size_t buffer_len = Z_STRLEN_P( input_param_z );
|
||||
int wchar_size;
|
||||
|
||||
if (buffer_len > INT_MAX)
|
||||
{
|
||||
LOG(SEV_ERROR, "Convert input parameter to utf16: buffer length exceeded.");
|
||||
throw core::CoreException();
|
||||
}
|
||||
if (buffer_len > INT_MAX)
|
||||
{
|
||||
LOG(SEV_ERROR, "Convert input parameter to utf16: buffer length exceeded.");
|
||||
throw core::CoreException();
|
||||
}
|
||||
|
||||
// if the string is empty, then just return that the conversion succeeded as
|
||||
// MultiByteToWideChar will "fail" on an empty string.
|
||||
if( buffer_len == 0 ) {
|
||||
core::sqlsrv_zval_stringl( converted_param_z, "", 0 );
|
||||
core::sqlsrv_zval_stringl( converted_param_z, "", 0 );
|
||||
return true;
|
||||
}
|
||||
|
||||
// if the parameter is an input parameter, calc the size of the necessary buffer from the length of the string
|
||||
#ifndef _WIN32
|
||||
wchar_size = SystemLocale::ToUtf16Strict( CP_UTF8, reinterpret_cast<LPCSTR>( buffer ), static_cast<int>( buffer_len ), NULL, 0 );
|
||||
// Declare wchar_size to be the largest possible number of UTF-16 characters after
|
||||
// conversion, to avoid the performance penalty of calling ToUtf16
|
||||
wchar_size = buffer_len;
|
||||
#else
|
||||
// Calculate the size of the necessary buffer from the length of the string -
|
||||
// no performance penalty because MultiByteToWideChar is highly optimised
|
||||
wchar_size = MultiByteToWideChar( CP_UTF8, MB_ERR_INVALID_CHARS, reinterpret_cast<LPCSTR>( buffer ), static_cast<int>( buffer_len ), NULL, 0 );
|
||||
#endif // !_WIN32
|
||||
|
||||
|
@ -1901,17 +1904,18 @@ bool convert_input_param_to_utf16( _In_ zval* input_param_z, _Inout_ zval* conve
|
|||
wbuffer = reinterpret_cast<SQLWCHAR*>( sqlsrv_malloc( (wchar_size + 1) * sizeof( SQLWCHAR ) ));
|
||||
// convert the utf-8 string to a wchar string in the new buffer
|
||||
#ifndef _WIN32
|
||||
int r = SystemLocale::ToUtf16Strict( CP_UTF8, reinterpret_cast<LPCSTR>( buffer ), static_cast<int>( buffer_len ), wbuffer, wchar_size );
|
||||
int rc = SystemLocale::ToUtf16Strict( CP_UTF8, reinterpret_cast<LPCSTR>( buffer ), static_cast<int>( buffer_len ), wbuffer, wchar_size );
|
||||
#else
|
||||
int r = MultiByteToWideChar( CP_UTF8, MB_ERR_INVALID_CHARS, reinterpret_cast<LPCSTR>( buffer ), static_cast<int>( buffer_len ), wbuffer, wchar_size );
|
||||
int rc = MultiByteToWideChar( CP_UTF8, MB_ERR_INVALID_CHARS, reinterpret_cast<LPCSTR>( buffer ), static_cast<int>( buffer_len ), wbuffer, wchar_size );
|
||||
#endif // !_WIN32
|
||||
// if there was a problem converting the string, then free the memory and return false
|
||||
if( r == 0 ) {
|
||||
if( rc == 0 ) {
|
||||
return false;
|
||||
}
|
||||
wchar_size = rc;
|
||||
|
||||
// null terminate the string, set the size within the zval, and return success
|
||||
wbuffer[wchar_size] = L'\0';
|
||||
wbuffer[ wchar_size ] = L'\0';
|
||||
core::sqlsrv_zval_stringl( converted_param_z, reinterpret_cast<char*>( wbuffer.get() ), wchar_size * sizeof( SQLWCHAR ) );
|
||||
sqlsrv_free(wbuffer);
|
||||
wbuffer.transferred();
|
||||
|
@ -1995,7 +1999,7 @@ void default_sql_type( _Inout_ sqlsrv_stmt* stmt, _In_opt_ SQLULEN paramno, _In_
|
|||
_Out_ SQLSMALLINT& sql_type TSRMLS_DC )
|
||||
{
|
||||
sql_type = SQL_UNKNOWN_TYPE;
|
||||
int php_type = Z_TYPE_P(param_z);
|
||||
int php_type = Z_TYPE_P(param_z);
|
||||
switch( php_type ) {
|
||||
|
||||
case IS_NULL:
|
||||
|
@ -2135,7 +2139,7 @@ void field_cache_dtor( _Inout_ zval* data_z )
|
|||
{
|
||||
sqlsrv_free( cache->value );
|
||||
}
|
||||
sqlsrv_free( cache );
|
||||
sqlsrv_free( cache );
|
||||
}
|
||||
|
||||
// To be called for formatting decimal / numeric fetched values from finalize_output_parameters() and/or get_field_as_string()
|
||||
|
@ -2288,13 +2292,13 @@ void finalize_output_parameters( _Inout_ sqlsrv_stmt* stmt TSRMLS_DC )
|
|||
return;
|
||||
|
||||
HashTable* params_ht = Z_ARRVAL( stmt->output_params );
|
||||
zend_ulong index = -1;
|
||||
zend_string* key = NULL;
|
||||
void* output_param_temp = NULL;
|
||||
zend_ulong index = -1;
|
||||
zend_string* key = NULL;
|
||||
void* output_param_temp = NULL;
|
||||
|
||||
ZEND_HASH_FOREACH_KEY_PTR( params_ht, index, key, output_param_temp ) {
|
||||
sqlsrv_output_param* output_param = static_cast<sqlsrv_output_param*>( output_param_temp );
|
||||
zval* value_z = Z_REFVAL_P( output_param->param_z );
|
||||
ZEND_HASH_FOREACH_KEY_PTR( params_ht, index, key, output_param_temp ) {
|
||||
sqlsrv_output_param* output_param = static_cast<sqlsrv_output_param*>( output_param_temp );
|
||||
zval* value_z = Z_REFVAL_P( output_param->param_z );
|
||||
switch( Z_TYPE_P( value_z )) {
|
||||
case IS_STRING:
|
||||
{
|
||||
|
@ -2415,7 +2419,7 @@ void finalize_output_parameters( _Inout_ sqlsrv_stmt* stmt TSRMLS_DC )
|
|||
DIE( "Illegal or unknown output parameter type. This should have been caught in core_sqlsrv_bind_parameter." );
|
||||
break;
|
||||
}
|
||||
value_z = NULL;
|
||||
value_z = NULL;
|
||||
} ZEND_HASH_FOREACH_END();
|
||||
|
||||
// empty the hash table since it's been processed
|
||||
|
@ -2812,24 +2816,24 @@ void resize_output_buffer_if_necessary( _Inout_ sqlsrv_stmt* stmt, _Inout_ zval*
|
|||
// allocate enough space to ALWAYS include the NULL regardless of the type being retrieved since
|
||||
// we set the last byte(s) to be NULL to avoid the debug build warning from the Zend engine about
|
||||
// not having a NULL terminator on a string.
|
||||
zend_string* param_z_string = zend_string_realloc( Z_STR_P(param_z), expected_len, 0 );
|
||||
zend_string* param_z_string = zend_string_realloc( Z_STR_P(param_z), expected_len, 0 );
|
||||
|
||||
// A zval string len doesn't include the null. This calculates the length it should be
|
||||
// regardless of whether the ODBC type contains the NULL or not.
|
||||
|
||||
// null terminate the string to avoid a warning in debug PHP builds
|
||||
ZSTR_VAL(param_z_string)[without_null_len] = '\0';
|
||||
ZVAL_NEW_STR(param_z, param_z_string);
|
||||
ZSTR_VAL(param_z_string)[without_null_len] = '\0';
|
||||
ZVAL_NEW_STR(param_z, param_z_string);
|
||||
|
||||
// buffer_len is the length passed to SQLBindParameter. It must contain the space for NULL in the
|
||||
// buffer when retrieving anything but SQLSRV_ENC_BINARY/SQL_C_BINARY
|
||||
buffer_len = Z_STRLEN_P(param_z) - buffer_null_extra;
|
||||
// buffer_len is the length passed to SQLBindParameter. It must contain the space for NULL in the
|
||||
// buffer when retrieving anything but SQLSRV_ENC_BINARY/SQL_C_BINARY
|
||||
buffer_len = Z_STRLEN_P(param_z) - buffer_null_extra;
|
||||
|
||||
// Zend string length doesn't include the null terminator
|
||||
ZSTR_LEN(Z_STR_P(param_z)) -= elem_size;
|
||||
// Zend string length doesn't include the null terminator
|
||||
ZSTR_LEN(Z_STR_P(param_z)) -= elem_size;
|
||||
}
|
||||
|
||||
buffer = Z_STRVAL_P(param_z);
|
||||
buffer = Z_STRVAL_P(param_z);
|
||||
|
||||
// The StrLen_Ind_Ptr parameter of SQLBindParameter should contain the length of the data to send, which
|
||||
// may be less than the size of the buffer since the output may be more than the input. If it is greater,
|
||||
|
@ -3013,7 +3017,7 @@ void sqlsrv_output_param_dtor( _Inout_ zval* data )
|
|||
{
|
||||
sqlsrv_output_param *output_param = static_cast<sqlsrv_output_param*>( Z_PTR_P( data ));
|
||||
zval_ptr_dtor( output_param->param_z ); // undo the reference to the string we will no longer hold
|
||||
sqlsrv_free( output_param );
|
||||
sqlsrv_free( output_param );
|
||||
}
|
||||
|
||||
// called by Zend for each stream in the sqlsrv_stmt::param_streams hash table when it is cleaned/destroyed
|
||||
|
@ -3021,7 +3025,7 @@ void sqlsrv_stream_dtor( _Inout_ zval* data )
|
|||
{
|
||||
sqlsrv_stream* stream_encoding = static_cast<sqlsrv_stream*>( Z_PTR_P( data ));
|
||||
zval_ptr_dtor( stream_encoding->stream_z ); // undo the reference to the stream we will no longer hold
|
||||
sqlsrv_free( stream_encoding );
|
||||
sqlsrv_free( stream_encoding );
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -127,10 +127,13 @@ bool convert_string_from_utf16( _In_ SQLSRV_ENCODING encoding, _In_reads_bytes_(
|
|||
flags = WC_ERR_INVALID_CHARS;
|
||||
}
|
||||
|
||||
// calculate the number of characters needed
|
||||
#ifndef _WIN32
|
||||
cchOutLen = SystemLocale::FromUtf16Strict( encoding, inString, cchInLen, NULL, 0 );
|
||||
// Allocate enough space to hold the largest possible number of bytes for UTF-8 conversion
|
||||
// instead of calling FromUtf16, for performance reasons
|
||||
cchOutLen = 4*cchInLen;
|
||||
#else
|
||||
// Calculate the number of output bytes required - no performance hit here because
|
||||
// WideCharToMultiByte is highly optimised
|
||||
cchOutLen = WideCharToMultiByte( encoding, flags,
|
||||
inString, cchInLen,
|
||||
NULL, 0, NULL, NULL );
|
||||
|
@ -142,9 +145,10 @@ bool convert_string_from_utf16( _In_ SQLSRV_ENCODING encoding, _In_reads_bytes_(
|
|||
|
||||
// Create a buffer to fit the encoded string
|
||||
char* newString = reinterpret_cast<char*>( sqlsrv_malloc( cchOutLen + 1 /* NULL char*/ ));
|
||||
memset(newString, '\0', cchOutLen+1);
|
||||
|
||||
#ifndef _WIN32
|
||||
int rc = SystemLocale::FromUtf16( encoding, inString, cchInLen, newString, static_cast<int>(cchOutLen));
|
||||
int rc = SystemLocale::FromUtf16Strict( encoding, inString, cchInLen, newString, static_cast<int>(cchOutLen));
|
||||
#else
|
||||
int rc = WideCharToMultiByte( encoding, flags, inString, cchInLen, newString, static_cast<int>(cchOutLen), NULL, NULL );
|
||||
#endif // !_WIN32
|
||||
|
@ -153,9 +157,13 @@ bool convert_string_from_utf16( _In_ SQLSRV_ENCODING encoding, _In_reads_bytes_(
|
|||
sqlsrv_free( newString );
|
||||
return false;
|
||||
}
|
||||
char* newString2 = reinterpret_cast<char*>( sqlsrv_malloc( rc + 1 /* NULL char*/ ));
|
||||
memset(newString2, '\0', rc+1);
|
||||
memcpy_s(newString2, rc, newString, rc);
|
||||
sqlsrv_free( newString );
|
||||
|
||||
*outString = newString;
|
||||
newString[cchOutLen] = '\0'; // null terminate the encoded string
|
||||
*outString = newString2;
|
||||
cchOutLen = rc;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
|
|
@ -261,6 +261,8 @@ class EncodingConverter
|
|||
return 0;
|
||||
}
|
||||
}
|
||||
//if a shift sequence is encountered, we need to advance output buffer
|
||||
iconv_ret = iconv( m_pCvtCache->GetIConv(), NULL, NULL, &dest.m_pBytes, &dest.m_nBytesLeft );
|
||||
}
|
||||
|
||||
return cchDest - (dest.m_nBytesLeft / sizeof(DestType));
|
||||
|
|
|
@ -169,8 +169,14 @@ public:
|
|||
static size_t FromUtf16Strict(UINT destCodePage, const WCHAR * src, SSIZE_T cchSrc,
|
||||
__out_ecount_opt(cchDest) char * dest, size_t cchDest,
|
||||
bool * pHasDataLoss = NULL, DWORD * pErrorCode = NULL);
|
||||
// CP1252 to UTF16 conversion which does not involve iconv
|
||||
static size_t CP1252ToUtf16( const char *src, SSIZE_T cchSrc, WCHAR *dest, size_t cchDest, DWORD *pErrorCode );
|
||||
|
||||
|
||||
// UTF8/16 conversion which does not involve iconv
|
||||
static size_t Utf8To16( const char *src, SSIZE_T cchSrc, WCHAR *dest, size_t cchDest, DWORD *pErrorCode );
|
||||
static size_t Utf8From16( const WCHAR *src, SSIZE_T cchSrc, char *dest, size_t cchDest, DWORD *pErrorCode );
|
||||
static size_t Utf8To16Strict( const char *src, SSIZE_T cchSrc, WCHAR *dest, size_t cchDest, DWORD *pErrorCode );
|
||||
static size_t Utf8From16Strict( const WCHAR *src, SSIZE_T cchSrc, char *dest, size_t cchDest, DWORD *pErrorCode );
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// Public Member Functions
|
||||
|
|
|
@ -336,9 +336,300 @@ const SystemLocale & SystemLocale::Singleton()
|
|||
return s_Default;
|
||||
}
|
||||
|
||||
|
||||
// Convert CP1252 to UTF-16 without requiring iconv or taking a lock.
|
||||
// This is trivial because, except for the 80-9F range, CP1252 bytes
|
||||
// directly map to the corresponding UTF-16 codepoint.
|
||||
size_t SystemLocale::CP1252ToUtf16( const char *src, SSIZE_T cchSrc, WCHAR *dest, size_t cchDest, DWORD *pErrorCode )
|
||||
{
|
||||
const static WCHAR s_1252Map[] =
|
||||
{
|
||||
0x20AC, 0x003F, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, 0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0x003F, 0x017D, 0x003F,
|
||||
0x003F, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, 0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0x003F, 0x017E, 0x0178
|
||||
};
|
||||
const unsigned char *usrc = reinterpret_cast<const unsigned char*>(src);
|
||||
const unsigned char *srcEnd = usrc + cchSrc;
|
||||
const WCHAR *destEnd = dest + cchDest;
|
||||
|
||||
while(usrc < srcEnd && dest < destEnd)
|
||||
{
|
||||
DWORD ucode = *usrc++;
|
||||
*dest++ = (ucode <= 127 || ucode >= 160) ? ucode : s_1252Map[ucode - 128];
|
||||
}
|
||||
pErrorCode && (*pErrorCode = (dest == destEnd && usrc != srcEnd) ? ERROR_INSUFFICIENT_BUFFER : ERROR_SUCCESS);
|
||||
return cchDest - (destEnd - dest);
|
||||
}
|
||||
|
||||
// Convert UTF-8 to UTF-16 without requiring iconv or taking a lock.
|
||||
// 0abcdefg -> 0abcdefg 00000000
|
||||
// 110abcde 10fghijk -> defghijk 00000abc
|
||||
// 1110abcd 10efghij 10klmnop -> ijklmnop abcdefgh
|
||||
// 11110abc 10defghi 10jklmno 10pqrstu -> cdfghijk 110110ab nopqrstu 11011lm
|
||||
size_t SystemLocale::Utf8To16( const char *src, SSIZE_T cchSrc, WCHAR *dest, size_t cchDest, DWORD *pErrorCode )
|
||||
{
|
||||
const unsigned char *usrc = reinterpret_cast<const unsigned char*>(src);
|
||||
const unsigned char *srcEnd = usrc + cchSrc;
|
||||
const WCHAR *destEnd = dest + cchDest;
|
||||
DWORD dummyError;
|
||||
if (!pErrorCode)
|
||||
{
|
||||
pErrorCode = &dummyError;
|
||||
}
|
||||
*pErrorCode = 0;
|
||||
|
||||
while(usrc < srcEnd && dest < destEnd)
|
||||
{
|
||||
DWORD ucode = *usrc++;
|
||||
if(ucode <= 127) // Most common case for ASCII
|
||||
{
|
||||
*dest++ = ucode;
|
||||
}
|
||||
else if(ucode < 0xC0) // unexpected trailing byte 10xxxxxx
|
||||
{
|
||||
goto Invalid;
|
||||
}
|
||||
else if(ucode < 0xE0) // 110abcde 10fghijk
|
||||
{
|
||||
if (usrc >= srcEnd || *usrc < 0x80 || *usrc > 0xBF ||
|
||||
(*dest = (ucode & 0x1F)<<6 | (*usrc++ & 0x3F)) < 0x80)
|
||||
{
|
||||
*dest = 0xFFFD;
|
||||
}
|
||||
dest++;
|
||||
}
|
||||
else if(ucode < 0xF0) // 1110abcd 10efghij 10klmnop
|
||||
{
|
||||
if (usrc >= srcEnd)
|
||||
{
|
||||
goto Invalid;
|
||||
}
|
||||
DWORD c1 = *usrc;
|
||||
if (c1 < 0x80 || c1 > 0xBF)
|
||||
{
|
||||
goto Invalid;
|
||||
}
|
||||
usrc++;
|
||||
if (usrc >= srcEnd)
|
||||
{
|
||||
goto Invalid;
|
||||
}
|
||||
DWORD c2 = *usrc;
|
||||
if (c2 < 0x80 || c2 > 0xBF)
|
||||
{
|
||||
goto Invalid;
|
||||
}
|
||||
usrc++;
|
||||
ucode = (ucode&15)<<12 | (c1&0x3F)<<6 | (c2&0x3F);
|
||||
if (ucode < 0x800 || ucode >= 0xD800 && ucode <= 0xDFFF)
|
||||
{
|
||||
goto Invalid;
|
||||
}
|
||||
*dest++ = ucode;
|
||||
}
|
||||
else if(ucode < 0xF8) // 11110abc 10defghi 10jklmno 10pqrstu
|
||||
{
|
||||
if (usrc >= srcEnd)
|
||||
{
|
||||
goto Invalid;
|
||||
}
|
||||
DWORD c1 = *usrc;
|
||||
if (c1 < 0x80 || c1 > 0xBF)
|
||||
{
|
||||
goto Invalid;
|
||||
}
|
||||
usrc++;
|
||||
if (usrc >= srcEnd)
|
||||
{
|
||||
goto Invalid;
|
||||
}
|
||||
DWORD c2 = *usrc;
|
||||
if (c2 < 0x80 || c2 > 0xBF)
|
||||
{
|
||||
goto Invalid;
|
||||
}
|
||||
usrc++;
|
||||
if (usrc >= srcEnd)
|
||||
{
|
||||
goto Invalid;
|
||||
}
|
||||
DWORD c3 = *usrc;
|
||||
if (c3 < 0x80 || c3 > 0xBF)
|
||||
{
|
||||
goto Invalid;
|
||||
}
|
||||
usrc++;
|
||||
ucode = (ucode&7)<<18 | (c1&0x3F)<<12 | (c2&0x3F)<<6 | (c3&0x3F);
|
||||
|
||||
if (ucode < 0x10000 // overlong encoding
|
||||
|| ucode > 0x10FFFF // exceeds Unicode range
|
||||
|| ucode >= 0xD800 && ucode <= 0xDFFF) // surrogate pairs
|
||||
{
|
||||
goto Invalid;
|
||||
}
|
||||
if (dest >= destEnd - 1)
|
||||
{
|
||||
*pErrorCode = ERROR_INSUFFICIENT_BUFFER;
|
||||
return cchDest - (destEnd - dest);
|
||||
}
|
||||
ucode -= 0x10000;
|
||||
// Lead surrogate
|
||||
*dest++ = 0xD800 + (ucode >> 10);
|
||||
// Trail surrogate
|
||||
*dest++ = 0xDC00 + (ucode & 0x3FF);
|
||||
}
|
||||
else // invalid
|
||||
{
|
||||
Invalid:
|
||||
*dest++ = 0xFFFD;
|
||||
}
|
||||
}
|
||||
if (!*pErrorCode)
|
||||
{
|
||||
*pErrorCode = (dest == destEnd && usrc != srcEnd) ? ERROR_INSUFFICIENT_BUFFER : ERROR_SUCCESS;
|
||||
}
|
||||
return cchDest - (destEnd - dest);
|
||||
}
|
||||
|
||||
size_t SystemLocale::Utf8To16Strict( const char *src, SSIZE_T cchSrc, WCHAR *dest, size_t cchDest, DWORD *pErrorCode )
|
||||
{
|
||||
const unsigned char *usrc = reinterpret_cast<const unsigned char*>(src);
|
||||
const unsigned char *srcEnd = usrc + cchSrc;
|
||||
const WCHAR *destEnd = dest + cchDest;
|
||||
DWORD dummyError;
|
||||
if (!pErrorCode)
|
||||
{
|
||||
pErrorCode = &dummyError;
|
||||
}
|
||||
*pErrorCode = 0;
|
||||
|
||||
while(usrc < srcEnd && dest < destEnd)
|
||||
{
|
||||
DWORD ucode = *usrc++;
|
||||
if(ucode <= 127) // Most common case for ASCII
|
||||
{
|
||||
*dest++ = ucode;
|
||||
}
|
||||
else if(ucode < 0xC0) // unexpected trailing byte 10xxxxxx
|
||||
{
|
||||
goto Invalid;
|
||||
}
|
||||
else if(ucode < 0xE0) // 110abcde 10fghijk
|
||||
{
|
||||
if (usrc >= srcEnd || *usrc < 0x80 || *usrc > 0xBF ||
|
||||
(*dest = (ucode & 0x1F)<<6 | (*usrc++ & 0x3F)) < 0x80)
|
||||
{
|
||||
goto Invalid;
|
||||
}
|
||||
dest++;
|
||||
}
|
||||
else if(ucode < 0xF0) // 1110abcd 10efghij 10klmnop
|
||||
{
|
||||
if (usrc >= srcEnd)
|
||||
{
|
||||
goto Invalid;
|
||||
}
|
||||
DWORD c1 = *usrc;
|
||||
if (c1 < 0x80 || c1 > 0xBF)
|
||||
{
|
||||
goto Invalid;
|
||||
}
|
||||
usrc++;
|
||||
if (usrc >= srcEnd)
|
||||
{
|
||||
goto Invalid;
|
||||
}
|
||||
DWORD c2 = *usrc;
|
||||
if (c2 < 0x80 || c2 > 0xBF)
|
||||
{
|
||||
goto Invalid;
|
||||
}
|
||||
usrc++;
|
||||
ucode = (ucode&15)<<12 | (c1&0x3F)<<6 | (c2&0x3F);
|
||||
if (ucode < 0x800 || ucode >= 0xD800 && ucode <= 0xDFFF)
|
||||
{
|
||||
goto Invalid;
|
||||
}
|
||||
*dest++ = ucode;
|
||||
}
|
||||
else if(ucode < 0xF8) // 11110abc 10defghi 10jklmno 10pqrstu
|
||||
{
|
||||
if (usrc >= srcEnd)
|
||||
{
|
||||
goto Invalid;
|
||||
}
|
||||
DWORD c1 = *usrc;
|
||||
if (c1 < 0x80 || c1 > 0xBF)
|
||||
{
|
||||
goto Invalid;
|
||||
}
|
||||
usrc++;
|
||||
if (usrc >= srcEnd)
|
||||
{
|
||||
goto Invalid;
|
||||
}
|
||||
DWORD c2 = *usrc;
|
||||
if (c2 < 0x80 || c2 > 0xBF)
|
||||
{
|
||||
goto Invalid;
|
||||
}
|
||||
usrc++;
|
||||
if (usrc >= srcEnd)
|
||||
{
|
||||
goto Invalid;
|
||||
}
|
||||
DWORD c3 = *usrc;
|
||||
if (c3 < 0x80 || c3 > 0xBF)
|
||||
{
|
||||
goto Invalid;
|
||||
}
|
||||
usrc++;
|
||||
ucode = (ucode&7)<<18 | (c1&0x3F)<<12 | (c2&0x3F)<<6 | (c3&0x3F);
|
||||
|
||||
if (ucode < 0x10000 // overlong encoding
|
||||
|| ucode > 0x10FFFF // exceeds Unicode range
|
||||
|| ucode >= 0xD800 && ucode <= 0xDFFF) // surrogate pairs
|
||||
{
|
||||
goto Invalid;
|
||||
}
|
||||
if (dest >= destEnd - 1)
|
||||
{
|
||||
*pErrorCode = ERROR_INSUFFICIENT_BUFFER;
|
||||
return cchDest - (destEnd - dest);
|
||||
}
|
||||
ucode -= 0x10000;
|
||||
// Lead surrogate
|
||||
*dest++ = 0xD800 + (ucode >> 10);
|
||||
// Trail surrogate
|
||||
*dest++ = 0xDC00 + (ucode & 0x3FF);
|
||||
}
|
||||
else // invalid
|
||||
{
|
||||
Invalid:
|
||||
*pErrorCode = ERROR_NO_UNICODE_TRANSLATION;
|
||||
return 0 ;
|
||||
}
|
||||
}
|
||||
if (!*pErrorCode)
|
||||
{
|
||||
*pErrorCode = (dest == destEnd && usrc != srcEnd) ? ERROR_INSUFFICIENT_BUFFER : ERROR_SUCCESS;
|
||||
}
|
||||
return cchDest - (destEnd - dest);
|
||||
}
|
||||
|
||||
size_t SystemLocale::ToUtf16( UINT srcCodePage, const char * src, SSIZE_T cchSrc, WCHAR * dest, size_t cchDest, DWORD * pErrorCode )
|
||||
{
|
||||
srcCodePage = ExpandSpecialCP( srcCodePage );
|
||||
if ( dest )
|
||||
{
|
||||
if ( srcCodePage == CP_UTF8 )
|
||||
{
|
||||
return SystemLocale::Utf8To16( src, cchSrc < 0 ? (1+strlen(src)) : cchSrc, dest, cchDest, pErrorCode );
|
||||
}
|
||||
else if ( srcCodePage == 1252 )
|
||||
{
|
||||
return SystemLocale::CP1252ToUtf16( src, cchSrc < 0 ? (1+strlen(src)) : cchSrc, dest, cchDest, pErrorCode );
|
||||
}
|
||||
}
|
||||
EncodingConverter cvt( CP_UTF16, srcCodePage );
|
||||
if ( !cvt.Initialize() )
|
||||
{
|
||||
|
@ -354,6 +645,17 @@ size_t SystemLocale::ToUtf16( UINT srcCodePage, const char * src, SSIZE_T cchSrc
|
|||
size_t SystemLocale::ToUtf16Strict( UINT srcCodePage, const char * src, SSIZE_T cchSrc, WCHAR * dest, size_t cchDest, DWORD * pErrorCode )
|
||||
{
|
||||
srcCodePage = ExpandSpecialCP( srcCodePage );
|
||||
if ( dest )
|
||||
{
|
||||
if ( srcCodePage == CP_UTF8 )
|
||||
{
|
||||
return SystemLocale::Utf8To16Strict( src, cchSrc < 0 ? (1+strlen(src)) : cchSrc, dest, cchDest, pErrorCode );
|
||||
}
|
||||
else if ( srcCodePage == 1252 )
|
||||
{
|
||||
return SystemLocale::CP1252ToUtf16( src, cchSrc < 0 ? (1+strlen(src)) : cchSrc, dest, cchDest, pErrorCode );
|
||||
}
|
||||
}
|
||||
EncodingConverter cvt( CP_UTF16, srcCodePage );
|
||||
if ( !cvt.Initialize() )
|
||||
{
|
||||
|
@ -366,9 +668,282 @@ size_t SystemLocale::ToUtf16Strict( UINT srcCodePage, const char * src, SSIZE_T
|
|||
return cvt.Convert( dest, cchDest, src, cchSrcActual, true, &hasLoss, pErrorCode );
|
||||
}
|
||||
|
||||
size_t SystemLocale::Utf8From16( const WCHAR *src, SSIZE_T cchSrc, char *dest, size_t cchDest, DWORD *pErrorCode )
|
||||
{
|
||||
const WCHAR *srcEnd = src + cchSrc;
|
||||
char *destEnd = dest + cchDest;
|
||||
DWORD dummyError;
|
||||
if (!pErrorCode)
|
||||
{
|
||||
pErrorCode = &dummyError;
|
||||
}
|
||||
*pErrorCode = 0;
|
||||
|
||||
// null dest is a special mode to calculate the output size required.
|
||||
if (!dest)
|
||||
{
|
||||
size_t cbOut = 0;
|
||||
while (src < srcEnd)
|
||||
{
|
||||
DWORD wch = *src++;
|
||||
if (wch < 128) // most common case.
|
||||
{
|
||||
cbOut++;
|
||||
}
|
||||
else if (wch < 0x800) // 127 to 2047: 2 bytes
|
||||
{
|
||||
cbOut += 2;
|
||||
}
|
||||
else if (wch < 0xD800 || wch > 0xDFFF) // 2048 to 55295 and 57344 to 65535: 3 bytes
|
||||
{
|
||||
cbOut += 3;
|
||||
}
|
||||
else if (wch < 0xDC00) // 65536 to end of Unicode: 4 bytes
|
||||
{
|
||||
if (src >= srcEnd)
|
||||
{
|
||||
cbOut += 3; // lone surrogate at end
|
||||
}
|
||||
else if (*src < 0xDC00 || *src > 0xDFFF)
|
||||
{
|
||||
cbOut += 3; // low surrogate not followed by high
|
||||
}
|
||||
else
|
||||
{
|
||||
cbOut += 4;
|
||||
}
|
||||
}
|
||||
else // unexpected trail surrogate
|
||||
{
|
||||
cbOut += 3;
|
||||
}
|
||||
}
|
||||
return cbOut;
|
||||
}
|
||||
while ( src < srcEnd && dest < destEnd )
|
||||
{
|
||||
DWORD wch = *src++;
|
||||
if (wch < 128) // most common case.
|
||||
{
|
||||
*dest++ = wch;
|
||||
}
|
||||
else if (wch < 0x800) // 127 to 2047: 2 bytes
|
||||
{
|
||||
if (destEnd - dest < 2)
|
||||
{
|
||||
*pErrorCode = ERROR_INSUFFICIENT_BUFFER;
|
||||
return 0;
|
||||
}
|
||||
*dest++ = 0xC0 | (wch >> 6);
|
||||
*dest++ = 0x80 | (wch & 0x3F);
|
||||
}
|
||||
else if (wch < 0xD800 || wch > 0xDFFF) // 2048 to 55295 and 57344 to 65535: 3 bytes
|
||||
{
|
||||
if (destEnd - dest < 3)
|
||||
{
|
||||
*pErrorCode = ERROR_INSUFFICIENT_BUFFER;
|
||||
return 0;
|
||||
}
|
||||
*dest++ = 0xE0 | (wch >> 12);
|
||||
*dest++ = 0x80 | (wch >> 6)&0x3F;
|
||||
*dest++ = 0x80 | (wch &0x3F);
|
||||
}
|
||||
else if (wch < 0xDC00) // 65536 to end of Unicode: 4 bytes
|
||||
{
|
||||
if (src >= srcEnd)
|
||||
{
|
||||
*pErrorCode = ERROR_NO_UNICODE_TRANSLATION; // lone surrogate at end
|
||||
if (destEnd - dest < 3)
|
||||
{
|
||||
*pErrorCode = ERROR_INSUFFICIENT_BUFFER;
|
||||
return 0;
|
||||
}
|
||||
*dest++ = 0xEF;
|
||||
*dest++ = 0xBF;
|
||||
*dest++ = 0xBD;
|
||||
continue;
|
||||
}
|
||||
if (*src < 0xDC00 || *src > 0xDFFF)
|
||||
{
|
||||
// low surrogate not followed by high
|
||||
if (destEnd - dest < 3)
|
||||
{
|
||||
*pErrorCode = ERROR_INSUFFICIENT_BUFFER;
|
||||
return 0;
|
||||
}
|
||||
*dest++ = 0xEF;
|
||||
*dest++ = 0xBF;
|
||||
*dest++ = 0xBD;
|
||||
continue;
|
||||
}
|
||||
wch = 0x10000 + ((wch - 0xD800)<<10) + *src++ - 0xDC00;
|
||||
if (destEnd - dest < 4)
|
||||
{
|
||||
*pErrorCode = ERROR_INSUFFICIENT_BUFFER;
|
||||
return 0;
|
||||
}
|
||||
*dest++ = 0xF0 | (wch >> 18);
|
||||
*dest++ = 0x80 | (wch >>12)&0x3F;
|
||||
*dest++ = 0x80 | (wch >> 6)&0x3F;
|
||||
*dest++ = 0x80 | wch&0x3F;
|
||||
}
|
||||
else // unexpected trail surrogate
|
||||
{
|
||||
*pErrorCode = ERROR_NO_UNICODE_TRANSLATION; // lone surrogate at end
|
||||
if (destEnd - dest < 3)
|
||||
{
|
||||
*pErrorCode = ERROR_INSUFFICIENT_BUFFER;
|
||||
return 0;
|
||||
}
|
||||
*dest++ = 0xEF;
|
||||
*dest++ = 0xBF;
|
||||
*dest++ = 0xBD;
|
||||
}
|
||||
}
|
||||
if (!*pErrorCode)
|
||||
{
|
||||
*pErrorCode = (dest == destEnd && src != srcEnd) ? ERROR_INSUFFICIENT_BUFFER : ERROR_SUCCESS;
|
||||
}
|
||||
return *pErrorCode == ERROR_INSUFFICIENT_BUFFER ? 0 : cchDest - (destEnd - dest);
|
||||
}
|
||||
|
||||
size_t SystemLocale::Utf8From16Strict( const WCHAR *src, SSIZE_T cchSrc, char *dest, size_t cchDest, DWORD *pErrorCode )
|
||||
{
|
||||
const WCHAR *srcEnd = src + cchSrc;
|
||||
char *destEnd = dest + cchDest;
|
||||
DWORD dummyError;
|
||||
if (!pErrorCode)
|
||||
{
|
||||
pErrorCode = &dummyError;
|
||||
}
|
||||
*pErrorCode = 0;
|
||||
|
||||
// null dest is a special mode to calculate the output size required.
|
||||
if (!dest)
|
||||
{
|
||||
size_t cbOut = 0;
|
||||
while (src < srcEnd)
|
||||
{
|
||||
DWORD wch = *src++;
|
||||
if (wch < 128) // most common case.
|
||||
{
|
||||
cbOut++;
|
||||
}
|
||||
else if (wch < 0x800) // 127 to 2047: 2 bytes
|
||||
{
|
||||
cbOut += 2;
|
||||
}
|
||||
else if (wch < 0xD800 || wch > 0xDFFF) // 2048 to 55295 and 57344 to 65535: 3 bytes
|
||||
{
|
||||
cbOut += 3;
|
||||
}
|
||||
else if (wch < 0xDC00) // 65536 to end of Unicode: 4 bytes
|
||||
{
|
||||
if (src >= srcEnd)
|
||||
{
|
||||
cbOut += 3; // lone surrogate at end
|
||||
}
|
||||
else if (*src < 0xDC00 || *src > 0xDFFF)
|
||||
{
|
||||
cbOut += 3; // low surrogate not followed by high
|
||||
}
|
||||
else
|
||||
{
|
||||
cbOut += 4;
|
||||
}
|
||||
}
|
||||
else // unexpected trail surrogate
|
||||
{
|
||||
cbOut += 3;
|
||||
}
|
||||
}
|
||||
return cbOut;
|
||||
}
|
||||
while ( src < srcEnd && dest < destEnd )
|
||||
{
|
||||
DWORD wch = *src++;
|
||||
if (wch < 128) // most common case.
|
||||
{
|
||||
*dest++ = wch;
|
||||
}
|
||||
else if (wch < 0x800) // 127 to 2047: 2 bytes
|
||||
{
|
||||
if (destEnd - dest < 2)
|
||||
{
|
||||
*pErrorCode = ERROR_INSUFFICIENT_BUFFER;
|
||||
return 0;
|
||||
}
|
||||
*dest++ = 0xC0 | (wch >> 6);
|
||||
*dest++ = 0x80 | (wch & 0x3F);
|
||||
}
|
||||
else if (wch < 0xD800 || wch > 0xDFFF) // 2048 to 55295 and 57344 to 65535: 3 bytes
|
||||
{
|
||||
if (destEnd - dest < 3)
|
||||
{
|
||||
*pErrorCode = ERROR_INSUFFICIENT_BUFFER;
|
||||
return 0;
|
||||
}
|
||||
*dest++ = 0xE0 | (wch >> 12);
|
||||
*dest++ = 0x80 | (wch >> 6)&0x3F;
|
||||
*dest++ = 0x80 | (wch &0x3F);
|
||||
}
|
||||
else if (wch < 0xDC00) // 65536 to end of Unicode: 4 bytes
|
||||
{
|
||||
if (src >= srcEnd)
|
||||
{
|
||||
*pErrorCode = ERROR_NO_UNICODE_TRANSLATION; // lone surrogate at end
|
||||
if (destEnd - dest < 3)
|
||||
{
|
||||
*pErrorCode = ERROR_INSUFFICIENT_BUFFER;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
if (*src < 0xDC00 || *src > 0xDFFF)
|
||||
{
|
||||
*pErrorCode = ERROR_NO_UNICODE_TRANSLATION; // low surrogate not followed by high
|
||||
if (destEnd - dest < 3)
|
||||
{
|
||||
*pErrorCode = ERROR_INSUFFICIENT_BUFFER;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
wch = 0x10000 + ((wch - 0xD800)<<10) + *src++ - 0xDC00;
|
||||
if (destEnd - dest < 4)
|
||||
{
|
||||
*pErrorCode = ERROR_INSUFFICIENT_BUFFER;
|
||||
return 0;
|
||||
}
|
||||
*dest++ = 0xF0 | (wch >> 18);
|
||||
*dest++ = 0x80 | (wch >>12)&0x3F;
|
||||
*dest++ = 0x80 | (wch >> 6)&0x3F;
|
||||
*dest++ = 0x80 | wch&0x3F;
|
||||
}
|
||||
else // unexpected trail surrogate
|
||||
{
|
||||
*pErrorCode = ERROR_NO_UNICODE_TRANSLATION; // lone surrogate at end
|
||||
if (destEnd - dest < 3)
|
||||
{
|
||||
*pErrorCode = ERROR_INSUFFICIENT_BUFFER;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
if (!*pErrorCode)
|
||||
{
|
||||
*pErrorCode = (dest == destEnd && src != srcEnd) ? ERROR_INSUFFICIENT_BUFFER : ERROR_SUCCESS;
|
||||
}
|
||||
return *pErrorCode == ERROR_INSUFFICIENT_BUFFER ? 0 : cchDest - (destEnd - dest);
|
||||
}
|
||||
|
||||
size_t SystemLocale::FromUtf16( UINT destCodePage, const WCHAR * src, SSIZE_T cchSrc, char * dest, size_t cchDest, bool * pHasDataLoss, DWORD * pErrorCode )
|
||||
{
|
||||
destCodePage = ExpandSpecialCP( destCodePage );
|
||||
if ( destCodePage == CP_UTF8 )
|
||||
{
|
||||
pHasDataLoss && (*pHasDataLoss = 0);
|
||||
return SystemLocale::Utf8From16( src, cchSrc < 0 ? 1+mplat_wcslen(src) : cchSrc, dest, cchDest, pErrorCode );
|
||||
}
|
||||
EncodingConverter cvt( destCodePage, CP_UTF16 );
|
||||
if ( !cvt.Initialize() )
|
||||
{
|
||||
|
@ -384,6 +959,11 @@ size_t SystemLocale::FromUtf16( UINT destCodePage, const WCHAR * src, SSIZE_T cc
|
|||
size_t SystemLocale::FromUtf16Strict(UINT destCodePage, const WCHAR * src, SSIZE_T cchSrc, char * dest, size_t cchDest, bool * pHasDataLoss, DWORD * pErrorCode)
|
||||
{
|
||||
destCodePage = ExpandSpecialCP(destCodePage);
|
||||
if ( destCodePage == CP_UTF8 )
|
||||
{
|
||||
pHasDataLoss && (*pHasDataLoss = 0);
|
||||
return SystemLocale::Utf8From16Strict( src, cchSrc < 0 ? 1+mplat_wcslen(src) : cchSrc, dest, cchDest, pErrorCode );
|
||||
}
|
||||
EncodingConverter cvt(destCodePage, CP_UTF16);
|
||||
if (!cvt.Initialize())
|
||||
{
|
||||
|
|
Loading…
Reference in a new issue