Added rank info to sensitivity classification (#1183)

This commit is contained in:
Jenny Tam 2020-09-02 10:39:54 -07:00 committed by GitHub
parent f2cae4c34f
commit 86adf470cb
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 763 additions and 9 deletions

View file

@ -383,6 +383,9 @@ SQLRETURN core_odbc_connect( _Inout_ sqlsrv_conn* conn, _Inout_ std::string& con
sqlsrv_malloc_auto_ptr<SQLWCHAR> wconn_string;
unsigned int wconn_len = static_cast<unsigned int>( conn_str.length() + 1 ) * sizeof( SQLWCHAR );
// Set the desired data classification version before connecting, but older ODBC drivers will generate a warning message 'Driver's SQLSetConnectAttr failed'
SQLSetConnectAttr(conn->handle(), SQL_COPT_SS_DATACLASSIFICATION_VERSION, reinterpret_cast<SQLPOINTER>(data_classification::VERSION_RANK_AVAILABLE), SQL_IS_POINTER);
// We only support UTF-8 encoding for connection string.
// Convert our UTF-8 connection string to UTF-16 before connecting with SQLDriverConnectW
wconn_string = utf16_string_from_mbcs_string( SQLSRV_ENCODING_UTF8, conn_str.c_str(), static_cast<unsigned int>( conn_str.length() ), &wconn_len, true );

View file

@ -1440,13 +1440,15 @@ struct sqlsrv_output_param {
};
namespace data_classification {
const int VERSION_RANK_AVAILABLE = 2; // Rank info is available when data classification version is 2+
const int RANK_NOT_DEFINED = -1;
// *** data classification metadata structures and helper methods -- to store and/or process the sensitivity classification data ***
struct name_id_pair;
struct sensitivity_metadata;
void name_id_pair_free(name_id_pair * pair);
void parse_sensitivity_name_id_pairs(_Inout_ sqlsrv_stmt* stmt, _Inout_ USHORT& numpairs, _Inout_ std::vector<name_id_pair*, sqlsrv_allocator<name_id_pair*>>* pairs, _Inout_ unsigned char **pptr);
void parse_column_sensitivity_props(_Inout_ sensitivity_metadata* meta, _Inout_ unsigned char **pptr);
void parse_column_sensitivity_props(_Inout_ sensitivity_metadata* meta, _Inout_ unsigned char **pptr, _In_ bool getRankInfo);
USHORT fill_column_sensitivity_array(_Inout_ sqlsrv_stmt* stmt, _In_ SQLSMALLINT colno, _Inout_ zval *column_data);
struct name_id_pair {
@ -1467,8 +1469,9 @@ namespace data_classification {
struct label_infotype_pair {
USHORT label_idx;
USHORT infotype_idx;
int rank; // Default value is "not defined"
label_infotype_pair() : label_idx(0), infotype_idx(0)
label_infotype_pair() : label_idx(0), infotype_idx(0), rank(RANK_NOT_DEFINED)
{
}
};
@ -1494,8 +1497,9 @@ namespace data_classification {
std::vector<name_id_pair*, sqlsrv_allocator<name_id_pair*>> infotypes;
USHORT num_columns;
std::vector<column_sensitivity> columns_sensitivity;
int rank; // Default value is "not defined"
sensitivity_metadata() : num_labels(0), num_infotypes(0), num_columns(0)
sensitivity_metadata() : num_labels(0), num_infotypes(0), num_columns(0), rank(RANK_NOT_DEFINED)
{
}

View file

@ -1017,7 +1017,8 @@ field_meta_data* core_sqlsrv_field_metadata( _Inout_ sqlsrv_stmt* stmt, _In_ SQL
void core_sqlsrv_sensitivity_metadata( _Inout_ sqlsrv_stmt* stmt )
{
sqlsrv_malloc_auto_ptr<unsigned char> dcbuf;
SQLINTEGER dclen = 0;
DWORD dcVersion = 0;
SQLINTEGER dclen = 0, dcIRD = 0;
SQLINTEGER dclenout = 0;
SQLHANDLE ird;
SQLRETURN r;
@ -1039,14 +1040,14 @@ void core_sqlsrv_sensitivity_metadata( _Inout_ sqlsrv_stmt* stmt )
// Reference: https://docs.microsoft.com/sql/connect/odbc/data-classification
// To retrieve sensitivity classification data, the first step is to retrieve the IRD (Implementation Row Descriptor) handle by
// calling SQLGetStmtAttr with SQL_ATTR_IMP_ROW_DESC statement attribute
r = ::SQLGetStmtAttr(stmt->handle(), SQL_ATTR_IMP_ROW_DESC, (SQLPOINTER)&ird, SQL_IS_POINTER, 0);
r = ::SQLGetStmtAttr(stmt->handle(), SQL_ATTR_IMP_ROW_DESC, reinterpret_cast<SQLPOINTER*>(&ird), SQL_IS_POINTER, 0);
CHECK_SQL_ERROR_OR_WARNING(r, stmt) {
LOG(SEV_ERROR, "core_sqlsrv_sensitivity_metadata: failed in getting Implementation Row Descriptor handle." );
throw core::CoreException();
}
// First call to get dclen
r = ::SQLGetDescFieldW(ird, 0, SQL_CA_SS_DATA_CLASSIFICATION, dcbuf, 0, &dclen);
r = ::SQLGetDescFieldW(ird, 0, SQL_CA_SS_DATA_CLASSIFICATION, reinterpret_cast<SQLPOINTER>(dcbuf.get()), 0, &dclen);
if (r != SQL_SUCCESS || dclen == 0) {
// log the error first
LOG(SEV_ERROR, "core_sqlsrv_sensitivity_metadata: failed in calling SQLGetDescFieldW first time." );
@ -1073,7 +1074,7 @@ void core_sqlsrv_sensitivity_metadata( _Inout_ sqlsrv_stmt* stmt )
// Call again to read SQL_CA_SS_DATA_CLASSIFICATION data
dcbuf = static_cast<unsigned char*>(sqlsrv_malloc(dclen * sizeof(char)));
r = ::SQLGetDescFieldW(ird, 0, SQL_CA_SS_DATA_CLASSIFICATION, dcbuf, dclen, &dclenout);
r = ::SQLGetDescFieldW(ird, 0, SQL_CA_SS_DATA_CLASSIFICATION, reinterpret_cast<SQLPOINTER>(dcbuf.get()), dclen, &dclenout);
if (r != SQL_SUCCESS) {
LOG(SEV_ERROR, "core_sqlsrv_sensitivity_metadata: failed in calling SQLGetDescFieldW again." );
@ -1084,6 +1085,16 @@ void core_sqlsrv_sensitivity_metadata( _Inout_ sqlsrv_stmt* stmt )
// Start parsing the data (blob)
using namespace data_classification;
// If we make it this far, the driver must be ODBC 17.2 or above. Prior to ODBC 17.4, checking the Data Classification version will fail.
// When the call succeeds and the version is high enough, rank info is available for retrieval
bool getRankInfo = false;
r = ::SQLGetDescFieldW(ird, 0, SQL_CA_SS_DATA_CLASSIFICATION_VERSION, reinterpret_cast<SQLPOINTER>(&dcVersion), SQL_IS_INTEGER, &dcIRD);
if (r == SQL_SUCCESS && dcVersion >= VERSION_RANK_AVAILABLE) {
getRankInfo = true;
}
// Start parsing the data (blob)
unsigned char *dcptr = dcbuf;
sqlsrv_malloc_auto_ptr<sensitivity_metadata> sensitivity_meta;
@ -1094,7 +1105,7 @@ void core_sqlsrv_sensitivity_metadata( _Inout_ sqlsrv_stmt* stmt )
parse_sensitivity_name_id_pairs(stmt, sensitivity_meta->num_infotypes, &sensitivity_meta->infotypes, &dcptr);
// Next parse the sensitivity properties
parse_column_sensitivity_props(sensitivity_meta, &dcptr);
parse_column_sensitivity_props(sensitivity_meta, &dcptr, getRankInfo);
unsigned char *dcend = dcbuf;
dcend += dclen;

View file

@ -489,6 +489,7 @@ namespace data_classification {
const char* INFOTYPE = "Information Type";
const char* NAME = "name";
const char* ID = "id";
const char* RANK = "rank";
void convert_sensivity_field(_Inout_ sqlsrv_stmt* stmt, _In_ SQLSRV_ENCODING encoding, _In_ unsigned char *ptr, _In_ int len, _Inout_updates_bytes_(cchOutLen) char** field_name)
{
@ -566,10 +567,18 @@ namespace data_classification {
*pptr = ptr;
}
void parse_column_sensitivity_props(_Inout_ sensitivity_metadata* meta, _Inout_ unsigned char **pptr)
void parse_column_sensitivity_props(_Inout_ sensitivity_metadata* meta, _Inout_ unsigned char **pptr, _In_ bool getRankInfo)
{
unsigned char *ptr = *pptr;
unsigned short ncols;
int queryrank, colrank;
// Get rank info
if (getRankInfo) {
queryrank = *(reinterpret_cast<long*>(ptr));
ptr += sizeof(long);
meta->rank = queryrank;
}
// Get number of columns
meta->num_columns = ncols = *(reinterpret_cast<unsigned short*>(ptr));
@ -594,6 +603,12 @@ namespace data_classification {
typeidx = *(reinterpret_cast<unsigned short*>(ptr));
ptr += sizeof(unsigned short);
if (getRankInfo) {
colrank = *(reinterpret_cast<long*>(ptr));
ptr += sizeof(long);
pair.rank = colrank;
}
pair.label_idx = labelidx;
pair.infotype_idx = typeidx;
@ -641,6 +656,7 @@ namespace data_classification {
USHORT labelidx = meta->columns_sensitivity[colno].label_info_pairs[j].label_idx;
USHORT typeidx = meta->columns_sensitivity[colno].label_info_pairs[j].infotype_idx;
int column_rank = meta->columns_sensitivity[colno].label_info_pairs[j].rank;
char *label = meta->labels[labelidx]->name;
char *label_id = meta->labels[labelidx]->id;
@ -657,10 +673,21 @@ namespace data_classification {
add_assoc_zval(&sensitivity_properties, INFOTYPE, &infotype_array);
// add column sensitivity rank info to sensitivity_properties
if (column_rank > RANK_NOT_DEFINED) {
add_assoc_long(&sensitivity_properties, RANK, column_rank);
}
// add the pair of sensitivity properties to data_classification
add_next_index_zval(&data_classification, &sensitivity_properties);
}
// add query sensitivity rank info to data_classification
int query_rank = meta->rank;
if (query_rank > RANK_NOT_DEFINED) {
add_assoc_long(&data_classification, RANK, query_rank);
}
// add data classification as associative array
add_assoc_zval(return_array, DATA_CLASS, &data_classification);

View file

@ -94,6 +94,10 @@
#define SQL_COPT_SS_AUTHENTICATION (SQL_COPT_SS_BASE_EX+15)// The authentication method used for the connection
#define SQL_COPT_SS_ACCESS_TOKEN (SQL_COPT_SS_BASE_EX+16)// The authentication access token used for the connection
/* SQLSetConnectAttr MS driver additional specific defines. */
#define SQL_COPT_SS_BASE_ADD 1400
#define SQL_COPT_SS_DATACLASSIFICATION_VERSION (SQL_COPT_SS_BASE_ADD + 0) // The flag to Set/Get DATACLASSIFICATION version support
// SQLColAttributes driver specific defines.
// SQLSetDescField/SQLGetDescField driver specific defines.
// Microsoft has 1200 thru 1249 reserved for Microsoft ODBC Driver for SQL Server usage.
@ -146,6 +150,7 @@
// Data Classification
#define SQL_CA_SS_DATA_CLASSIFICATION (SQL_CA_SS_BASE+37) // retrieve data classification information
#define SQL_CA_SS_DATA_CLASSIFICATION_VERSION (SQL_CA_SS_BASE+38) // retrieve data classification version
#define SQL_CA_SS_MAX_USED (SQL_CA_SS_BASE+38)

View file

@ -0,0 +1,357 @@
--TEST--
Test data classification feature - retrieving sensitivity metadata if supported
--DESCRIPTION--
If both ODBC and server support this feature, this test verifies that sensitivity metadata can be added and correctly retrieved. If not, it will at least test the new statement attribute and some error cases.
T-SQL reference: https://docs.microsoft.com/sql/t-sql/statements/add-sensitivity-classification-transact-sql
--ENV--
PHPT_EXEC=true
--SKIPIF--
<?php require('skipif_mid-refactor.inc'); ?>
--FILE--
<?php
// Test harness includes; presumably they supply the connection settings and the
// helpers (getDSN(), connect(), createTable(), ...) used below -- not visible here.
require_once('MsSetup.inc');
require_once('MsCommon_mid-refactor.inc');
// Key, inside a column's 'flags' metadata, under which the classification data is looked up
$dataClassKey = 'Data Classification';
// Numeric rank value => RANK keyword interpolated into ADD SENSITIVITY CLASSIFICATION
$ranks = array(0 => "NONE", 10 => "LOW", 20 => "MEDIUM", 30 => "HIGH", 40 => "CRITICAL");
/**
 * Exercises PDO::SQLSRV_ATTR_DATA_CLASSIFICATION at the connection level.
 *
 * Three attempts are made: passing the attribute to the PDO constructor, setting
 * it with setAttribute(), and reading it with getAttribute(). A PDOException whose
 * message does not match the expected pattern is reported; otherwise nothing is
 * printed.
 */
function testConnAttrCases()
{
    global $server, $databaseName, $driver, $uid, $pwd;

    $statementOnlyPattern = '*The given attribute is only supported on the PDOStatement object.';
    $unsupportedPattern = '*driver does not support that attribute';

    // (1) the attribute is supplied among the constructor options
    try {
        $dataSource = getDSN($server, $databaseName, $driver);
        $connOptions = array(PDO::ATTR_ERRMODE => PDO::ERRMODE_EXCEPTION, PDO::SQLSRV_ATTR_DATA_CLASSIFICATION => true);
        $conn = new PDO($dataSource, $uid, $pwd, $connOptions);
    } catch (PDOException $ex) {
        $message = $ex->getMessage();
        if (fnmatch($statementOnlyPattern, $message) === false) {
            echo "Connection attribute test (1) unexpected\n";
            var_dump($message);
        }
    }

    // (2) the attribute is set on an already established connection
    try {
        $dataSource = getDSN($server, $databaseName, $driver);
        $connOptions = array(PDO::ATTR_ERRMODE => PDO::ERRMODE_EXCEPTION);
        $conn = new PDO($dataSource, $uid, $pwd, $connOptions);
        $conn->setAttribute(PDO::SQLSRV_ATTR_DATA_CLASSIFICATION, true);
    } catch (PDOException $ex) {
        $message = $ex->getMessage();
        if (fnmatch($statementOnlyPattern, $message) === false) {
            echo "Connection attribute test (2) unexpected\n";
            var_dump($message);
        }
    }

    // (3) the attribute is read back from the connection
    try {
        $dataSource = getDSN($server, $databaseName, $driver);
        $connOptions = array(PDO::ATTR_ERRMODE => PDO::ERRMODE_EXCEPTION);
        $conn = new PDO($dataSource, $uid, $pwd, $connOptions);
        $conn->getAttribute(PDO::SQLSRV_ATTR_DATA_CLASSIFICATION);
    } catch (PDOException $ex) {
        $message = $ex->getMessage();
        if (fnmatch($unsupportedPattern, $message) === false) {
            echo "Connection attribute test (3) unexpected\n";
            var_dump($message);
        }
    }
}
/**
 * Requests classification metadata for a query that should not provide any and
 * checks the resulting error.
 *
 * @param object $conn          connection used to prepare the statement
 * @param string $tableName     table to query
 * @param bool   $isSupported   when true only the PatientId column is selected,
 *                              otherwise every column is selected
 * @param bool   $driverCapable selects which error message pattern is expected
 */
function testNotAvailable($conn, $tableName, $isSupported, $driverCapable)
{
    if ($isSupported) {
        $query = "SELECT PatientId FROM $tableName";
    } else {
        $query = "SELECT * FROM $tableName";
    }

    $stmt = $conn->prepare($query, array(PDO::SQLSRV_ATTR_DATA_CLASSIFICATION => true));
    $stmt->execute();

    if ($driverCapable) {
        $expectedPattern = '*Failed to retrieve Data Classification Sensitivity Metadata. If the driver and the server both support the Data Classification feature, check whether the query returns columns with classification information.';
    } else {
        $expectedPattern = '*Failed to retrieve Data Classification Sensitivity Metadata: Check if ODBC driver or the server supports the Data Classification feature.';
    }

    try {
        $stmt->getColumnMeta(0);
        // Reaching this line means no exception was raised
        echo "testNotAvailable: expected getColumnMeta to fail\n";
    } catch (PDOException $ex) {
        $message = $ex->getMessage();
        if (fnmatch($expectedPattern, $message) === false) {
            echo "testNotAvailable: exception unexpected\n";
            var_dump($message);
        }
    }
}
/**
 * Reports whether both the ODBC driver and the server are recent enough for
 * data classification.
 *
 * The driver must be version 17.2 or above and the server major version must be
 * 15 or above (SQL Server 2019).
 *
 * @param object $conn          connection whose client and server versions are read
 * @param bool   $driverCapable set to true when the driver alone is recent enough,
 *                              false otherwise
 * @return bool true only when both the driver and the server qualify
 */
function isDataClassSupported($conn, &$driverCapable)
{
    // Check both SQL Server version and ODBC driver version
    $msodbcsqlVer = $conn->getAttribute(PDO::ATTR_CLIENT_VERSION)["DriverVer"];
    $version = explode(".", $msodbcsqlVer);
    $major = (int) $version[0];
    $minor = isset($version[1]) ? (int) $version[1] : 0;

    // ODBC Driver must be 17.2 or above. The minor version only matters when the
    // major version is exactly 17; a newer major (e.g. 18.0) always qualifies.
    $driverCapable = ($major > 17) || ($major == 17 && $minor >= 2);
    if (!$driverCapable) {
        return false;
    }

    // SQL Server must be SQL Server 2019 or above
    $serverVer = $conn->getAttribute(PDO::ATTR_SERVER_VERSION);
    $serverMajor = (int) explode('.', $serverVer)[0];

    return $serverMajor >= 15;
}
/**
 * Runs the query twice -- once without the data classification attribute and once
 * with it explicitly set to false -- and prints any per-column metadata difference.
 *
 * @param object $conn connection to run the query on
 * @param string $tsql query text
 * @return object the statement that was run without the attribute
 */
function getRegularMetadata($conn, $tsql)
{
    // Statement with no data classification attribute at all
    $plainStmt = $conn->query($tsql);

    // Statement with the attribute explicitly disabled
    $disabledStmt = $conn->prepare($tsql, array(PDO::SQLSRV_ATTR_DATA_CLASSIFICATION => false));
    $disabledStmt->execute();

    // Column by column, both statements are expected to describe the same metadata
    $columnTotal = $plainStmt->columnCount();
    $col = 0;
    while ($col < $columnTotal) {
        $plainMeta = $plainStmt->getColumnMeta($col);
        $disabledMeta = $disabledStmt->getColumnMeta($col);
        $mismatch = array_diff($plainMeta, $disabledMeta);
        if (count($mismatch) > 0) {
            print_r($mismatch);
        }
        $col++;
    }

    return $plainStmt;
}
/**
 * Validates the classification data returned for one column.
 *
 * $actual must hold exactly two elements: entry 0, a set made of 'Label',
 * 'Information Type' and 'rank', plus an overall 'rank'. Label and Information
 * Type are each a (name, id) pair whose id must be empty.
 *
 * @param int   $rank   rank expected for both the column and the query
 * @param array $input  expected label name at index 0, information type name at index 1
 * @param array $actual classification data to verify
 * @return bool true when everything matches
 */
function verifyClassInfo($rank, $input, $actual)
{
    if (count($actual) != 2) {
        echo "Expected an array with only two elements\n";
        return false;
    }

    $entry = $actual[0];
    if (count($entry) != 3) {
        echo "Expected a Label pair and Information Type pair plus column rank info\n";
        return false;
    }

    // Label: a name/id pair with an empty id
    $labelPair = $entry['Label'];
    if (count($labelPair) != 2) {
        echo "Expected only two elements for the label\n";
        return false;
    }
    $expectedLabel = $input[0];
    if ($labelPair['name'] !== $expectedLabel || !empty($labelPair['id'])) {
        return false;
    }

    // Information Type: same shape as Label
    $infoPair = $entry['Information Type'];
    if (count($infoPair) != 2) {
        echo "Expected only two elements for the information type\n";
        return false;
    }
    $expectedInfo = $input[1];
    if ($infoPair['name'] !== $expectedInfo || !empty($infoPair['id'])) {
        return false;
    }

    // Column rank first, then the overall rank
    return $entry['rank'] == $rank && $actual['rank'] == $rank;
}
/**
 * Adds a sensitivity classification to the SSN and BirthDate columns.
 *
 * @param object $conn      connection used to run the statements
 * @param string $tableName table that owns the columns
 * @param array  $classData (label, information type) pairs; index 1 is used for
 *                          SSN and index 4 for BirthDate
 * @param int    $rankId    key into the global $ranks map
 */
function assignDataClassification($conn, $tableName, $classData, $rankId = 0)
{
    global $ranks;

    $rankClause = ", RANK = " . $ranks[$rankId];

    // column name => index of its (label, information type) pair in $classData
    $targets = array('SSN' => 1, 'BirthDate' => 4);
    foreach ($targets as $column => $index) {
        $label = $classData[$index][0];
        $infoType = $classData[$index][1];
        $statement = "ADD SENSITIVITY CLASSIFICATION TO [$tableName].$column WITH (LABEL = '$label', INFORMATION_TYPE = '$infoType' $rankClause)";
        $conn->query($statement);
    }
}
/**
 * Compares, column by column, the metadata of a plain statement with that of a
 * statement that has data classification enabled.
 *
 * Every field except 'flags' must be identical. For 'flags', a column with no
 * expected classification must equal an empty classification entry; otherwise the
 * classification data is checked with verifyClassInfo(). Mismatches are dumped.
 *
 * @param object $stmt1     statement without classification metadata
 * @param object $stmt2     statement with classification metadata
 * @param array  $classData expected (label, information type) pair per column
 * @param int    $rank      expected rank
 */
function compareDataClassification($stmt1, $stmt2, $classData, $rank)
{
    global $dataClassKey;

    $columnTotal = $stmt1->columnCount();
    $emptyClassification = array($dataClassKey => array());

    for ($col = 0; $col < $columnTotal; $col++) {
        $plainMeta = $stmt1->getColumnMeta($col);
        $classifiedMeta = $stmt2->getColumnMeta($col);

        foreach ($classifiedMeta as $field => $fieldValue) {
            if ($field != 'flags') {
                // Every field other than 'flags' has to be identical
                if ($plainMeta[$field] !== $fieldValue) {
                    var_dump($fieldValue);
                }
                continue;
            }

            if (empty($classData[$col])) {
                // No classification expected for this column
                $matches = ($fieldValue === $emptyClassification);
            } else {
                // Verify the classification metadata
                $matches = verifyClassInfo($rank, $classData[$col], $fieldValue[$dataClassKey]);
            }
            if (!$matches) {
                var_dump($fieldValue);
            }
        }
    }
}
/**
 * Runs the same SELECT twice as one batch and checks that a randomly chosen
 * column carries identical classification data in both result sets.
 *
 * @param object $conn      connection to run the batch on
 * @param string $tableName table to query
 */
function runBatchQuery($conn, $tableName)
{
    global $dataClassKey;

    $singleQuery = "SELECT SSN, BirthDate FROM $tableName";
    $stmt = $conn->prepare($singleQuery . ';' . $singleQuery, array(PDO::SQLSRV_ATTR_DATA_CLASSIFICATION => true));
    $stmt->execute();

    // Pick one column and read its metadata from each result set
    $columnTotal = $stmt->columnCount();
    $picked = rand(0, $columnTotal - 1);
    $firstMeta = $stmt->getColumnMeta($picked);
    $stmt->nextRowset();
    $secondMeta = $stmt->getColumnMeta($picked);

    $firstFlags = $firstMeta['flags'];
    $secondFlags = $secondMeta['flags'];

    $bothClassified = array_key_exists($dataClassKey, $firstFlags) && array_key_exists($dataClassKey, $secondFlags);
    if (!$bothClassified) {
        echo "Metadata returned with no classification data\n";
        var_dump($firstFlags);
        var_dump($secondFlags);
        return;
    }

    // Compare the two classification arrays through their JSON encoding
    $firstJson = json_encode($firstFlags[$dataClassKey]);
    $secondJson = json_encode($secondFlags[$dataClassKey]);
    if ($firstJson !== $secondJson) {
        echo "The JSON encoded strings should be identical\n";
        var_dump($firstJson);
        var_dump($secondJson);
    }
}
/**
 * Checks the classification metadata of "SELECT *" against a plain statement,
 * enabling the attribute in two ways, then runs the batch query check.
 *
 * @param object $conn      connection to use
 * @param object $stmt      statement run without classification metadata
 * @param string $tableName table to query
 * @param array  $classData expected (label, information type) pair per column
 * @param int    $rank      expected rank
 */
function checkResults($conn, $stmt, $tableName, $classData, $rank = 0)
{
    $selectAll = "SELECT * FROM $tableName";

    // Attribute passed as a prepare() option
    $viaPrepareOption = $conn->prepare($selectAll, array(PDO::SQLSRV_ATTR_DATA_CLASSIFICATION => true));
    $viaPrepareOption->execute();
    compareDataClassification($stmt, $viaPrepareOption, $classData, $rank);

    // Attribute set on the statement after it has been executed
    $viaSetAttribute = $conn->prepare($selectAll);
    $viaSetAttribute->execute();
    $viaSetAttribute->setAttribute(PDO::SQLSRV_ATTR_DATA_CLASSIFICATION, true);
    compareDataClassification($stmt, $viaSetAttribute, $classData, $rank);

    unset($viaPrepareOption, $viaSetAttribute);

    runBatchQuery($conn, $tableName);
}
///////////////////////////////////////////////////////////////////////////////////////
// Main test flow: run the connection-level attribute checks, create the test
// table, classify two of its columns when supported, then compare the metadata
// obtained with and without the data classification attribute.
try {
testConnAttrCases();
$conn = connect();
// isDataClassSupported() overwrites $driverCapable through its by-reference parameter
$driverCapable = true;
$isSupported = isDataClassSupported($conn, $driverCapable);
// Create a test table
$tableName = 'pdoPatients';
$colMeta = array(new ColumnMeta('INT', 'PatientId', 'IDENTITY NOT NULL'),
new ColumnMeta('CHAR(11)', 'SSN'),
new ColumnMeta('NVARCHAR(50)', 'FirstName'),
new ColumnMeta('NVARCHAR(50)', 'LastName'),
new ColumnMeta('DATE', 'BirthDate'));
createTable($conn, $tableName, $colMeta);
// If data classification is supported, then add sensitivity classification metadata
// to columns SSN and BirthDate
// One entry per table column, in column order; an empty array means no classification
$classData = [
array(),
array('Highly Confidential - GDPR', 'Credentials'),
array(),
array(),
array('Confidential Personal Data', 'Birthdays')
];
if ($isSupported) {
// No rank argument, so the default rank id 0 is used
assignDataClassification($conn, $tableName, $classData);
}
// Test another error condition
testNotAvailable($conn, $tableName, $isSupported, $driverCapable);
// Run the query without data classification metadata
$tsql = "SELECT * FROM $tableName";
$stmt = getRegularMetadata($conn, $tsql);
// Proceed to retrieve sensitivity metadata, if supported
if ($isSupported) {
checkResults($conn, $stmt, $tableName, $classData);
// Test another rank (get a random one)
// rand(1, 4) * 10 yields 10, 20, 30 or 40, i.e. one of the non-zero keys of $ranks
$random = rand(1, 4);
$rank = $random * 10;
trace("Testing with $rank\n");
assignDataClassification($conn, $tableName, $classData, $rank);
checkResults($conn, $stmt, $tableName, $classData, $rank);
}
dropTable($conn, $tableName);
unset($stmt);
unset($conn);
echo "Done\n";
} catch (PDOException $e) {
// Any PDOException not handled above is dumped, so the output no longer matches --EXPECT--
var_dump($e->getMessage());
}
?>
--EXPECT--
Done

View file

@ -0,0 +1,347 @@
--TEST--
Test data classification feature - retrieving sensitivity metadata if supported
--DESCRIPTION--
If both ODBC and server support this feature, this test verifies that sensitivity metadata can be added and correctly retrieved. If not, it will at least test the new statement attribute and some error cases.
T-SQL reference: https://docs.microsoft.com/sql/t-sql/statements/add-sensitivity-classification-transact-sql
--ENV--
PHPT_EXEC=true
--SKIPIF--
<?php require('skipif_versions_old.inc'); ?>
--FILE--
<?php
// Key, inside each field's metadata, under which the classification data is looked up
$dataClassKey = 'Data Classification';
// Numeric rank value => RANK keyword interpolated into ADD SENSITIVITY CLASSIFICATION
$ranks = array(0 => "NONE", 10 => "LOW", 20 => "MEDIUM", 30 => "HIGH", 40 => "CRITICAL");
/**
 * Checks two cases in which sqlsrv_field_metadata() is expected to fail:
 * (1) the executed query returns no classification information, and
 * (2) the statement was prepared with DataClassification but never executed.
 *
 * @param resource $conn          connection to use
 * @param string   $tableName     table to query
 * @param bool     $isSupported   when true only the PatientId column is selected,
 *                                otherwise every column is selected
 * @param bool     $driverCapable selects which error message pattern is expected in case (1)
 */
function testErrorCases($conn, $tableName, $isSupported, $driverCapable)
{
    $classificationOn = array('DataClassification' => true);
    if ($isSupported) {
        $query = "SELECT PatientId FROM $tableName";
    } else {
        $query = "SELECT * FROM $tableName";
    }

    // (1) executed query without classification information
    $stmt = sqlsrv_query($conn, $query, array(), $classificationOn);
    if (!$stmt) {
        fatalError("testErrorCases (1): failed with sqlsrv_query '$query'.\n");
    }

    if ($driverCapable) {
        $expectedPattern = '*Failed to retrieve Data Classification Sensitivity Metadata. If the driver and the server both support the Data Classification feature, check whether the query returns columns with classification information.';
    } else {
        $expectedPattern = '*Failed to retrieve Data Classification Sensitivity Metadata: Check if ODBC driver or the server supports the Data Classification feature.';
    }

    if (sqlsrv_field_metadata($stmt)) {
        echo "testErrorCases (1): expected sqlsrv_field_metadata to fail\n";
    }
    $errors = sqlsrv_errors();
    if (fnmatch($expectedPattern, $errors[0]['message']) === false) {
        var_dump($errors);
    }

    // (2) statement prepared with DataClassification but not executed
    $stmt = sqlsrv_prepare($conn, $query, array(), $classificationOn);
    if (!$stmt) {
        fatalError("testErrorCases (2): failed with sqlsrv_prepare '$query'.\n");
    }

    $executeFirstPattern = '*The statement must be executed to retrieve Data Classification Sensitivity Metadata.';
    if (sqlsrv_field_metadata($stmt)) {
        echo "testErrorCases (2): expected sqlsrv_field_metadata to fail\n";
    }
    $errors = sqlsrv_errors();
    if (fnmatch($executeFirstPattern, $errors[0]['message']) === false) {
        var_dump($errors);
    }
}
/**
 * Reports whether both the ODBC driver and the server are recent enough for
 * data classification.
 *
 * The driver must be version 17.2 or above and the server major version must be
 * 15 or above (SQL Server 2019).
 *
 * @param resource $conn          connection whose client and server info are read
 * @param bool     $driverCapable set to true when the driver alone is recent enough,
 *                                false otherwise
 * @return bool true only when both the driver and the server qualify
 */
function isDataClassSupported($conn, &$driverCapable)
{
    // Check both SQL Server version and ODBC driver version
    $msodbcsqlVer = sqlsrv_client_info($conn)['DriverVer'];
    $version = explode(".", $msodbcsqlVer);
    $major = (int) $version[0];
    $minor = isset($version[1]) ? (int) $version[1] : 0;

    // ODBC Driver must be 17.2 or above. The minor version only matters when the
    // major version is exactly 17; a newer major (e.g. 18.0) always qualifies.
    $driverCapable = ($major > 17) || ($major == 17 && $minor >= 2);
    if (!$driverCapable) {
        return false;
    }

    // SQL Server must be SQL Server 2019 or above
    $serverVer = sqlsrv_server_info($conn)['SQLServerVersion'];
    $serverMajor = (int) explode('.', $serverVer)[0];

    return $serverMajor >= 15;
}
/**
 * Runs the query twice -- once with no options and once with DataClassification
 * set to false -- and prints any per-column metadata difference.
 *
 * @param resource $conn connection to run the query on
 * @param string   $tsql query text
 * @return resource the statement that was run with no options
 */
function getRegularMetadata($conn, $tsql)
{
    // Statement with no data classification option at all
    $plainStmt = sqlsrv_query($conn, $tsql);
    if (!$plainStmt) {
        fatalError("getRegularMetadata (1): failed in sqlsrv_query.\n");
    }

    // Statement with the option explicitly disabled
    $disabledStmt = sqlsrv_query($conn, $tsql, array(), array('DataClassification' => false));
    if (!$disabledStmt) {
        fatalError("getRegularMetadata (2): failed in sqlsrv_query.\n");
    }

    // Column by column, both statements are expected to describe the same metadata
    $columnTotal = sqlsrv_num_fields($plainStmt);
    $plainMeta = sqlsrv_field_metadata($plainStmt);
    $disabledMeta = sqlsrv_field_metadata($disabledStmt);

    $col = 0;
    while ($col < $columnTotal) {
        $mismatch = array_diff($plainMeta[$col], $disabledMeta[$col]);
        if (count($mismatch) > 0) {
            print_r($mismatch);
        }
        $col++;
    }

    return $plainStmt;
}
/**
 * Validates the classification data returned for one column.
 *
 * $actual must hold exactly two elements: entry 0, a set made of 'Label',
 * 'Information Type' and 'rank', plus an overall 'rank'. Label and Information
 * Type are each a (name, id) pair whose id must be empty.
 *
 * @param int   $rank   rank expected for both the column and the query
 * @param array $input  expected label name at index 0, information type name at index 1
 * @param array $actual classification data to verify
 * @return bool true when everything matches
 */
function verifyClassInfo($rank, $input, $actual)
{
    if (count($actual) != 2) {
        echo "Expected an array with only two elements\n";
        return false;
    }

    $sensitivitySet = $actual[0];
    if (count($sensitivitySet) != 3) {
        echo "Expected a Label pair and Information Type pair plus column rank info\n";
        return false;
    }

    // Label should be a name and id pair (id should be empty)
    $labelInfo = $sensitivitySet['Label'];
    if (count($labelInfo) != 2) {
        echo "Expected only two elements for the label\n";
        return false;
    }
    $wantedLabel = $input[0];
    $labelOk = ($labelInfo['name'] === $wantedLabel) && empty($labelInfo['id']);
    if (!$labelOk) {
        return false;
    }

    // Information Type should also be a name and id pair (id should be empty)
    $typeInfo = $sensitivitySet['Information Type'];
    if (count($typeInfo) != 2) {
        echo "Expected only two elements for the information type\n";
        return false;
    }
    $wantedType = $input[1];
    $typeOk = ($typeInfo['name'] === $wantedType) && empty($typeInfo['id']);
    if (!$typeOk) {
        return false;
    }

    // Column rank
    if ($sensitivitySet['rank'] != $rank) {
        return false;
    }

    // Overall rank
    return $actual['rank'] == $rank;
}
/**
 * Adds a sensitivity classification to the SSN and BirthDate columns.
 *
 * @param resource $conn      connection used to run the statements
 * @param string   $tableName table that owns the columns
 * @param array    $classData (label, information type) pairs; index 1 is used for
 *                            SSN and index 4 for BirthDate
 * @param int      $rankId    key into the global $ranks map
 */
function assignDataClassification($conn, $tableName, $classData, $rankId = 0)
{
    global $ranks;

    $rankClause = ", RANK = " . $ranks[$rankId];

    // column name => index of its (label, information type) pair in $classData
    $targets = array('SSN' => 1, 'BirthDate' => 4);
    foreach ($targets as $column => $index) {
        $label = $classData[$index][0];
        $infoType = $classData[$index][1];
        $statement = "ADD SENSITIVITY CLASSIFICATION TO [$tableName].$column WITH (LABEL = '$label', INFORMATION_TYPE = '$infoType' $rankClause)";
        $stmt = sqlsrv_query($conn, $statement);
        if (!$stmt) {
            fatalError("$column: Add sensitivity $label and $infoType failed.\n");
        }
    }
}
/**
 * Compares, field by field, the metadata of a plain statement with that of a
 * statement that has data classification enabled.
 *
 * array_diff_assoc() yields the entries of the classified metadata that are absent
 * from the plain metadata; the only expected one is the 'Data Classification' key.
 * A column with no expected classification must produce an empty array under that
 * key; otherwise the data is checked with verifyClassInfo(). Mismatches are dumped.
 *
 * @param resource $stmt1     statement without classification metadata
 * @param resource $stmt2     statement with classification metadata
 * @param array    $classData expected (label, information type) pair per column
 * @param int      $rank      expected rank
 */
function compareDataClassification($stmt1, $stmt2, $classData, $rank)
{
    global $dataClassKey;

    $columnTotal = sqlsrv_num_fields($stmt1);
    $plainMeta = sqlsrv_field_metadata($stmt1);
    $classifiedMeta = sqlsrv_field_metadata($stmt2);

    $emptyClassification = array($dataClassKey => array());

    $col = 0;
    while ($col < $columnTotal) {
        $extra = array_diff_assoc($classifiedMeta[$col], $plainMeta[$col]);

        if (empty($classData[$col])) {
            // No classification expected for this column
            $matches = ($extra === $emptyClassification);
        } else {
            // Verify the classification metadata
            $matches = verifyClassInfo($rank, $classData[$col], $extra[$dataClassKey]);
        }
        if (!$matches) {
            var_dump($extra);
        }
        $col++;
    }
}
/**
 * Checks the classification metadata of "SELECT *" against a plain statement,
 * once through sqlsrv_prepare()/sqlsrv_execute() and once through sqlsrv_query(),
 * then runs the batch query check.
 *
 * @param resource $conn      connection to use
 * @param resource $stmt      statement run without classification metadata
 * @param string   $tableName table to query
 * @param array    $classData expected (label, information type) pair per column
 * @param int      $rank      expected rank
 */
function checkResults($conn, $stmt, $tableName, $classData, $rank = 0)
{
    $selectAll = "SELECT * FROM $tableName";
    $classificationOn = array('DataClassification' => true);

    // Prepare + execute path
    $prepared = sqlsrv_prepare($conn, $selectAll, array(), $classificationOn);
    if (!$prepared) {
        fatalError("Error when calling sqlsrv_prepare '$selectAll'.\n");
    }
    if (!sqlsrv_execute($prepared)) {
        fatalError("Error in executing statement.\n");
    }
    compareDataClassification($stmt, $prepared, $classData, $rank);
    sqlsrv_free_stmt($prepared);

    // Direct query path; expected to produce the same result as above
    $queried = sqlsrv_query($conn, $selectAll, array(), $classificationOn);
    if (!$queried) {
        fatalError("Error when calling sqlsrv_query '$selectAll'.\n");
    }
    compareDataClassification($stmt, $queried, $classData, $rank);
    sqlsrv_free_stmt($queried);

    runBatchQuery($conn, $tableName);
}
/**
 * Runs the same SELECT twice as one batch and checks that a randomly chosen
 * column carries identical classification data in both result sets.
 *
 * @param resource $conn      connection to run the batch on
 * @param string   $tableName table to query
 */
function runBatchQuery($conn, $tableName)
{
    global $dataClassKey;

    $singleQuery = "SELECT SSN, BirthDate FROM $tableName";
    $stmt = sqlsrv_query($conn, $singleQuery . ';' . $singleQuery, array(), array('DataClassification' => true));
    if (!$stmt) {
        fatalError("Error when calling sqlsrv_query '$singleQuery'.\n");
    }

    // Pick one column to compare across the two result sets
    $columnTotal = sqlsrv_num_fields($stmt);
    $picked = rand(0, $columnTotal - 1);

    $firstMeta = sqlsrv_field_metadata($stmt);
    if (!$firstMeta || !array_key_exists($dataClassKey, $firstMeta[$picked])) {
        fatalError("runBatchQuery(1): failed to get metadata");
    }

    // sqlsrv_next_result() must report that another result set is available
    $hasNext = sqlsrv_next_result($stmt);
    if (!$hasNext) {
        fatalError("runBatchQuery: failed to get next result");
    }

    $secondMeta = sqlsrv_field_metadata($stmt);
    if (!$secondMeta || !array_key_exists($dataClassKey, $secondMeta[$picked])) {
        fatalError("runBatchQuery(2): failed to get metadata");
    }

    // Compare the two classification arrays through their JSON encoding
    $firstJson = json_encode($firstMeta[$picked][$dataClassKey]);
    $secondJson = json_encode($secondMeta[$picked][$dataClassKey]);
    if ($firstJson !== $secondJson) {
        echo "The JSON encoded strings should be identical\n";
        var_dump($firstJson);
        var_dump($secondJson);
    }
}
///////////////////////////////////////////////////////////////////////////////////////
// Main test flow: connect, create the test table, classify two of its columns
// when supported, check the error cases, then compare the metadata obtained with
// and without the DataClassification option.
require_once('MsCommon.inc');
$conn = AE\connect();
if (!$conn) {
fatalError("Failed to connect.\n");
}
// isDataClassSupported() overwrites $driverCapable through its by-reference parameter
$driverCapable = true;
$isSupported = isDataClassSupported($conn, $driverCapable);
// Create a test table
$tableName = 'srvPatients';
$colMeta = array(new AE\ColumnMeta('INT', 'PatientId', 'IDENTITY NOT NULL'),
new AE\ColumnMeta('CHAR(11)', 'SSN'),
new AE\ColumnMeta('NVARCHAR(50)', 'FirstName'),
new AE\ColumnMeta('NVARCHAR(50)', 'LastName'),
new AE\ColumnMeta('DATE', 'BirthDate'));
AE\createTable($conn, $tableName, $colMeta);
// If data classification is supported, then add sensitivity classification metadata
// to columns SSN and BirthDate
// One entry per table column, in column order; an empty array means no classification
$classData = [
array(),
array('Highly Confidential - GDPR', 'Credentials'),
array(),
array(),
array('Confidential Personal Data', 'Birthdays')
];
if ($isSupported) {
// No rank argument, so the default rank id 0 is used
assignDataClassification($conn, $tableName, $classData);
}
testErrorCases($conn, $tableName, $isSupported, $driverCapable);
// Run the query without data classification metadata
$tsql = "SELECT * FROM $tableName";
$stmt = getRegularMetadata($conn, $tsql);
// Proceed to retrieve sensitivity metadata, if supported
if ($isSupported) {
checkResults($conn, $stmt, $tableName, $classData);
// Test another rank (get a random one)
// rand(1, 4) * 10 yields 10, 20, 30 or 40, i.e. one of the non-zero keys of $ranks
$random = rand(1, 4);
$rank = $random * 10;
trace("Testing with $rank\n");
assignDataClassification($conn, $tableName, $classData, $rank);
checkResults($conn, $stmt, $tableName, $classData, $rank);
}
sqlsrv_free_stmt($stmt);
dropTable($conn, $tableName);
sqlsrv_close($conn);
echo "Done\n";
--EXPECT--
Done