From ad42c87e8bfbffabbee3fb3a070b228f3db92067 Mon Sep 17 00:00:00 2001 From: pdobacz <5735525+pdobacz@users.noreply.github.com> Date: Mon, 30 May 2022 11:12:11 +0200 Subject: [PATCH 1/2] Make sure normalization of cast(1.01 as real) works --- src/OpenDiffix.Core.Tests/Normalizer.Tests.fs | 6 ++++++ src/OpenDiffix.Core/Expression.fs | 4 ++++ 2 files changed, 10 insertions(+) diff --git a/src/OpenDiffix.Core.Tests/Normalizer.Tests.fs b/src/OpenDiffix.Core.Tests/Normalizer.Tests.fs index 98060766..067dd617 100644 --- a/src/OpenDiffix.Core.Tests/Normalizer.Tests.fs +++ b/src/OpenDiffix.Core.Tests/Normalizer.Tests.fs @@ -107,6 +107,12 @@ let ``normalize casts (2)`` () = "SELECT cast(cast(age AS integer) as real) AS c FROM table" "SELECT cast(age as real) as c FROM table" +[] +let ``normalize casts (3)`` () = + equivalentQueries // + "SELECT cast(1.01 as real) AS c FROM table" + "SELECT 1.01 as c FROM table" + [] let ``normalize ranges (1)`` () = equivalentQueries // diff --git a/src/OpenDiffix.Core/Expression.fs b/src/OpenDiffix.Core/Expression.fs index 624b59b3..c5ae1359 100644 --- a/src/OpenDiffix.Core/Expression.fs +++ b/src/OpenDiffix.Core/Expression.fs @@ -204,6 +204,10 @@ let rec evaluateScalarFunction fn args = | Cast, [ Real r; String "text" ] -> r.ToString(doubleStyle) |> String | Cast, [ Boolean b; String "text" ] -> b.ToString().ToLower() |> String + | Cast, [ Integer i; String "integer" ] -> Integer i + | Cast, [ Real r; String "real" ] -> Real r + | Cast, [ Boolean b; String "boolean" ] -> Boolean b + | Cast, [ String s; String "text" ] -> String s (* identity cast: text is the target type name the arms above use for string results *) | _ -> failwith $"Invalid usage of scalar function '%A{fn}'." /// Evaluates the result sequence of a set function invocation. From 8a8ee89da217b0cd6c85874f6bda6740ad075ff0 Mon Sep 17 00:00:00 2001 From: Cristian Berneanu Date: Wed, 8 Jun 2022 10:01:39 +0300 Subject: [PATCH 2/2] Revert "Add support for simple `WHERE` clauses." This reverts commit 2b22d101425c5387b8af024e1acb63b1660ee216. 
--- src/OpenDiffix.Core.Tests/Aggregator.Tests.fs | 2 +- src/OpenDiffix.Core.Tests/Analyzer.Tests.fs | 38 ++++-------------- src/OpenDiffix.Core.Tests/Anonymizer.Tests.fs | 2 +- src/OpenDiffix.Core.Tests/Executor.Tests.fs | 2 +- src/OpenDiffix.Core.Tests/Expression.Tests.fs | 2 +- src/OpenDiffix.Core.Tests/NodeUtils.Tests.fs | 4 +- src/OpenDiffix.Core.Tests/Planner.Tests.fs | 2 +- .../QueryEngine.Tests.fs | 13 ------- src/OpenDiffix.Core/Aggregator.fs | 4 +- src/OpenDiffix.Core/Analyzer.fs | 39 +++++-------------- src/OpenDiffix.Core/Bucket.fs | 10 +---- src/OpenDiffix.Core/CommonTypes.fs | 2 +- src/OpenDiffix.Core/NoiseLayers.fs | 4 +- src/OpenDiffix.Core/QueryValidator.fs | 9 ++++- 14 files changed, 37 insertions(+), 96 deletions(-) diff --git a/src/OpenDiffix.Core.Tests/Aggregator.Tests.fs b/src/OpenDiffix.Core.Tests/Aggregator.Tests.fs index b2f1cad7..84720c58 100644 --- a/src/OpenDiffix.Core.Tests/Aggregator.Tests.fs +++ b/src/OpenDiffix.Core.Tests/Aggregator.Tests.fs @@ -87,7 +87,7 @@ let aggContext = let testAnonAggregatorMerging fn hasValueArg = let random = makeRandom fn hasValueArg - let ctx = aggContext, Some { BucketSeed = 0UL; BaseLabels = [] } + let ctx = aggContext, Some { BucketSeed = 0UL } let testPair numAids (length1, length2) = let makeArgs = makeAnonArgs hasValueArg random numAids diff --git a/src/OpenDiffix.Core.Tests/Analyzer.Tests.fs b/src/OpenDiffix.Core.Tests/Analyzer.Tests.fs index cdd65835..644779ba 100644 --- a/src/OpenDiffix.Core.Tests/Analyzer.Tests.fs +++ b/src/OpenDiffix.Core.Tests/Analyzer.Tests.fs @@ -313,14 +313,11 @@ type Tests(db: DBFixture) = let assertDirectQueryFails = assertQueryFails Direct let assertUntrustedQueryFails = assertQueryFails PublishUntrusted - let assertSqlSeedWithFilter query (seedMaterials: string seq) baseLabels = + let assertSqlSeed query (seedMaterials: string seq) = let expectedSeed = Hash.strings 0UL seedMaterials (analyzeTrustedQuery query).AnonymizationContext - |> should equal (Some { BucketSeed = 
expectedSeed; BaseLabels = baseLabels }) - - let assertSqlSeed query (seedMaterials: string seq) = - assertSqlSeedWithFilter query seedMaterials [] + |> should equal (Some { BucketSeed = expectedSeed }) let assertEqualAnonContexts query1 query2 = (analyzeTrustedQuery query1).AnonymizationContext @@ -389,18 +386,10 @@ type Tests(db: DBFixture) = analyzeTrustedQuery "SELECT count(*) FROM customers GROUP BY city HAVING length(city) > 3" [] - let ``Reject unsupported WHERE clause in anonymizing subqueries`` () = - assertTrustedQueryFails - "SELECT count(*) FROM customers WHERE first_name <> ''" - "Only equalities between a generalization and a constant are allowed as filters in anonymizing queries." - + let ``Reject WHERE clause in anonymizing subqueries`` () = assertTrustedQueryFails - "SELECT count(*) FROM customers WHERE age = 20 OR city = 'London'" - "Only equalities between a generalization and a constant are allowed as filters in anonymizing queries." - - [] - let ``Allow supported WHERE clause in anonymizing subqueries`` () = - analyzeTrustedQuery "SELECT count(*) FROM customers WHERE floor_by(age, 10) = 20 AND city = 'London'" + "SELECT count(*) FROM customers WHERE first_name=''" + "WHERE in anonymizing queries is not currently supported." [] let ``Don't validate not anonymizing queries for unsupported anonymization features`` () = @@ -452,11 +441,11 @@ type Tests(db: DBFixture) = let ``Detect queries with disallowed bucket functions calls`` () = assertTrustedQueryFails "SELECT round(2, age) from customers" - "Primary argument for a generalization expression has to be a simple column reference." + "Primary argument for a bucket function has to be a simple column reference." assertTrustedQueryFails "SELECT round(age, age) from customers" - "Secondary arguments for a generalization expression have to be constants." + "Secondary arguments for a bucket function have to be constants." 
[] let ``Default SQL seed from non-anonymizing queries`` () = @@ -501,19 +490,6 @@ type Tests(db: DBFixture) = let ``Default SQL seed from non-anonymizing rounding cast`` () = assertNoAnonContext "SELECT cast(price AS integer) FROM products" - [] - let ``SQL seed from single filter`` () = - assertSqlSeedWithFilter - "SELECT COUNT(*) FROM customers WHERE substring(city, 1, 2) = 'Lo'" - [ "substring,customers.city,1,2" ] - [ String "Lo" ] - - let ``SQL seed from multiple filters`` () = - assertSqlSeedWithFilter - "SELECT COUNT(*) FROM customers WHERE age = 20 AND city = 'London'" - [ "customers.age"; "customers.city" ] - [ Integer 20L; String "London" ] - [] let ``Constant bucket labels are ignored`` () = assertEqualAnonContexts diff --git a/src/OpenDiffix.Core.Tests/Anonymizer.Tests.fs b/src/OpenDiffix.Core.Tests/Anonymizer.Tests.fs index 0c7f199f..764c8fda 100644 --- a/src/OpenDiffix.Core.Tests/Anonymizer.Tests.fs +++ b/src/OpenDiffix.Core.Tests/Anonymizer.Tests.fs @@ -40,7 +40,7 @@ let anonParams = let aggContext = { AnonymizationParams = anonParams; GroupingLabels = [||]; Aggregators = [||] } let evaluateAggregator fn args = - evaluateAggregator (aggContext, Some { BucketSeed = 0UL; BaseLabels = [] }) fn args + evaluateAggregator (aggContext, Some { BucketSeed = 0UL }) fn args let distinctDiffixCount = DiffixCount, { AggregateOptions.Default with Distinct = true } let diffixCount = DiffixCount, { AggregateOptions.Default with Distinct = false } diff --git a/src/OpenDiffix.Core.Tests/Executor.Tests.fs b/src/OpenDiffix.Core.Tests/Executor.Tests.fs index fd88ea2a..13350627 100644 --- a/src/OpenDiffix.Core.Tests/Executor.Tests.fs +++ b/src/OpenDiffix.Core.Tests/Executor.Tests.fs @@ -25,7 +25,7 @@ type Tests(db: DBFixture) = let countDistinct expression = FunctionExpr(AggregateFunction(Count, { Distinct = true; OrderBy = [] }), [ expression ]) - let anonContext = { BucketSeed = 0UL; BaseLabels = [] } + let anonContext = { BucketSeed = 0UL } let queryContext = 
QueryContext.makeWithDataProvider db.DataProvider diff --git a/src/OpenDiffix.Core.Tests/Expression.Tests.fs b/src/OpenDiffix.Core.Tests/Expression.Tests.fs index a272b6f3..436ab8e1 100644 --- a/src/OpenDiffix.Core.Tests/Expression.Tests.fs +++ b/src/OpenDiffix.Core.Tests/Expression.Tests.fs @@ -548,7 +548,7 @@ let aggContext = } let evaluateAggregator aggSpec args = - evaluateAggregator (aggContext, Some { BucketSeed = 0UL; BaseLabels = [] }) aggSpec args testRows + evaluateAggregator (aggContext, Some { BucketSeed = 0UL }) aggSpec args testRows [] let ``evaluate scalar expressions`` () = diff --git a/src/OpenDiffix.Core.Tests/NodeUtils.Tests.fs b/src/OpenDiffix.Core.Tests/NodeUtils.Tests.fs index d5c075bf..5cbf7edc 100644 --- a/src/OpenDiffix.Core.Tests/NodeUtils.Tests.fs +++ b/src/OpenDiffix.Core.Tests/NodeUtils.Tests.fs @@ -29,7 +29,7 @@ let selectQuery = Having = expression OrderBy = [ OrderBy(expression, Ascending, NullsFirst) ] Limit = None - AnonymizationContext = Some { BucketSeed = 0UL; BaseLabels = [] } + AnonymizationContext = Some { BucketSeed = 0UL } } let selectQueryNegative = @@ -41,7 +41,7 @@ let selectQueryNegative = Having = negativeExpression OrderBy = [ OrderBy(negativeExpression, Ascending, NullsFirst) ] Limit = None - AnonymizationContext = Some { BucketSeed = 0UL; BaseLabels = [] } + AnonymizationContext = Some { BucketSeed = 0UL } } [] diff --git a/src/OpenDiffix.Core.Tests/Planner.Tests.fs b/src/OpenDiffix.Core.Tests/Planner.Tests.fs index a236030e..3d022dcb 100644 --- a/src/OpenDiffix.Core.Tests/Planner.Tests.fs +++ b/src/OpenDiffix.Core.Tests/Planner.Tests.fs @@ -26,7 +26,7 @@ let emptySelect = Having = constTrue OrderBy = [] Limit = None - AnonymizationContext = Some { BucketSeed = 0UL; BaseLabels = [] } + AnonymizationContext = Some { BucketSeed = 0UL } } let column index = diff --git a/src/OpenDiffix.Core.Tests/QueryEngine.Tests.fs b/src/OpenDiffix.Core.Tests/QueryEngine.Tests.fs index 8a4a09e8..5eb0a2d9 100644 --- 
a/src/OpenDiffix.Core.Tests/QueryEngine.Tests.fs +++ b/src/OpenDiffix.Core.Tests/QueryEngine.Tests.fs @@ -198,19 +198,6 @@ type Tests(db: DBFixture) = "SELECT count(*) FROM customers_small GROUP BY city, city ORDER BY 1" "SELECT count(*) FROM customers_small GROUP BY city ORDER BY 1" - [] - let ``Filtering and grouping doesn't change results`` () = - equivalentQueries - "SELECT count(*) FROM customers GROUP BY city HAVING city = 'Berlin'" - "SELECT count(*) FROM customers WHERE city = 'Berlin'" - - equivalentQueries - "SELECT count(*) FROM customers GROUP BY city, round_by(age, 10) HAVING city = 'Berlin' AND round_by(age, 10) = 20" - "SELECT count(*) FROM customers WHERE city = 'Berlin' AND round_by(age, 10) = 20" - - equivalentQueries - "SELECT count(*) FROM customers WHERE round_by(age, 10) = 20 GROUP BY city HAVING city = 'Berlin'" - "SELECT count(*) FROM customers WHERE city = 'Berlin' AND round_by(age, 10) = 20" [] let ``Anonymizing subquery`` () = diff --git a/src/OpenDiffix.Core/Aggregator.fs b/src/OpenDiffix.Core/Aggregator.fs index 3059e8a3..e3d47b5e 100644 --- a/src/OpenDiffix.Core/Aggregator.fs +++ b/src/OpenDiffix.Core/Aggregator.fs @@ -175,7 +175,7 @@ type private DiffixCount() = let anonContext = unwrapAnonContext anonContext let minCount = - if Array.isEmpty aggContext.GroupingLabels && List.isEmpty anonContext.BaseLabels then + if Array.isEmpty aggContext.GroupingLabels then 0L else int64 aggContext.AnonymizationParams.Suppression.LowThreshold @@ -236,7 +236,7 @@ type private DiffixCountDistinct() = let anonContext = unwrapAnonContext anonContext let minCount = - if Array.isEmpty aggContext.GroupingLabels && List.isEmpty anonContext.BaseLabels then + if Array.isEmpty aggContext.GroupingLabels then 0L else int64 aggContext.AnonymizationParams.Suppression.LowThreshold diff --git a/src/OpenDiffix.Core/Analyzer.fs b/src/OpenDiffix.Core/Analyzer.fs index 53f23a8d..20a354c4 100644 --- a/src/OpenDiffix.Core/Analyzer.fs +++ b/src/OpenDiffix.Core/Analyzer.fs 
@@ -354,42 +354,23 @@ let private compileQuery anonParams (query: SelectQuery) = // in `compileAnonymizingQuery` and explicit use of noisy aggregators like `diffix_low_count`. // If there aren't any, we also don't need to do the validations which are done deep in `computeSQLSeed`. if hasAnonymizingAggregators query then - let normalizedBucketExpressions = - (query.GroupBy @ gatherBucketExpressionsFromFilter query.Where) - |> Seq.map normalizeBucketExpression + let rangeColumns = collectRangeColumns anonParams query.From + let normalizedBucketLabelExpressions = query.GroupBy |> Seq.map (normalizeBucketLabelExpression) - QueryValidator.validateGeneralizations anonParams.AccessLevel normalizedBucketExpressions - - let sqlSeed = NoiseLayers.computeSQLSeed rangeColumns normalizedBucketExpressions - let baseLabels = gatherBucketLabelsFromFilter query.Where - let anonContext = Some { BucketSeed = sqlSeed; BaseLabels = baseLabels } - { query with AnonymizationContext = anonContext } + QueryValidator.validateGeneralizations anonParams.AccessLevel normalizedBucketLabelExpressions + let sqlSeed = NoiseLayers.computeSQLSeed rangeColumns normalizedBucketLabelExpressions + { query with AnonymizationContext = Some { BucketSeed = sqlSeed } } else query -let rec private gatherBucketExpressionsFromFilter filter = - match filter with - | Constant (Boolean true) -> [] - | FunctionExpr (ScalarFunction And, args) -> args |> List.collect gatherBucketExpressionsFromFilter - | FunctionExpr (ScalarFunction Eq, [ bucketExpression; Constant _ ]) -> [ bucketExpression ] - | _ -> - failwith "Only equalities between a generalization and a constant are allowed as filters in anonymizing queries." 
- -let rec private gatherBucketLabelsFromFilter filter = - match filter with - | Constant (Boolean true) -> [] - | FunctionExpr (ScalarFunction And, args) -> args |> List.collect gatherBucketLabelsFromFilter - | FunctionExpr (ScalarFunction Eq, [ _; Constant bucketLabel ]) -> [ bucketLabel ] - | _ -> - failwith "Only equalities between a generalization and a constant are allowed as filters in anonymizing queries." - -let rec private normalizeBucketExpression expression = + +let rec private normalizeBucketLabelExpression expression = match expression with | FunctionExpr (ScalarFunction Cast, [ expression; Constant (String "integer") ]) when Expression.typeOf expression = RealType -> - FunctionExpr(ScalarFunction RoundBy, [ normalizeBucketExpression expression; 1.0 |> Real |> Constant ]) - | FunctionExpr (ScalarFunction Cast, [ expression; _type ]) -> normalizeBucketExpression expression + FunctionExpr(ScalarFunction RoundBy, [ normalizeBucketLabelExpression expression; 1.0 |> Real |> Constant ]) + | FunctionExpr (ScalarFunction Cast, [ expression; _type ]) -> normalizeBucketLabelExpression expression | FunctionExpr (ScalarFunction fn, args) -> let fn, extraArgs = match fn with @@ -398,7 +379,7 @@ let rec private normalizeBucketExpression expression = | Round -> RoundBy, [ 1.0 |> Real |> Constant ] | _ -> fn, [] - FunctionExpr(ScalarFunction fn, List.map normalizeBucketExpression args @ extraArgs) + FunctionExpr(ScalarFunction fn, List.map normalizeBucketLabelExpression args @ extraArgs) | _ -> expression // ---------------------------------------------------------------- diff --git a/src/OpenDiffix.Core/Bucket.fs b/src/OpenDiffix.Core/Bucket.fs index 698802eb..df6b550b 100644 --- a/src/OpenDiffix.Core/Bucket.fs +++ b/src/OpenDiffix.Core/Bucket.fs @@ -4,15 +4,7 @@ let private addValuesToSeed seed (values: Value seq) = values |> Seq.map Value.toString |> Hash.strings seed let make group aggregators anonymizationContext = - let anonContextUpdater = - fun context -> - { 
context with - BucketSeed = - group - |> Array.toList - |> List.append context.BaseLabels - |> addValuesToSeed context.BucketSeed - } + let anonContextUpdater = fun context -> { context with BucketSeed = addValuesToSeed context.BucketSeed group } { Group = group diff --git a/src/OpenDiffix.Core/CommonTypes.fs b/src/OpenDiffix.Core/CommonTypes.fs index 5f08403f..6d371040 100644 --- a/src/OpenDiffix.Core/CommonTypes.fs +++ b/src/OpenDiffix.Core/CommonTypes.fs @@ -205,7 +205,7 @@ type Plan = // Executor // ---------------------------------------------------------------- -type AnonymizationContext = { BucketSeed: Hash; BaseLabels: Value list } +type AnonymizationContext = { BucketSeed: Hash } type AggregationContext = { diff --git a/src/OpenDiffix.Core/NoiseLayers.fs b/src/OpenDiffix.Core/NoiseLayers.fs index c316d1f0..535e717e 100644 --- a/src/OpenDiffix.Core/NoiseLayers.fs +++ b/src/OpenDiffix.Core/NoiseLayers.fs @@ -33,7 +33,7 @@ let private collectSeedMaterials rangeColumns expression = // Public API // ---------------------------------------------------------------- -let computeSQLSeed rangeColumns normalizedBucketExpressions = - normalizedBucketExpressions +let computeSQLSeed rangeColumns normalizedBucketLabelExpressions = + normalizedBucketLabelExpressions |> Seq.map (collectSeedMaterials rangeColumns) |> Hash.strings 0UL diff --git a/src/OpenDiffix.Core/QueryValidator.fs b/src/OpenDiffix.Core/QueryValidator.fs index b707a9b3..c599b02e 100644 --- a/src/OpenDiffix.Core/QueryValidator.fs +++ b/src/OpenDiffix.Core/QueryValidator.fs @@ -53,13 +53,17 @@ let private validateSelectTarget (selectQuery: SelectQuery) = | SubQuery _ -> failwith "Subqueries in anonymizing queries are not currently supported." | _ -> () +let private validateNoWhere (selectQuery: SelectQuery) = + if selectQuery.Where <> Constant(Boolean true) then + failwith "WHERE in anonymizing queries is not currently supported." 
+ let private validateGeneralization accessLevel expression = if accessLevel <> Direct then match expression with | FunctionExpr (ScalarFunction _, primaryArg :: _) when not (Expression.isColumnReference primaryArg) -> - failwith "Primary argument for a generalization expression has to be a simple column reference." + failwith "Primary argument for a bucket function has to be a simple column reference." | FunctionExpr (ScalarFunction _, _ :: secondaryArgs) when List.exists (Expression.isConstant >> not) secondaryArgs -> - failwith "Secondary arguments for a generalization expression have to be constants." + failwith "Secondary arguments for a bucket function have to be constants." | _ -> () if accessLevel = PublishUntrusted then @@ -79,6 +83,7 @@ let validateDirectQuery (selectQuery: SelectQuery) = validateSingleLowCount sele let validateAnonymizingQuery (selectQuery: SelectQuery) = validateOnlyCount selectQuery allowedCountUsage selectQuery + validateNoWhere selectQuery validateSelectTarget selectQuery let validateGeneralizations accessLevel expressions =