From 748fbe037d1195da460e4561704e28208b908b68 Mon Sep 17 00:00:00 2001 From: Nitish Date: Mon, 24 Feb 2025 14:15:49 +0530 Subject: [PATCH] added different output locations from single and compound tests --- .../executor/TestSparkExecutorsCompound.java | 2 +- .../executor/TestSparkExecutorsSingle.java | 4 +- .../executor/compound/configSparkIntTest.json | 106 ++++++++++++++++++ .../{ => single}/configSparkIntTest.json | 4 +- .../{ => single}/configSparkLinkTest.json | 4 +- 5 files changed, 113 insertions(+), 7 deletions(-) create mode 100644 spark/core/src/test/resources/zingg/spark/core/executor/compound/configSparkIntTest.json rename spark/core/src/test/resources/zingg/spark/core/executor/{ => single}/configSparkIntTest.json (94%) rename spark/core/src/test/resources/zingg/spark/core/executor/{ => single}/configSparkLinkTest.json (94%) diff --git a/spark/core/src/test/java/zingg/spark/core/executor/TestSparkExecutorsCompound.java b/spark/core/src/test/java/zingg/spark/core/executor/TestSparkExecutorsCompound.java index 4b1019894..c699b6cd6 100644 --- a/spark/core/src/test/java/zingg/spark/core/executor/TestSparkExecutorsCompound.java +++ b/spark/core/src/test/java/zingg/spark/core/executor/TestSparkExecutorsCompound.java @@ -25,7 +25,7 @@ @ExtendWith(TestSparkBase.class) public class TestSparkExecutorsCompound extends TestExecutorsCompound,Row,Column,DataType> { - protected static final String CONFIG_FILE = "zingg/spark/core/executor/configSparkIntTest.json"; + protected static final String CONFIG_FILE = "zingg/spark/core/executor/compound/configSparkIntTest.json"; protected static final String TEST_DATA_FILE = "zingg/spark/core/executor/test.csv"; public static final Log LOG = LogFactory.getLog(TestSparkExecutorsCompound.class); diff --git a/spark/core/src/test/java/zingg/spark/core/executor/TestSparkExecutorsSingle.java b/spark/core/src/test/java/zingg/spark/core/executor/TestSparkExecutorsSingle.java index 8be014290..1956e82f1 100644 --- a/spark/core/src/test/java/zingg/spark/core/executor/TestSparkExecutorsSingle.java +++ b/spark/core/src/test/java/zingg/spark/core/executor/TestSparkExecutorsSingle.java @@ -29,8 +29,8 @@ @ExtendWith(TestSparkBase.class) public class TestSparkExecutorsSingle extends TestExecutorsSingle,Row,Column,DataType> { - protected static final String CONFIG_FILE = "zingg/spark/core/executor/configSparkIntTest.json"; - protected static final String CONFIGLINK_FILE = "zingg/spark/core/executor/configSparkLinkTest.json"; + protected static final String CONFIG_FILE = "zingg/spark/core/executor/single/configSparkIntTest.json"; + protected static final String CONFIGLINK_FILE = "zingg/spark/core/executor/single/configSparkLinkTest.json"; protected static final String TEST1_DATA_FILE = "zingg/spark/core/executor/test1.csv"; protected static final String TEST2_DATA_FILE = "zingg/spark/core/executor/test2.csv"; private final SparkSession sparkSession; diff --git a/spark/core/src/test/resources/zingg/spark/core/executor/compound/configSparkIntTest.json b/spark/core/src/test/resources/zingg/spark/core/executor/compound/configSparkIntTest.json new file mode 100644 index 000000000..46a26528d --- /dev/null +++ b/spark/core/src/test/resources/zingg/spark/core/executor/compound/configSparkIntTest.json @@ -0,0 +1,106 @@ +{ + "trainingSamples" : [{ + "name":"trainingPos", + "format":"csv", + "props": { + "location": "./zingg/spark/core/executor/training.csv", + "delimiter": ",", + "header":false, + "badRecordsPath":"/tmp/bad" + }, + "schema": "z_cluster string, z_ismatch integer, id string, fname string, lname string, stNo string, add1 string, add2 string, city string, areacode string, state string, dob string, ssn string" + }], + "fieldDefinition":[ + { + "fieldName" : "id", + "matchType" : "dont_use", + "fields" : "id", + "dataType": "string" + }, + { + "fieldName" : "fname", + "matchType" : "fuzzy", + "fields" : "fname", + "dataType": "string" + }, + { + "fieldName" : "lname", + "matchType" : "fuzzy", + "fields" : "lname", + "dataType": "string" + }, + { + "fieldName" : "stNo", + "matchType": "fuzzy", + "fields" : "stNo", + "dataType": "string" + }, + { + "fieldName" : "add1", + "matchType": "fuzzy", + "fields" : "add1", + "dataType": "string" + }, + { + "fieldName" : "add2", + "matchType": "fuzzy", + "fields" : "add2", + "dataType": "string" + }, + { + "fieldName" : "city", + "matchType": "fuzzy", + "fields" : "city", + "dataType": "string" + }, + { + "fieldName" : "areacode", + "matchType": "fuzzy", + "fields" : "areacode", + "dataType": "string" + }, + { + "fieldName" : "state", + "matchType": "fuzzy", + "fields" : "state", + "dataType": "string" + }, + { + "fieldName" : "dob", + "matchType": "fuzzy", + "fields" : "dob", + "dataType": "string" + }, + { + "fieldName" : "ssn", + "matchType": "fuzzy", + "fields" : "ssn", + "dataType": "string" + } + ], + "output" : [{ + "name":"output", + "format":"csv", + "props": { + "location": "/tmp/junit_integration_spark/compound/zinggOutput", + "delimiter": ",", + "header":true + } + }], + "data" : [{ + "name":"test", + "format":"csv", + "props": { + "location": "./zingg/spark/core/executor/test.csv", + "delimiter": ",", + "header":false + }, + "schema": "id string, fname string, lname string, stNo string, add1 string, add2 string, city string, state string, areacode string, dob string, ssn string" + } + ], + "labelDataSampleSize" : 0.5, + "numPartitions":4, + "modelId": "junit_integration_spark", + "zinggDir": "/tmp/junit_integration_spark/compound" + +} diff --git a/spark/core/src/test/resources/zingg/spark/core/executor/configSparkIntTest.json b/spark/core/src/test/resources/zingg/spark/core/executor/single/configSparkIntTest.json similarity index 94% rename from spark/core/src/test/resources/zingg/spark/core/executor/configSparkIntTest.json rename to spark/core/src/test/resources/zingg/spark/core/executor/single/configSparkIntTest.json index a71811d88..0f8d45b33 100644 --- a/spark/core/src/test/resources/zingg/spark/core/executor/configSparkIntTest.json +++ b/spark/core/src/test/resources/zingg/spark/core/executor/single/configSparkIntTest.json @@ -82,7 +82,7 @@ "name":"output", "format":"csv", "props": { - "location": "/tmp/junit_integration_spark/zinggOutput", + "location": "/tmp/junit_integration_spark/single/zinggOutput", "delimiter": ",", "header":true } @@ -101,6 +101,6 @@ "labelDataSampleSize" : 0.5, "numPartitions":4, "modelId": "junit_integration_spark", - "zinggDir": "/tmp/junit_integration_spark" + "zinggDir": "/tmp/junit_integration_spark/single" } diff --git a/spark/core/src/test/resources/zingg/spark/core/executor/configSparkLinkTest.json b/spark/core/src/test/resources/zingg/spark/core/executor/single/configSparkLinkTest.json similarity index 94% rename from spark/core/src/test/resources/zingg/spark/core/executor/configSparkLinkTest.json rename to spark/core/src/test/resources/zingg/spark/core/executor/single/configSparkLinkTest.json index 29f776bf3..a67827271 100644 --- a/spark/core/src/test/resources/zingg/spark/core/executor/configSparkLinkTest.json +++ b/spark/core/src/test/resources/zingg/spark/core/executor/single/configSparkLinkTest.json @@ -65,7 +65,7 @@ "name":"output", "format":"csv", "props": { - "location": "/tmp/junit_integration_spark/zinggOutput", + "location": "/tmp/junit_integration_spark/single/zinggOutput", "delimiter": ",", "header":true } @@ -94,6 +94,6 @@ "labelDataSampleSize" : 0.5, "numPartitions":4, "modelId": "junit_integration_spark", - "zinggDir": "/tmp/junit_integration_spark" + "zinggDir": "/tmp/junit_integration_spark/single" }