-
Notifications
You must be signed in to change notification settings - Fork 2.5k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Browse files
Browse the repository at this point in the history
- Loading branch information
Showing
14 changed files
with
461 additions
and
41 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
32 changes: 32 additions & 0 deletions
32
hudi-aws/src/main/java/org/apache/hudi/aws/sync/util/GlueFilterGenVisitor.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one | ||
* or more contributor license agreements. See the NOTICE file | ||
* distributed with this work for additional information | ||
* regarding copyright ownership. The ASF licenses this file | ||
* to you under the Apache License, Version 2.0 (the | ||
* "License"); you may not use this file except in compliance | ||
* with the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package org.apache.hudi.aws.sync.util; | ||
|
||
import org.apache.hudi.hive.util.FilterGenVisitor; | ||
|
||
public class GlueFilterGenVisitor extends FilterGenVisitor { | ||
|
||
@Override | ||
protected String quoteStringLiteral(String value) { | ||
// Glue uses jSQLParser. | ||
// https://jsqlparser.github.io/JSqlParser/usage.html#define-the-parser-features | ||
return "'" + (value.contains("'") ? value.replaceAll("'", "''") : value) + "'"; | ||
} | ||
|
||
} |
29 changes: 29 additions & 0 deletions
29
hudi-aws/src/main/java/org/apache/hudi/aws/sync/util/GluePartitionFilterGenerator.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one | ||
* or more contributor license agreements. See the NOTICE file | ||
* distributed with this work for additional information | ||
* regarding copyright ownership. The ASF licenses this file | ||
* to you under the Apache License, Version 2.0 (the | ||
* "License"); you may not use this file except in compliance | ||
* with the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package org.apache.hudi.aws.sync.util; | ||
|
||
import org.apache.hudi.expression.Expression; | ||
import org.apache.hudi.hive.util.PartitionFilterGenerator; | ||
|
||
public class GluePartitionFilterGenerator extends PartitionFilterGenerator { | ||
|
||
protected String generateFilterString(Expression filter) { | ||
return filter.accept(new GlueFilterGenVisitor()); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
133 changes: 133 additions & 0 deletions
133
hudi-aws/src/test/java/org/apache/hudi/aws/sync/ITTestGluePartitionPushdown.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,133 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one | ||
* or more contributor license agreements. See the NOTICE file | ||
* distributed with this work for additional information | ||
* regarding copyright ownership. The ASF licenses this file | ||
* to you under the Apache License, Version 2.0 (the | ||
* "License"); you may not use this file except in compliance | ||
* with the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package org.apache.hudi.aws.sync; | ||
|
||
import org.apache.hadoop.conf.Configuration; | ||
import org.apache.hadoop.fs.FileSystem; | ||
import org.apache.hadoop.fs.Path; | ||
import org.apache.hudi.common.config.TypedProperties; | ||
import org.apache.hudi.common.model.HoodieAvroPayload; | ||
import org.apache.hudi.common.model.HoodieTableType; | ||
import org.apache.hudi.common.table.HoodieTableMetaClient; | ||
import org.apache.hudi.config.HoodieAWSConfig; | ||
import org.apache.hudi.hive.HiveSyncConfig; | ||
import org.apache.hudi.sync.common.model.FieldSchema; | ||
import org.junit.jupiter.api.AfterEach; | ||
import org.junit.jupiter.api.Assertions; | ||
import org.junit.jupiter.api.BeforeEach; | ||
import org.junit.jupiter.api.Test; | ||
import software.amazon.awssdk.services.glue.model.Column; | ||
import software.amazon.awssdk.services.glue.model.CreateDatabaseRequest; | ||
import software.amazon.awssdk.services.glue.model.CreatePartitionRequest; | ||
import software.amazon.awssdk.services.glue.model.CreateTableRequest; | ||
import software.amazon.awssdk.services.glue.model.DatabaseInput; | ||
import software.amazon.awssdk.services.glue.model.DeleteDatabaseRequest; | ||
import software.amazon.awssdk.services.glue.model.DeleteTableRequest; | ||
import software.amazon.awssdk.services.glue.model.PartitionInput; | ||
import software.amazon.awssdk.services.glue.model.SerDeInfo; | ||
import software.amazon.awssdk.services.glue.model.StorageDescriptor; | ||
import software.amazon.awssdk.services.glue.model.TableInput; | ||
|
||
import java.io.IOException; | ||
import java.nio.file.Files; | ||
import java.time.Instant; | ||
import java.util.Arrays; | ||
import java.util.List; | ||
import java.util.concurrent.ExecutionException; | ||
|
||
import static org.apache.hudi.sync.common.HoodieSyncConfig.META_SYNC_BASE_PATH; | ||
import static org.apache.hudi.sync.common.HoodieSyncConfig.META_SYNC_DATABASE_NAME; | ||
|
||
public class ITTestGluePartitionPushdown { | ||
|
||
private static final String MOTO_ENDPOINT = "http://localhost:5000"; | ||
private static final String DB_NAME = "db_name"; | ||
private static final String TABLE_NAME = "tbl_name"; | ||
private String basePath = Files.createTempDirectory("hivesynctest" + Instant.now().toEpochMilli()).toUri().toString(); | ||
private String tablePath = basePath + "/" + TABLE_NAME; | ||
private TypedProperties hiveSyncProps; | ||
private AWSGlueCatalogSyncClient glueSync; | ||
private FileSystem fileSystem; | ||
private Column[] partitionsColumn = {Column.builder().name("part1").type("int").build(), Column.builder().name("part2").type("string").build()}; | ||
List<FieldSchema> partitionsFieldSchema = Arrays.asList(new FieldSchema("part1", "int"), new FieldSchema("part2", "string")); | ||
|
||
public ITTestGluePartitionPushdown() throws IOException {} | ||
|
||
@BeforeEach | ||
public void setUp() throws Exception { | ||
hiveSyncProps = new TypedProperties(); | ||
hiveSyncProps.setProperty(HoodieAWSConfig.AWS_ACCESS_KEY.key(), "dummy"); | ||
hiveSyncProps.setProperty(HoodieAWSConfig.AWS_SECRET_KEY.key(), "dummy"); | ||
hiveSyncProps.setProperty(HoodieAWSConfig.AWS_SESSION_TOKEN.key(), "dummy"); | ||
hiveSyncProps.setProperty(HoodieAWSConfig.AWS_GLUE_ENDPOINT.key(), MOTO_ENDPOINT); | ||
hiveSyncProps.setProperty(HoodieAWSConfig.AWS_GLUE_REGION.key(), "eu-west-1"); | ||
hiveSyncProps.setProperty(META_SYNC_BASE_PATH.key(), tablePath); | ||
hiveSyncProps.setProperty(META_SYNC_DATABASE_NAME.key(), DB_NAME); | ||
|
||
HiveSyncConfig hiveSyncConfig = new HiveSyncConfig(hiveSyncProps, new Configuration()); | ||
fileSystem = hiveSyncConfig.getHadoopFileSystem(); | ||
fileSystem.mkdirs(new Path(tablePath)); | ||
Configuration configuration = new Configuration(); | ||
HoodieTableMetaClient.withPropertyBuilder() | ||
.setTableType(HoodieTableType.COPY_ON_WRITE) | ||
.setTableName(TABLE_NAME) | ||
.setPayloadClass(HoodieAvroPayload.class) | ||
.initTable(configuration, tablePath); | ||
|
||
glueSync = new AWSGlueCatalogSyncClient(new HiveSyncConfig(hiveSyncProps)); | ||
glueSync.awsGlue.createDatabase(CreateDatabaseRequest.builder().databaseInput(DatabaseInput.builder().name(DB_NAME).build()).build()).get(); | ||
|
||
glueSync.awsGlue.createTable(CreateTableRequest.builder().databaseName(DB_NAME) | ||
.tableInput(TableInput.builder().name(TABLE_NAME).partitionKeys( | ||
partitionsColumn) | ||
.storageDescriptor( | ||
StorageDescriptor.builder() | ||
.serdeInfo(SerDeInfo.builder().serializationLibrary("").build()) | ||
.location(tablePath) | ||
.columns( | ||
Column.builder().name("col1").type("string").build() | ||
) | ||
.build()) | ||
.build()).build()).get(); | ||
} | ||
|
||
@AfterEach | ||
public void teardown() throws Exception { | ||
glueSync.awsGlue.deleteTable(DeleteTableRequest.builder().databaseName(DB_NAME).name(TABLE_NAME).build()).get(); | ||
glueSync.awsGlue.deleteDatabase(DeleteDatabaseRequest.builder().name(DB_NAME).build()).get(); | ||
fileSystem.delete(new Path(tablePath), true); | ||
} | ||
|
||
@Test | ||
public void testEmptyPartitionShouldReturnEmpty() { | ||
Assertions.assertEquals(0, glueSync.getPartitionsByFilter(TABLE_NAME, | ||
glueSync.generatePushDownFilter(Arrays.asList("1/bar"), partitionsFieldSchema)).size()); | ||
} | ||
|
||
@Test | ||
public void testPresentPartitionShouldReturnIt() throws ExecutionException, InterruptedException { | ||
glueSync.awsGlue.createPartition(CreatePartitionRequest.builder().databaseName(DB_NAME).tableName(TABLE_NAME) | ||
.partitionInput(PartitionInput.builder() | ||
.storageDescriptor(StorageDescriptor.builder().columns(partitionsColumn).build()) | ||
.values("1", "b'ar").build()).build()).get(); | ||
|
||
Assertions.assertEquals(1, glueSync.getPartitionsByFilter(TABLE_NAME, | ||
glueSync.generatePushDownFilter(Arrays.asList("1/b'ar", "2/foo", "1/b''ar"), partitionsFieldSchema)).size()); | ||
} | ||
} |
Oops, something went wrong.