-
Notifications
You must be signed in to change notification settings - Fork 86
Expand file tree
/
Copy pathDelegatingMultiSinkOutputCommitter.java
More file actions
192 lines (167 loc) · 6.24 KB
/
DelegatingMultiSinkOutputCommitter.java
File metadata and controls
192 lines (167 loc) · 6.24 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
/*
* Copyright © 2021 Cask Data, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package io.cdap.plugin.gcp.bigquery.sink;
import com.google.cloud.bigquery.DatasetId;
import io.cdap.cdap.api.data.schema.Schema;
import io.cdap.plugin.gcp.bigquery.sink.lib.BigQueryTableFieldSchema;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.JobStatus;
import org.apache.hadoop.mapreduce.OutputCommitter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
 * Output Committer which creates and delegates operations to other BigQuery Output Committer instances.
 * <p>
 * Delegated instances are supplied along with a schema (via {@link #addCommitterAndSchema}), which is
 * used to configure the job/task context before each delegated commit operation.
 */
public class DelegatingMultiSinkOutputCommitter extends OutputCommitter {
  private static final Logger LOG = LoggerFactory.getLogger(DelegatingMultiSinkOutputCommitter.class);

  // Delegate committers and their schemas, keyed by table name. Both maps are
  // populated together in addCommitterAndSchema and therefore share a key set.
  private final Map<String, OutputCommitter> committerMap;
  private final Map<String, Schema> schemaMap;
  private final String projectName;
  private final String datasetName;
  private final String bucketName;
  private final String bucketPathUniqueId;

  public DelegatingMultiSinkOutputCommitter(String projectName,
                                            String datasetName,
                                            String bucketName,
                                            String bucketPathUniqueId) {
    this.projectName = projectName;
    this.datasetName = datasetName;
    this.bucketName = bucketName;
    this.bucketPathUniqueId = bucketPathUniqueId;
    this.committerMap = new HashMap<>();
    this.schemaMap = new HashMap<>();
  }

  /**
   * Add a committer and schema to this instance.
   * <p>
   * The supplied committer and schema will be used when the commit operations are invoked.
   *
   * @param committer delegate committer responsible for a single output table
   * @param tableName name of the table the committer writes to; used as the registration key
   * @param schema    record schema for the table, used to configure the commit operation
   * @param context   task attempt context used to set up the delegate
   * @throws IOException          if the delegate fails during job or task setup
   * @throws InterruptedException if the delegate is interrupted during setup
   */
  public void addCommitterAndSchema(OutputCommitter committer,
                                    String tableName,
                                    Schema schema,
                                    TaskAttemptContext context) throws IOException, InterruptedException {
    committerMap.put(tableName, committer);
    schemaMap.put(tableName, schema);
    // Set the delegate up immediately so it is ready by the time commit/abort is invoked;
    // this class's own setupJob/setupTask are deliberately no-ops.
    committer.setupJob(context);
    committer.setupTask(context);
  }

  @Override
  public void setupJob(JobContext jobContext) throws IOException {
    // no-op: each delegate is set up individually in addCommitterAndSchema.
  }

  @Override
  public void setupTask(TaskAttemptContext taskAttemptContext) throws IOException {
    // no-op: each delegate is set up individually in addCommitterAndSchema.
  }

  @Override
  public boolean needsTaskCommit(TaskAttemptContext taskAttemptContext) throws IOException {
    // A task commit is needed only when every delegate requires one; short-circuit on the
    // first delegate that does not. An empty delegate set means there is nothing to commit.
    for (OutputCommitter committer : committerMap.values()) {
      if (!committer.needsTaskCommit(taskAttemptContext)) {
        return false;
      }
    }
    return !committerMap.isEmpty();
  }

  @Override
  public void commitTask(TaskAttemptContext taskAttemptContext) throws IOException {
    for (Map.Entry<String, OutputCommitter> entry : committerMap.entrySet()) {
      String tableName = entry.getKey();
      try {
        // Point the context's configuration at this table's output before delegating.
        configureContext(taskAttemptContext, tableName);
        entry.getValue().commitTask(taskAttemptContext);
      } catch (IOException e) {
        // Log with the root-cause message for readability, then propagate the failure.
        LOG.warn("BigQuery multi-sink table '{}' failed during task commit. Reason: {}",
                 tableName, getFailureReason(e), e);
        throw e;
      }
    }
  }

  @Override
  public void commitJob(JobContext jobContext) throws IOException {
    for (Map.Entry<String, OutputCommitter> entry : committerMap.entrySet()) {
      String tableName = entry.getKey();
      try {
        // Point the context's configuration at this table's output before delegating.
        configureContext(jobContext, tableName);
        entry.getValue().commitJob(jobContext);
      } catch (IOException e) {
        // Log with the root-cause message for readability, then propagate the failure.
        LOG.warn("BigQuery multi-sink table '{}' failed during job commit. Reason: {}",
                 tableName, getFailureReason(e), e);
        throw e;
      }
    }
  }

  @Override
  public void abortTask(TaskAttemptContext taskAttemptContext) throws IOException {
    // Attempt to abort every delegate even if some fail; the first failure is rethrown
    // with subsequent failures attached as suppressed exceptions.
    IOException ioe = null;
    for (OutputCommitter committer : committerMap.values()) {
      try {
        committer.abortTask(taskAttemptContext);
      } catch (IOException e) {
        if (ioe == null) {
          ioe = e;
        } else {
          ioe.addSuppressed(e);
        }
      }
    }
    if (ioe != null) {
      throw ioe;
    }
  }

  @Override
  public void abortJob(JobContext jobContext, JobStatus.State state) throws IOException {
    // Attempt to abort every delegate even if some fail; the first failure is rethrown
    // with subsequent failures attached as suppressed exceptions.
    IOException ioe = null;
    for (OutputCommitter committer : committerMap.values()) {
      try {
        committer.abortJob(jobContext, state);
      } catch (IOException e) {
        if (ioe == null) {
          ioe = e;
        } else {
          ioe.addSuppressed(e);
        }
      }
    }
    if (ioe != null) {
      throw ioe;
    }
  }

  /**
   * Configure the supplied context for the given table's multi-sink output: the BigQuery
   * table fields derived from the registered schema and the temporary GCS path for staging.
   *
   * @param context   job or task context whose configuration is updated
   * @param tableName table whose registered schema and output path should be applied
   * @throws IOException if the output configuration cannot be applied
   */
  public void configureContext(JobContext context, String tableName) throws IOException {
    Schema schema = schemaMap.get(tableName);
    List<BigQueryTableFieldSchema> fields = BigQuerySinkUtils.getBigQueryTableFieldsFromSchema(schema);
    String gcsPath = BigQuerySinkUtils.getTemporaryGcsPath(bucketName, bucketPathUniqueId, tableName);
    BigQuerySinkUtils.configureMultiSinkOutput(context.getConfiguration(),
                                               DatasetId.of(projectName, datasetName),
                                               tableName,
                                               gcsPath,
                                               fields);
  }

  /**
   * Walk the exception's cause chain and return the root cause's message, falling back to
   * the top-level message when the root cause carries none.
   */
  private String getFailureReason(IOException exception) {
    Throwable rootCause = exception;
    while (rootCause.getCause() != null) {
      rootCause = rootCause.getCause();
    }
    return rootCause.getMessage() == null ? exception.getMessage() : rootCause.getMessage();
  }
}