Skip to content

Commit 9961765

Browse files
ddanielr and dlmarion authored
Disable delete optimization and exit ref loop faster (#6219)
* Exits scan after finding one reference When delete table is called, the delete marker code checks to see if any file references exist in other tables. However, only a single reference has to exist for delete markers to be created. Added break out of for loop once a single entry was found. Removed a nested try block in favor of a single try-with-resources * Allows table delete optimization to be disabled Adds a property to allow the scan of the metadata table to be skipped for table deletes. This forces delete markers to always be created when deleting tables instead of the manager deleting the volumes immediately. * Adds tests that verify files are deleted from HDFS Adds tests to verify that rfiles are removed from HDFS and not just from the table metadata. --------- Co-authored-by: Dave Marion <dlmarion@apache.org>
1 parent ffbfb3e commit 9961765

4 files changed

Lines changed: 416 additions & 15 deletions

File tree

core/src/main/java/org/apache/accumulo/core/conf/Property.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -391,6 +391,14 @@ public enum Property {
391391
"Maximum number of threads the TabletGroupWatcher will use in its BatchScanner to"
392392
+ " look for tablets that need maintenance.",
393393
"2.1.4"),
394+
MANAGER_TABLE_DELETE_OPTIMIZATION("manager.table.delete.optimization", "true",
395+
PropertyType.BOOLEAN,
396+
"When deleting a table the Manager will remove related table directories from "
397+
+ " the storage volumes if there are no other references to the files in the "
398+
+ " metadata table. When deleting a lot of tables this optimization can be costly. "
399+
+ " Setting this value to false will skip this optimization and the table directory "
400+
+ " cleanup will occur in the Garbage Collector instead.",
401+
"2.1.5"),
394402
MANAGER_BULK_RETRIES("manager.bulk.retries", "3", PropertyType.COUNT,
395403
"The number of attempts to bulk import a RFile before giving up.", "1.4.0"),
396404
MANAGER_BULK_THREADPOOL_SIZE("manager.bulk.threadpool.size", "5", PropertyType.COUNT,

server/base/src/main/java/org/apache/accumulo/server/util/MetadataTableUtil.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -321,7 +321,7 @@ public static void deleteTable(TableId tableId, boolean insertDeletes, ServerCon
321321
Ample ample = context.getAmple();
322322
ms.setRange(new KeyExtent(tableId, null, null).toMetaRange());
323323

324-
// insert deletes before deleting data from metadata... this makes the code fault tolerant
324+
// insert deletes before deleting data from metadata... this makes the code fault-tolerant
325325
if (insertDeletes) {
326326

327327
ms.fetchColumnFamily(DataFileColumnFamily.NAME);

server/manager/src/main/java/org/apache/accumulo/manager/tableOps/delete/CleanUp.java

Lines changed: 17 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -23,11 +23,11 @@
2323
import java.util.Arrays;
2424
import java.util.Map.Entry;
2525

26-
import org.apache.accumulo.core.client.AccumuloClient;
2726
import org.apache.accumulo.core.client.BatchScanner;
2827
import org.apache.accumulo.core.client.IteratorSetting;
2928
import org.apache.accumulo.core.client.Scanner;
3029
import org.apache.accumulo.core.clientImpl.thrift.ThriftSecurityException;
30+
import org.apache.accumulo.core.conf.Property;
3131
import org.apache.accumulo.core.data.Key;
3232
import org.apache.accumulo.core.data.NamespaceId;
3333
import org.apache.accumulo.core.data.Range;
@@ -124,11 +124,14 @@ public Repo<Manager> call(long tid, Manager manager) {
124124

125125
int refCount = 0;
126126

127-
try {
127+
if (!manager.getConfiguration().getBoolean(Property.MANAGER_TABLE_DELETE_OPTIMIZATION)) {
128+
// Skip scanning the metadata table for each table delete and always allow the GC to handle
129+
// file deletion.
130+
refCount = -1;
131+
} else {
128132
// look for other tables that references this table's files
129-
AccumuloClient client = manager.getContext();
130133
try (BatchScanner bs =
131-
client.createBatchScanner(MetadataTable.NAME, Authorizations.EMPTY, 8)) {
134+
manager.getContext().createBatchScanner(MetadataTable.NAME, Authorizations.EMPTY, 8)) {
132135
Range allTables = TabletsSection.getRange();
133136
Range tableRange = TabletsSection.getRange(tableId);
134137
Range beforeTable =
@@ -142,15 +145,15 @@ public Repo<Manager> call(long tid, Manager manager) {
142145

143146
for (Entry<Key,Value> entry : bs) {
144147
if (entry.getKey().getColumnQualifier().toString().contains("/" + tableId + "/")) {
145-
refCount++;
148+
refCount = 1;
149+
break;
146150
}
147151
}
152+
} catch (Exception e) {
153+
refCount = -1;
154+
log.error("Failed to scan {} looking for references to deleted table {}",
155+
MetadataTable.NAME, tableId, e);
148156
}
149-
150-
} catch (Exception e) {
151-
refCount = -1;
152-
log.error("Failed to scan " + MetadataTable.NAME + " looking for references to deleted table "
153-
+ tableId, e);
154157
}
155158

156159
// remove metadata table entries
@@ -162,14 +165,14 @@ public Repo<Manager> call(long tid, Manager manager) {
162165
// are dropped and the operation completes, then the deletes will not be repeated.
163166
MetadataTableUtil.deleteTable(tableId, refCount != 0, manager.getContext(), null);
164167
} catch (Exception e) {
165-
log.error("error deleting " + tableId + " from metadata table", e);
168+
log.error("error deleting {} from metadata table", tableId, e);
166169
}
167170

168171
// remove any problem reports the table may have
169172
try {
170173
ProblemReports.getInstance(manager.getContext()).deleteProblemReports(tableId);
171174
} catch (Exception e) {
172-
log.error("Failed to delete problem reports for table " + tableId, e);
175+
log.error("Failed to delete problem reports for table {}", tableId, e);
173176
}
174177

175178
if (refCount == 0) {
@@ -196,7 +199,7 @@ public Repo<Manager> call(long tid, Manager manager) {
196199
manager.getTableManager().removeTable(tableId);
197200
manager.getContext().clearTableListCache();
198201
} catch (Exception e) {
199-
log.error("Failed to find table id in zookeeper", e);
202+
log.error("Failed to find table id {} in zookeeper", tableId, e);
200203
}
201204

202205
// remove any permissions associated with this table
@@ -210,7 +213,7 @@ public Repo<Manager> call(long tid, Manager manager) {
210213
Utils.unreserveTable(manager, tableId, tid, true);
211214
Utils.unreserveNamespace(manager, namespaceId, tid, false);
212215

213-
LoggerFactory.getLogger(CleanUp.class).debug("Deleted table " + tableId);
216+
log.debug("Deleted table {}", tableId);
214217

215218
return null;
216219
}

0 commit comments

Comments
 (0)