Skip to content

Commit 81954a5

Browse files
authored
add verbose option (#27)
1 parent e558cd4 commit 81954a5

1 file changed

Lines changed: 17 additions & 10 deletions

File tree

libsvmdata/datasets.py

Lines changed: 17 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -297,15 +297,15 @@ def get_data_home():
297297
DATA_HOME = get_data_home()
298298

299299

300-
def download_libsvm(dataset, destination, replace=False):
300+
def download_libsvm(dataset, destination, replace=False, verbose=False):
301301
"""Download a dataset from LIBSVM website."""
302302
url = ("https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/" +
303303
NAMES[dataset])
304-
path = download(url, destination, replace=replace)
304+
path = download(url, destination, replace=replace, verbose=verbose)
305305
return path
306306

307307

308-
def _get_X_y(dataset, multilabel, replace=False):
308+
def _get_X_y(dataset, multilabel, replace=False, verbose=False):
309309
"""Load a LIBSVM dataset as sparse X and observation y/Y.
310310
If X and y already exist as npz and npy, they are not redownloaded unless
311311
replace=True."""
@@ -330,20 +330,21 @@ def _get_X_y(dataset, multilabel, replace=False):
330330
source_path = DATA_HOME / NAMES[dataset]
331331
if not source_path.parent.exists():
332332
source_path.parent.mkdir(parents=True)
333-
download_libsvm(dataset, source_path, replace=replace)
333+
download_libsvm(dataset, source_path, replace=replace, verbose=verbose)
334334

335335
# decompress file only if it is compressed
336336
if NAMES[dataset].endswith('.bz2'):
337337
decompressor = BZ2Decompressor()
338-
print("Decompressing...")
338+
if verbose:
339+
print("Decompressing...")
339340
with open(tmp_path, "wb") as f, open(source_path, "rb") as g:
340341
for data in iter(lambda: g.read(100 * 1024), b''):
341342
f.write(decompressor.decompress(data))
342343
source_path.unlink()
343344

344345
n_features_total = N_FEATURES[dataset]
345-
346-
print("Loading svmlight file...")
346+
if verbose:
347+
print("Loading svmlight file...")
347348
with open(tmp_path, 'rb') as f:
348349
X, y = load_svmlight_file(
349350
f, n_features=n_features_total, multilabel=multilabel)
@@ -383,7 +384,8 @@ def _get_X_y(dataset, multilabel, replace=False):
383384
return X, y
384385

385386

386-
def fetch_libsvm(dataset, replace=False, normalize=False, min_nnz=0):
387+
def fetch_libsvm(dataset, replace=False, normalize=False, min_nnz=0,
388+
verbose=False):
387389
"""
388390
Download a dataset from LIBSVM website.
389391
@@ -404,6 +406,10 @@ def fetch_libsvm(dataset, replace=False, normalize=False, min_nnz=0):
404406
When X is sparse, columns of X with strictly less than min_nnz
405407
non-zero entries are discarded.
406408
409+
verbose : bool, default=False
410+
Whether or not to print information about dataset loading.
411+
412+
407413
Returns
408414
-------
409415
X : np.ndarray or scipy.sparse.csc_matrix
@@ -425,8 +431,9 @@ def fetch_libsvm(dataset, replace=False, normalize=False, min_nnz=0):
425431
multilabel = NAMES[dataset].split('/')[0] == 'multilabel'
426432
is_regression = NAMES[dataset].split('/')[0] == 'regression'
427433

428-
print("Dataset: %s" % dataset)
429-
X, y = _get_X_y(dataset, multilabel, replace=replace)
434+
if verbose:
435+
print("Dataset: %s" % dataset)
436+
X, y = _get_X_y(dataset, multilabel, replace=replace, verbose=verbose)
430437

431438
# removing columns with too few non-zero entries when using sparse X
432439
if sparse.issparse(X) and min_nnz != 0:

0 commit comments

Comments
 (0)