@@ -297,15 +297,15 @@ def get_data_home():
297297DATA_HOME = get_data_home ()
298298
299299
300- def download_libsvm (dataset , destination , replace = False ):
300+ def download_libsvm (dataset , destination , replace = False , verbose = False ):
301301 """Download a dataset from LIBSVM website."""
302302 url = ("https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/" +
303303 NAMES [dataset ])
304- path = download (url , destination , replace = replace )
304+ path = download (url , destination , replace = replace , verbose = verbose )
305305 return path
306306
307307
308- def _get_X_y (dataset , multilabel , replace = False ):
308+ def _get_X_y (dataset , multilabel , replace = False , verbose = False ):
309309 """Load a LIBSVM dataset as sparse X and observation y/Y.
310310 If X and y already exist as npz and npy, they are not redownloaded unless
311311 replace=True."""
@@ -330,20 +330,21 @@ def _get_X_y(dataset, multilabel, replace=False):
330330 source_path = DATA_HOME / NAMES [dataset ]
331331 if not source_path .parent .exists ():
332332 source_path .parent .mkdir (parents = True )
333- download_libsvm (dataset , source_path , replace = replace )
333+ download_libsvm (dataset , source_path , replace = replace , verbose = verbose )
334334
335335 # decompress file only if it is compressed
336336 if NAMES [dataset ].endswith ('.bz2' ):
337337 decompressor = BZ2Decompressor ()
338- print ("Decompressing..." )
338+ if verbose :
339+ print ("Decompressing..." )
339340 with open (tmp_path , "wb" ) as f , open (source_path , "rb" ) as g :
340341 for data in iter (lambda : g .read (100 * 1024 ), b'' ):
341342 f .write (decompressor .decompress (data ))
342343 source_path .unlink ()
343344
344345 n_features_total = N_FEATURES [dataset ]
345-
346- print ("Loading svmlight file..." )
346+ if verbose :
347+ print ("Loading svmlight file..." )
347348 with open (tmp_path , 'rb' ) as f :
348349 X , y = load_svmlight_file (
349350 f , n_features = n_features_total , multilabel = multilabel )
@@ -383,7 +384,8 @@ def _get_X_y(dataset, multilabel, replace=False):
383384 return X , y
384385
385386
386- def fetch_libsvm (dataset , replace = False , normalize = False , min_nnz = 0 ):
387+ def fetch_libsvm (dataset , replace = False , normalize = False , min_nnz = 0 ,
388+ verbose = False ):
387389 """
388390 Download a dataset from LIBSVM website.
389391
@@ -404,6 +406,10 @@ def fetch_libsvm(dataset, replace=False, normalize=False, min_nnz=0):
404406 When X is sparse, columns of X with strictly less than min_nnz
405407 non-zero entries are discarded.
406408
409+ verbose : bool, default=False
410+ Whether or not to print information about dataset loading.
411+
412+
407413 Returns
408414 -------
409415 X : np.ndarray or scipy.sparse.csc_matrix
@@ -425,8 +431,9 @@ def fetch_libsvm(dataset, replace=False, normalize=False, min_nnz=0):
425431 multilabel = NAMES [dataset ].split ('/' )[0 ] == 'multilabel'
426432 is_regression = NAMES [dataset ].split ('/' )[0 ] == 'regression'
427433
428- print ("Dataset: %s" % dataset )
429- X , y = _get_X_y (dataset , multilabel , replace = replace )
434+ if verbose :
435+ print ("Dataset: %s" % dataset )
436+ X , y = _get_X_y (dataset , multilabel , replace = replace , verbose = verbose )
430437
431438 # removing columns with to few non zero entries when using sparse X
432439 if sparse .issparse (X ) and min_nnz != 0 :
0 commit comments