I have a DataFrame called `star_trek` with 11174 rows × 27577 columns, where every column is boolean.
When I try to visualize the data using t-SNE, with the following code:
tsne = TSNE(n_components = 2, metric='manhattan', init='pca', verbose=1)
tsne_data = tsne.fit_transform(star_trek.sample(frac = .5))
plt.figure()
plt.scatter(tsne_data[:,0],tsne_data[:,1])
plt.title("tsne in 2-dims on Star Trek data")
plt.show()
With verbosity level 1, it prints the following output and then fails with this error message:
[t-SNE] Computing 91 nearest neighbors...
[t-SNE] Indexed 5587 samples in 0.064s...
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
Cell In[49], line 3
1 tsne = TSNE(n_components = 2, metric='manhattan', init='pca', verbose=1)
----> 3 tsne_data = tsne.fit_transform(star_trek.sample(frac = .5))
5 plt.figure()
6 plt.scatter(tsne_data[:,0],tsne_data[:,1])
File ~/opt/anaconda3/lib/python3.9/site-packages/sklearn/manifold/_t_sne.py:1119, in TSNE.fit_transform(self, X, y)
1117 self._validate_params()
1118 self._check_params_vs_input(X)
-> 1119 embedding = self._fit(X)
1120 self.embedding_ = embedding
1121 return self.embedding_
File ~/opt/anaconda3/lib/python3.9/site-packages/sklearn/manifold/_t_sne.py:963, in TSNE._fit(self, X, skip_num_points)
956 print(
957 "[t-SNE] Indexed {} samples in {:.3f}s...".format(
958 n_samples, duration
959 )
960 )
962 t0 = time()
--> 963 distances_nn = knn.kneighbors_graph(mode="distance")
964 duration = time() - t0
965 if self.verbose:
File ~/opt/anaconda3/lib/python3.9/site-packages/sklearn/neighbors/_base.py:988, in KNeighborsMixin.kneighbors_graph(self, X, n_neighbors, mode)
985 A_data = np.ones(n_queries * n_neighbors)
987 elif mode == "distance":
--> 988 A_data, A_ind = self.kneighbors(X, n_neighbors, return_distance=True)
989 A_data = np.ravel(A_data)
991 else:
File ~/opt/anaconda3/lib/python3.9/site-packages/sklearn/neighbors/_base.py:824, in KNeighborsMixin.kneighbors(self, X, n_neighbors, return_distance)
817 use_pairwise_distances_reductions = (
818 self._fit_method == "brute"
819 and ArgKmin.is_usable_for(
820 X if X is not None else self._fit_X, self._fit_X, self.effective_metric_
821 )
822 )
823 if use_pairwise_distances_reductions:
--> 824 results = ArgKmin.compute(
825 X=X,
826 Y=self._fit_X,
827 k=n_neighbors,
828 metric=self.effective_metric_,
829 metric_kwargs=self.effective_metric_params_,
830 strategy="auto",
831 return_distance=return_distance,
832 )
834 elif (
835 self._fit_method == "brute" and self.metric == "precomputed" and issparse(X)
836 ):
837 results = _kneighbors_from_graph(
838 X, n_neighbors=n_neighbors, return_distance=return_distance
839 )
File ~/opt/anaconda3/lib/python3.9/site-packages/sklearn/metrics/_pairwise_distances_reduction/_dispatcher.py:289, in ArgKmin.compute(cls, X, Y, k, metric, chunk_size, metric_kwargs, strategy, return_distance)
277 return ArgKmin64.compute(
278 X=X,
279 Y=Y,
(...)
285 return_distance=return_distance,
286 )
288 if X.dtype == Y.dtype == np.float32:
--> 289 return ArgKmin32.compute(
290 X=X,
291 Y=Y,
292 k=k,
293 metric=metric,
294 chunk_size=chunk_size,
295 metric_kwargs=metric_kwargs,
296 strategy=strategy,
297 return_distance=return_distance,
298 )
300 raise ValueError(
301 "Only float64 or float32 datasets pairs are supported at this time, "
302 f"got: X.dtype={X.dtype} and Y.dtype={Y.dtype}."
303 )
File sklearn/metrics/_pairwise_distances_reduction/_argkmin.pyx:584, in sklearn.metrics._pairwise_distances_reduction._argkmin.ArgKmin32.compute()
File ~/opt/anaconda3/lib/python3.9/site-packages/sklearn/utils/fixes.py:139, in threadpool_limits(limits, user_api)
137 return controller.limit(limits=limits, user_api=user_api)
138 else:
--> 139 return threadpoolctl.threadpool_limits(limits=limits, user_api=user_api)
File ~/opt/anaconda3/lib/python3.9/site-packages/threadpoolctl.py:171, in threadpool_limits.__init__(self, limits, user_api)
167 def __init__(self, limits=None, user_api=None):
168 self._limits, self._user_api, self._prefixes = \
169 self._check_params(limits, user_api)
--> 171 self._original_info = self._set_threadpool_limits()
File ~/opt/anaconda3/lib/python3.9/site-packages/threadpoolctl.py:268, in threadpool_limits._set_threadpool_limits(self)
265 if self._limits is None:
266 return None
--> 268 modules = _ThreadpoolInfo(prefixes=self._prefixes,
269 user_api=self._user_api)
270 for module in modules:
271 # self._limits is a dict {key: num_threads} where key is either
272 # a prefix or a user_api. If a module matches both, the limit
273 # corresponding to the prefix is chosed.
274 if module.prefix in self._limits:
File ~/opt/anaconda3/lib/python3.9/site-packages/threadpoolctl.py:340, in _ThreadpoolInfo.__init__(self, user_api, prefixes, modules)
337 self.user_api = [] if user_api is None else user_api
339 self.modules = []
--> 340 self._load_modules()
341 self._warn_if_incompatible_openmp()
342 else:
File ~/opt/anaconda3/lib/python3.9/site-packages/threadpoolctl.py:371, in _ThreadpoolInfo._load_modules(self)
369 """Loop through loaded libraries and store supported ones"""
370 if sys.platform == "darwin":
--> 371 self._find_modules_with_dyld()
372 elif sys.platform == "win32":
373 self._find_modules_with_enum_process_module_ex()
File ~/opt/anaconda3/lib/python3.9/site-packages/threadpoolctl.py:428, in _ThreadpoolInfo._find_modules_with_dyld(self)
425 filepath = filepath.decode("utf-8")
427 # Store the module if it is supported and selected
--> 428 self._make_module_from_path(filepath)
File ~/opt/anaconda3/lib/python3.9/site-packages/threadpoolctl.py:515, in _ThreadpoolInfo._make_module_from_path(self, filepath)
513 if prefix in self.prefixes or user_api in self.user_api:
514 module_class = globals()[module_class]
--> 515 module = module_class(filepath, prefix, user_api, internal_api)
516 self.modules.append(module)
File ~/opt/anaconda3/lib/python3.9/site-packages/threadpoolctl.py:606, in _Module.__init__(self, filepath, prefix, user_api, internal_api)
604 self.internal_api = internal_api
605 self._dynlib = ctypes.CDLL(filepath, mode=_RTLD_NOLOAD)
--> 606 self.version = self.get_version()
607 self.num_threads = self.get_num_threads()
608 self._get_extra_info()
File ~/opt/anaconda3/lib/python3.9/site-packages/threadpoolctl.py:646, in _OpenBLASModule.get_version(self)
643 get_config = getattr(self._dynlib, "openblas_get_config",
644 lambda: None)
645 get_config.restype = ctypes.c_char_p
--> 646 config = get_config().split()
647 if config[0] == b"OpenBLAS":
648 return config[1].decode("utf-8")
AttributeError: 'NoneType' object has no attribute 'split'
I have tried changing the `perplexity` and `init` values, as suggested in one related answer. I have also tried downgrading `numpy` to 1.21.4 and upgrading `threadpoolctl`, as suggested in another, but the problem persists.