Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

'can only concatenate str (not "float") to str' while all dtypes are float64, int64 #23

Open
GrigoriiTarasov opened this issue May 21, 2023 · 1 comment

Comments

@GrigoriiTarasov
Copy link

GrigoriiTarasov commented May 21, 2023

I just ran it as in the example:

from boostaroota import BoostARoota

br = BoostARoota(metric='logloss')
br.fit(X, y)

for a classification task

and got:

[06:31:00] WARNING: ../src/learner.cc:767: 
Parameters: { "silent" } are not used.

Round:  1  iteration:  10
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
Cell In[22], line 4
      1 br = BoostARoota(metric='logloss')
      3 #Fit the model for the subset of variables
----> 4 br.fit(X, y)

File ~/anaconda3/envs/filter/lib/python3.11/site-packages/boostaroota/boostaroota.py:46, in BoostARoota.fit(self, x, y)
     45 def fit(self, x, y):
---> 46     self.keep_vars_ = _BoostARoota(x, y,
     47                                    metric=self.metric,
     48                                    clf = self.clf,
     49                                    cutoff=self.cutoff,
     50                                    iters=self.iters,
     51                                    max_rounds=self.max_rounds,
     52                                    delta=self.delta,
     53                                    silent=self.silent)
     54     return self

File ~/anaconda3/envs/filter/lib/python3.11/site-packages/boostaroota/boostaroota.py:219, in _BoostARoota(x, y, metric, clf, cutoff, iters, max_rounds, delta, silent)
    217 i += 1
    218 if clf is None:
--> 219     crit, keep_vars = _reduce_vars_xgb(new_x,
    220                                        y,
    221                                        metric=metric,
    222                                        this_round=i,
    223                                        cutoff=cutoff,
    224                                        n_iterations=iters,
    225                                        delta=delta,
    226                                        silent=silent)
    227 else:
    228     crit, keep_vars = _reduce_vars_sklearn(new_x,
    229                                            y,
    230                                            clf=clf,
   (...)
    234                                            delta=delta,
    235                                            silent=silent)

File ~/anaconda3/envs/filter/lib/python3.11/site-packages/boostaroota/boostaroota.py:130, in _reduce_vars_xgb(x, y, metric, this_round, cutoff, n_iterations, delta, silent)
    127     if not silent:
    128         print("Round: ", this_round, " iteration: ", i)
--> 130 df['Mean'] = df.mean(axis=1)
    131 #Split them back out
    132 real_vars = df[~df['feature'].isin(shadow_names)]

File ~/anaconda3/envs/filter/lib/python3.11/site-packages/pandas/core/generic.py:11556, in NDFrame._add_numeric_operations.<locals>.mean(self, axis, skipna, numeric_only, **kwargs)
  11539 @doc(
  11540     _num_doc,
  11541     desc="Return the mean of the values over the requested axis.",
   (...)
  11554     **kwargs,
  11555 ):
> 11556     return NDFrame.mean(self, axis, skipna, numeric_only, **kwargs)

File ~/anaconda3/envs/filter/lib/python3.11/site-packages/pandas/core/generic.py:11201, in NDFrame.mean(self, axis, skipna, numeric_only, **kwargs)
  11194 def mean(
  11195     self,
  11196     axis: Axis | None = 0,
   (...)
  11199     **kwargs,
  11200 ) -> Series | float:
> 11201     return self._stat_function(
  11202         "mean", nanops.nanmean, axis, skipna, numeric_only, **kwargs
  11203     )

File ~/anaconda3/envs/filter/lib/python3.11/site-packages/pandas/core/generic.py:11158, in NDFrame._stat_function(self, name, func, axis, skipna, numeric_only, **kwargs)
  11154     nv.validate_stat_func((), kwargs, fname=name)
  11156 validate_bool_kwarg(skipna, "skipna", none_allowed=False)
> 11158 return self._reduce(
  11159     func, name=name, axis=axis, skipna=skipna, numeric_only=numeric_only
  11160 )

File ~/anaconda3/envs/filter/lib/python3.11/site-packages/pandas/core/frame.py:10524, in DataFrame._reduce(self, op, name, axis, skipna, numeric_only, filter_type, **kwds)
  10520     df = df.T
  10522 # After possibly _get_data and transposing, we are now in the
  10523 #  simple case where we can use BlockManager.reduce
> 10524 res = df._mgr.reduce(blk_func)
  10525 out = df._constructor(res).iloc[0]
  10526 if out_dtype is not None:

File ~/anaconda3/envs/filter/lib/python3.11/site-packages/pandas/core/internals/managers.py:1534, in BlockManager.reduce(self, func)
   1532 res_blocks: list[Block] = []
   1533 for blk in self.blocks:
-> 1534     nbs = blk.reduce(func)
   1535     res_blocks.extend(nbs)
   1537 index = Index([None])  # placeholder

File ~/anaconda3/envs/filter/lib/python3.11/site-packages/pandas/core/internals/blocks.py:339, in Block.reduce(self, func)
    333 @final
    334 def reduce(self, func) -> list[Block]:
    335     # We will apply the function and reshape the result into a single-row
    336     #  Block with the same mgr_locs; squeezing will be done at a higher level
    337     assert self.ndim == 2
--> 339     result = func(self.values)
    341     if self.values.ndim == 1:
    342         # TODO(EA2D): special case not needed with 2D EAs
    343         res_values = np.array([[result]])

File ~/anaconda3/envs/filter/lib/python3.11/site-packages/pandas/core/frame.py:10487, in DataFrame._reduce.<locals>.blk_func(values, axis)
  10485     return values._reduce(name, skipna=skipna, **kwds)
  10486 else:
> 10487     return op(values, axis=axis, skipna=skipna, **kwds)

File ~/anaconda3/envs/filter/lib/python3.11/site-packages/pandas/core/nanops.py:96, in disallow.__call__.<locals>._f(*args, **kwargs)
     94 try:
     95     with np.errstate(invalid="ignore"):
---> 96         return f(*args, **kwargs)
     97 except ValueError as e:
     98     # we want to transform an object array
     99     # ValueError message to the more typical TypeError
    100     # e.g. this is normally a disallowed function on
    101     # object arrays that contain strings
    102     if is_object_dtype(args[0]):

File ~/anaconda3/envs/filter/lib/python3.11/site-packages/pandas/core/nanops.py:158, in bottleneck_switch.__call__.<locals>.f(values, axis, skipna, **kwds)
    156         result = alt(values, axis=axis, skipna=skipna, **kwds)
    157 else:
--> 158     result = alt(values, axis=axis, skipna=skipna, **kwds)
    160 return result

File ~/anaconda3/envs/filter/lib/python3.11/site-packages/pandas/core/nanops.py:421, in _datetimelike_compat.<locals>.new_func(values, axis, skipna, mask, **kwargs)
    418 if datetimelike and mask is None:
    419     mask = isna(values)
--> 421 result = func(values, axis=axis, skipna=skipna, mask=mask, **kwargs)
    423 if datetimelike:
    424     result = _wrap_results(result, orig_values.dtype, fill_value=iNaT)

File ~/anaconda3/envs/filter/lib/python3.11/site-packages/pandas/core/nanops.py:727, in nanmean(values, axis, skipna, mask)
    724     dtype_count = dtype
    726 count = _get_counts(values.shape, mask, axis, dtype=dtype_count)
--> 727 the_sum = _ensure_numeric(values.sum(axis, dtype=dtype_sum))
    729 if axis is not None and getattr(the_sum, "ndim", False):
    730     count = cast(np.ndarray, count)

File ~/anaconda3/envs/filter/lib/python3.11/site-packages/numpy/core/_methods.py:48, in _sum(a, axis, dtype, out, keepdims, initial, where)
     43 def _amin(a, axis=None, out=None, keepdims=False,
     44           initial=_NoValue, where=True):
     45     return umr_minimum(a, axis, None, out, keepdims, initial, where)
     47 def _sum(a, axis=None, dtype=None, out=None, keepdims=False,
---> 48          initial=_NoValue, where=True):
     49     return umr_sum(a, axis, dtype, out, keepdims, initial, where)
     51 def _prod(a, axis=None, dtype=None, out=None, keepdims=False,
     52           initial=_NoValue, where=True):

TypeError: can only concatenate str (not "float") to str


When I calculate `df.mean(axis=1)` myself, it outputs the correct answer without failing.
@GrigoriiTarasov GrigoriiTarasov changed the title can only concatenate str (not "float") to str while all dtypes are float64, int64 'can only concatenate str (not "float") to str' while all dtypes are float64, int64 May 21, 2023
@jganitkevitch
Copy link

This should be fixed by #24.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

2 participants