使用pandas读取csv:
BPIC_2017_df = pd.read_csv('./datasets_csv/BPI_Challenge_2017.csv', index_col=0)
但是读取时报错(起初以为是csv数据集太大导致的,不过从下面的报错信息看,错误并非出在文件大小上,而是出在pandas对索引列(index_col)做类型推断、检查缺失值的过程中):
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input> in <module>
      2 
----> 3 BPIC_2017_df = pd.read_csv('./datasets_csv/BPI_Challenge_2017.csv', index_col=0)

E:\anaconda3\envs\tf_keras\lib\site-packages\pandas\io\parsers.py in read_csv(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, dialect, error_bad_lines, warn_bad_lines, delim_whitespace, low_memory, memory_map, float_precision)
    684     )
    685 
--> 686     return _read(filepath_or_buffer, kwds)
    687 
    688 

E:\anaconda3\envs\tf_keras\lib\site-packages\pandas\io\parsers.py in _read(filepath_or_buffer, kwds)
    456 
    457     try:
--> 458         data = parser.read(nrows)
    459     finally:
    460         parser.close()

E:\anaconda3\envs\tf_keras\lib\site-packages\pandas\io\parsers.py in read(self, nrows)
   1194     def read(self, nrows=None):
   1195         nrows = _validate_integer("nrows", nrows)
-> 1196         ret = self._engine.read(nrows)
   1197 
   1198         # May alter columns / col_dict

E:\anaconda3\envs\tf_keras\lib\site-packages\pandas\io\parsers.py in read(self, nrows)
   2229 
   2230         names, data = self._do_date_conversions(names, data)
-> 2231         index, names = self._make_index(data, alldata, names)
   2232 
   2233         # maybe create a mi on the columns

E:\anaconda3\envs\tf_keras\lib\site-packages\pandas\io\parsers.py in _make_index(self, data, alldata, columns, indexnamerow)
   1675         elif not self._has_complex_date_col:
   1676             index = self._get_simple_index(alldata, columns)
-> 1677             index = self._agg_index(index)
   1678         elif self._has_complex_date_col:
   1679             if not self._name_processed:

E:\anaconda3\envs\tf_keras\lib\site-packages\pandas\io\parsers.py in _agg_index(self, index, try_parse_dates)
   1768                 )
   1769 
-> 1770             arr, _ = self._infer_types(arr, col_na_values | col_na_fvalues)
   1771             arrays.append(arr)
   1772 

E:\anaconda3\envs\tf_keras\lib\site-packages\pandas\io\parsers.py in _infer_types(self, values, na_values, try_num_bool)
   1869         na_count = 0
   1870         if issubclass(values.dtype.type, (np.number, np.bool_)):
-> 1871             mask = algorithms.isin(values, list(na_values))
   1872             na_count = mask.sum()
   1873             if na_count > 0:

E:\anaconda3\envs\tf_keras\lib\site-packages\pandas\core\algorithms.py in isin(comps, values)
    441         # If the the values include nan we need to check for nan explicitly
    442         # since np.nan it not equal to np.nan
--> 443         if np.isnan(values).any():
    444             f = lambda c, v: np.logical_or(np.in1d(c, v), np.isnan(c))
    445         else:

TypeError: ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''
具体错误为:
TypeError: ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''
我的解决方法是把
BPIC_2017_df = pd.read_csv('./datasets_csv/BPI_Challenge_2017.csv', index_col=0)
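中的", index_col=0"这个设置去掉,即改为:
BPIC_2017_df = pd.read_csv('./datasets_csv/BPI_Challenge_2017.csv')
读取成功!!!
注意:去掉index_col=0之后,csv的第一列会作为普通列保留在DataFrame中,pandas会另外生成默认的整数索引;如果仍想用第一列作索引,可以在读取之后再执行 BPIC_2017_df = BPIC_2017_df.set_index(BPIC_2017_df.columns[0])。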



