I get an error when creating a csr_matrix data. from two merged pd.read_csv() data.
the code is:
from scipy.sparse import csr_matrix as csr
from pandas import DataFrame as df
from pandas import Series as s
import pandas as pd
df_1 = df({"no": s(["07628", "07628", "07628", "07628", "07628", "07419", "07419", "07419", "07419", "07419"], dtype="str"), "t": ["ex0", "ex1", "ex2", "ex3", "ex4", "ex5", "ex6", "ex7", "ex8", "ex9"], "penilai": ["a", "b", "c", "d", "e", "f", "g", "h", "i", "j"]})
df_2 = df({"no": s(["07628", "07628", "07628", "07628", "07628", "07419", "07419", "07419", "07419", "07419"], dtype="str"), "vec": [8, 17, 1, 89, 37, 19, 53, 72, 8, 94]})
df1_csv = df_1.to_csv("one.csv", sep=";")
df2_csv = df_2.to_csv("two.csv", sep=";")
readcsv1 = pd.read_csv('one.csv', sep=";", header=0, names=["no", "t", "penilai"], usecols=["no", "t", "penilai"], dtype={"no": "str", "t": "str", "penilai": "str"})
readcsv2 = pd.read_csv("two.csv", sep=";", header=0, names=["vec", "no"], usecols=["vec", "no"], dtype={"no": "str", "vec": "float32"})
merger = readcsv1.merge(readcsv2, on="no", how="outer")
row = merger["t"].astype("category").cat.codes
col = merger["penilai"].astype("category").cat.codes
val = merger["vec"].values
csrm = csr((val, (row, col)))
the error message is:
ValueError Traceback (most recent call last) /tmp/ipykernel_3197/4294466243.py in <cell line: 0>()
20 col = merger["penilai"].astype("category").cat.codes
21 val = merger["vec"].values
---> 22 csrm = csr((val, (row, col)))
23
24 knn = nn(n_neighbors=3, metric="euclidean")
2 frames
/usr/local/lib/python3.12/dist-packages/scipy/sparse/_compressed.py in __init__(self, arg1, shape, dtype, copy, maxprint)
55 if len(arg1) == 2:
56 # (data, ij) format
---> 57 coo = self._coo_container(arg1, shape=shape, dtype=dtype)
58 arrays = coo._coo_to_compressed(self._swap)
59 self.indptr, self.indices, self.data, self._shape = arrays /usr/local/lib/python3.12/dist-packages/scipy/sparse/_coo.py in __init__(self, arg1, shape, dtype, copy, maxprint)
99 self.coords = tuple(idx.astype(np.int64, copy=False) for idx in self.coords)
100
--> 101 self._check()
102
103 @property /usr/local/lib/python3.12/dist-packages/scipy/sparse/_coo.py in _check(self)
223 f'matrix dimension {self.shape[i]}')
224 if idx.min() < 0:
--> 225 raise ValueError(f'negative axis {i} index: {idx.min()}')
226
227 def transpose(self, axes=None, copy=False):
ValueError: negative axis 0 index: -1
```