Data Science Stack Exchange
2026-06-05 09:27 UTC
Score 15.0
AI-111-20260605-social-media-02d00a89
Full article
I get an error when creating a csr_matrix from two merged pd.read_csv() DataFrames. The code is: from scipy.sparse import csr_matrix as csr from pandas import DataFrame as df from pandas import Series as s import pandas as pd df_1 = df({"no": s(["07628", "07628", "07628", "07628", "07628", "07419", "07419", "07419", "07419", "07419"], dtype="str"), "t": ["ex0", "ex1", "ex2", "ex3", "ex4", "ex5", "ex6", "ex7", "ex8", "ex9"], "penilai": ["a", "b", "c", "d", "e", "f", "g", "h", "i", "j"]}) df_2 = df({"no": s(["07628", "07628", "07628", "07628", "07628", "07419", "07419", "07419", "07419", "07419"], dtype="str"), "vec": [8, 17, 1, 89, 37, 19, 53, 72, 8, 94]}) df1_csv = df_1.to_csv("one.csv", sep=";") df2_csv = df_2.to_csv("two.csv", sep=";") readcsv1 = pd.read_csv('one.csv', sep=";", header=0, names=["no", "t", "penilai"], usecols=["no", "t", "penilai"], dtype={"no": "str", "t": "str", "penilai": "str"}) readcsv2 = pd.read_csv("two.csv", sep=";", header=0, names=["vec", "no"], usecols=["vec", "no"], dtype={"no": "str", "vec": "float32"}) merger = readcsv1.merge(readcsv2, on="no", how="outer") row = merger["t"].astype("category").cat.codes col = merger["penilai"].astype("category").cat.codes val = merger["vec"].values csrm = csr((val, (row, col))) The error message is: ValueError Traceback (most recent call last) /tmp/ipykernel_3197/4294466243.py in () 20 col = merger["penilai"].astype("category").cat.codes 21 val = merger["vec"].values ---> 22 csrm = csr((val, (row, col))) 23 24…