How to use the pyreadstat.read_sav function in pyreadstat

To help you get started, we’ve selected a few pyreadstat examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

Source: Roche/pyreadstat, tests/test_basic.py (view on GitHub)
def test_sav_write_basic(self):
        """Write ``self.df_pandas`` to a SAV file with metadata and read it back.

        Verifies that the data frame, file label, column labels, note, value
        labels, display width and measure all survive the round trip.
        """
        expected_label = "basic write"
        expected_note = "These are some notes"
        expected_col_labels = [
            "mychar label",
            "mynum label",
            "mydate label",
            "dtime label",
            None,
            "myord label",
            "mytime label",
        ]
        value_labels = {
            'mylabl': {1.0: 'Male', 2.0: 'Female'},
            'myord': {1.0: 'low', 2.0: 'medium', 3.0: 'high'},
        }
        display_width = {'mychar': 20}
        measure = {"mychar": "nominal"}
        # NOTE: variable_alignment is not part of this round trip; it is
        # neither written nor asserted on.
        metadata_kwargs = {
            "file_label": expected_label,
            "column_labels": expected_col_labels,
            "note": expected_note,
            "variable_value_labels": value_labels,
            "missing_ranges": {'mychar': ['a'], 'myord': [{'hi': 2, 'lo': 1}]},
            "variable_display_width": display_width,
            "variable_measure": measure,
        }
        target = os.path.join(self.write_folder, "basic_write.sav")

        pyreadstat.write_sav(self.df_pandas, target, **metadata_kwargs)
        frame, metadata = pyreadstat.read_sav(target, user_missing=True)

        # Data first, then each piece of metadata that was written.
        self.assertTrue(frame.equals(self.df_pandas))
        self.assertEqual(metadata.file_label, expected_label)
        self.assertListEqual(metadata.column_labels, expected_col_labels)
        self.assertEqual(metadata.notes[0], expected_note)
        self.assertDictEqual(metadata.variable_value_labels, value_labels)
        self.assertEqual(metadata.variable_display_width['mychar'], display_width['mychar'])
        self.assertEqual(metadata.variable_measure["mychar"], measure["mychar"])
Source: Roche/pyreadstat, tests/test_basic.py (view on GitHub)
def test_sav_metaonly(self):
        """Compare a full read of sample.sav with a ``metadataonly=True`` read.

        The metadata-only read must return an empty data frame while reporting
        the same column/row counts, names and labels as the full read.
        """
        sav_path = os.path.join(self.basic_data_folder, "sample.sav")
        _, full_meta = pyreadstat.read_sav(sav_path)
        empty_frame, only_meta = pyreadstat.read_sav(sav_path, metadataonly=True)

        self.assertTrue(empty_frame.empty)
        for field in ("number_columns", "number_rows", "column_names", "column_labels"):
            self.assertTrue(getattr(full_meta, field) == getattr(only_meta, field))
        self.assertTrue(len(only_meta.notes) > 0)
Source: Roche/pyreadstat, tests/test_basic.py (view on GitHub)
def test_zsav_formatted(self):
        """Read sample.zsav with value formats applied as categories.

        The result is compared against ``self.df_pandas_formatted``, and the
        metadata must report matching dimensions and at least one note.
        """
        zsav_path = os.path.join(self.basic_data_folder, "sample.zsav")
        frame, metadata = pyreadstat.read_sav(
            zsav_path,
            apply_value_formats=True,
            formats_as_category=True,
        )
        expected = self.df_pandas_formatted

        self.assertTrue(frame.equals(expected))
        self.assertTrue(metadata.number_columns == len(expected.columns))
        self.assertTrue(metadata.number_rows == len(expected))
        self.assertTrue(len(metadata.notes) > 0)
Source: Roche/pyreadstat, tests/test_basic.py (view on GitHub)
def test_sav_nodates(self):
        """Read sample.sav with datetime conversion disabled and compare to ``self.df_nodates_spss``."""
        sav_path = os.path.join(self.basic_data_folder, "sample.sav")
        frame, _ = pyreadstat.read_sav(sav_path, disable_datetime_conversion=True)
        matches = frame.equals(self.df_nodates_spss)
        self.assertTrue(matches)
Source: Roche/pyreadstat, tests/test_basic.py (view on GitHub)
def test_zsav_metaonly(self):
        """Compare a full read of sample.zsav with a ``metadataonly=True`` read.

        Both reads use the same compressed ``.zsav`` file, so the
        metadata-only code path is exercised on zsav input. The
        metadata-only read must return an empty data frame while reporting
        the same column/row counts, names and labels as the full read.
        """
        # Build the path once so both reads are guaranteed to hit the same file.
        zsav_path = os.path.join(self.basic_data_folder, "sample.zsav")

        df, meta = pyreadstat.read_sav(zsav_path)
        df2, meta2 = pyreadstat.read_sav(zsav_path, metadataonly=True)
        self.assertTrue(df2.empty)
        self.assertTrue(meta.number_columns == meta2.number_columns)
        self.assertTrue(meta.number_rows == meta2.number_rows)
        self.assertTrue(meta.column_names == meta2.column_names)
        self.assertTrue(meta.column_labels == meta2.column_labels)
        self.assertTrue(len(meta2.notes) > 0)
Source: Roche/pyreadstat, tests/test_basic.py (view on GitHub)
def test_zsav_nodates(self):
        """Read sample.zsav with datetime conversion disabled and compare to ``self.df_nodates_spss``."""
        zsav_path = os.path.join(self.basic_data_folder, "sample.zsav")
        frame, _ = pyreadstat.read_sav(zsav_path, disable_datetime_conversion=True)
        matches = frame.equals(self.df_nodates_spss)
        self.assertTrue(matches)
Source: Roche/pyreadstat, tests/test_basic.py (view on GitHub)
def test_sav_user_missing(self):
        """Check how user-defined missing values in missing_test.sav are read.

        The file is read three ways and each result is compared with a
        reference CSV from ``self.missing_data_folder``:

        1. default options              -> missing_sav_unformatted.csv
        2. ``user_missing=True``        -> missing_sav_formatted.csv
        3. ``user_missing=True`` plus value formats applied (not as
           categories)                  -> missing_sav_labeled.csv
        """
        sav_file = os.path.join(self.missing_data_folder, "missing_test.sav")
        unformatted_csv = os.path.join(self.missing_data_folder, "missing_sav_unformatted.csv")
        formatted_csv = os.path.join(self.missing_data_folder, "missing_sav_formatted.csv")
        labeled_csv = os.path.join(self.missing_data_folder, "missing_sav_labeled.csv")

        df_sav, meta = pyreadstat.read_sav(sav_file)
        df_csv = pd.read_csv(unformatted_csv)
        self.assertTrue(df_sav.equals(df_csv))

        df_sav, meta = pyreadstat.read_sav(sav_file, user_missing=True)
        df_csv = pd.read_csv(formatted_csv)
        self.assertTrue(df_sav.equals(df_csv))

        df_sav, meta = pyreadstat.read_sav(sav_file,
                            apply_value_formats=True, user_missing=True,
                            formats_as_category=False)
        # Normalise row 1 of var1 to an int before stringifying the column
        # (presumably so it matches the CSV's text form; confirm against the
        # fixture). A single .loc[row, col] assignment is used instead of
        # chained indexing, which may write to a temporary copy and is a
        # no-op under pandas copy-on-write.
        df_sav.loc[1, 'var1'] = int(df_sav.loc[1, 'var1'])
        df_sav['var1'] = df_sav['var1'].astype(str)
        df_csv = pd.read_csv(labeled_csv)
        self.assertTrue(df_sav.equals(df_csv))
Source: Roche/pyreadstat, tests/test_basic.py (view on GitHub)
def test_sav_metaonly(self):
        """Check that ``metadataonly=True`` on sample.sav gives no rows but the same metadata.

        Column count, row count, column names and column labels must match
        those from a normal read, and notes must be present.
        """
        sample = os.path.join(self.basic_data_folder, "sample.sav")
        reference_meta = pyreadstat.read_sav(sample)[1]
        data_only, header_meta = pyreadstat.read_sav(sample, metadataonly=True)

        self.assertTrue(data_only.empty)
        same_columns = reference_meta.number_columns == header_meta.number_columns
        self.assertTrue(same_columns)
        same_rows = reference_meta.number_rows == header_meta.number_rows
        self.assertTrue(same_rows)
        same_names = reference_meta.column_names == header_meta.column_names
        self.assertTrue(same_names)
        same_labels = reference_meta.column_labels == header_meta.column_labels
        self.assertTrue(same_labels)
        self.assertTrue(len(header_meta.notes) > 0)
Source: Roche/pyreadstat, tests/test_basic.py (view on GitHub)
def test_zsav_usecols(self):
        """Read only the columns in ``self.usecols`` from sample.zsav.

        The data must equal ``self.df_usecols`` and the metadata must list
        exactly the requested columns.
        """
        wanted = self.usecols
        zsav_path = os.path.join(self.basic_data_folder, "sample.zsav")
        frame, metadata = pyreadstat.read_sav(zsav_path, usecols=wanted)

        self.assertTrue(frame.equals(self.df_usecols))
        self.assertTrue(metadata.number_columns == len(wanted))
        self.assertTrue(metadata.column_names == wanted)
Source: Roche/pyreadstat, tests/test_basic.py (view on GitHub)
def test_sav_missing_char(self):
        """Read missing_char.sav with and without ``user_missing``.

        By default the first ``mychar`` value is expected to be NaN and no
        missing ranges are reported. With ``user_missing=True`` it is expected
        to be "Z", with a matching lo/hi missing range.
        """
        char_path = os.path.join(self.missing_data_folder, "missing_char.sav")

        # Default read.
        plain_df, plain_meta = pyreadstat.read_sav(char_path)
        expected_plain = pd.DataFrame([[np.nan], ["a"]], columns=["mychar"])
        self.assertTrue(plain_df.equals(expected_plain))
        self.assertTrue(plain_meta.missing_ranges == {})

        # Read with user_missing=True.
        user_df, user_meta = pyreadstat.read_sav(char_path, user_missing=True)
        expected_user = pd.DataFrame([["Z"], ["a"]], columns=["mychar"])
        self.assertTrue(user_df.equals(expected_user))
        first_range = user_meta.missing_ranges['mychar'][0]
        self.assertTrue(first_range == {'lo': "Z", 'hi': "Z"})

pyreadstat

Reads and Writes SAS, SPSS and Stata files into/from pandas data frames.

Apache-2.0
Latest version published 1 month ago

Package Health Score

84 / 100
Full package analysis

Similar packages