Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
"""
import fastparquet
fmd = copy.copy(fmd)
for fn, rg in zip(filenames, writes):
if rg is not None:
if isinstance(rg, list):
for r in rg:
fmd.row_groups.append(r)
else:
for chunk in rg.columns:
chunk.file_path = fn
fmd.row_groups.append(rg)
fn = sep.join([path, "_metadata"])
fastparquet.writer.write_common_metadata(
fn, fmd, open_with=fs.open, no_row_groups=False
)
fn = sep.join([path, "_common_metadata"])
fastparquet.writer.write_common_metadata(fn, fmd, open_with=fs.open)
if rg is not None:
if isinstance(rg, list):
for r in rg:
fmd.row_groups.append(r)
else:
for chunk in rg.columns:
chunk.file_path = fn
fmd.row_groups.append(rg)
fn = sep.join([path, "_metadata"])
fastparquet.writer.write_common_metadata(
fn, fmd, open_with=fs.open, no_row_groups=False
)
fn = sep.join([path, "_common_metadata"])
fastparquet.writer.write_common_metadata(fn, fmd, open_with=fs.open)
):
raise ValueError(
"Appended columns not the same.\n"
"Previous: {} | New: {}".format(pf.columns, list(df.columns))
)
elif (pd.Series(pf.dtypes).loc[pf.columns] != df[pf.columns].dtypes).any():
raise ValueError(
"Appended dtypes differ.\n{}".format(
set(pf.dtypes.items()) ^ set(df.dtypes.iteritems())
)
)
else:
df = df[pf.columns + partition_on]
fmd = pf.fmd
i_offset = fastparquet.writer.find_max_part(fmd.row_groups)
if not ignore_divisions:
if not set(index_cols).intersection([division_info["name"]]):
ignore_divisions = True
if not ignore_divisions:
minmax = fastparquet.api.sorted_partitioned_columns(pf)
old_end = minmax[index_cols[0]]["max"][-1]
divisions = division_info["divisions"]
if divisions[0] < old_end:
raise ValueError(
"Appended divisions overlapping with previous ones."
"\n"
"Previous: {} | New: {}".format(old_end, divisions[0])
)
else:
fmd = fastparquet.writer.make_metadata(
df._meta,