# Adapted from https://www.youtube.com/playlist?list=PLINDUevGdb7U_ZRLCpqKWutmcY0vCUFOI
# See Polars Tutorial2 for more.
import polars as pl
# df = pl.read_csv('pl_data.csv')
# Build the sample frame in memory: one row per person, with repeated Mario rows.
df = pl.DataFrame(
    {
        'Name': ['Mario', 'Luigi', 'Wario', 'Mario', 'Mario'],
        'Age': [30, 28, 26, 30, 30],
    }
)
print(df)

# Add two columns derived from a single Series of scores.
score_values = pl.Series([95, 99, 94, 90, 96])
df = df.with_columns(
    Score=score_values,
    Score_x2=score_values * 2,
)
print(df)

# drop() returns a new frame and leaves df untouched; the columns to remove
# can be given as one name, several positional names, or a list of names.
print(df.drop('Score_x2'))
print(df.drop('Score_x2', 'Score'))
print(df.drop(['Score_x2', 'Score']))

# drop_in_place() removes the column from df itself.
df.drop_in_place('Score_x2')
print(df)
# Start again from the plain two-column frame.
df = pl.DataFrame({
    'Name': ['Mario', 'Luigi', 'Wario', 'Mario', 'Mario'],
    'Age': [30, 28, 26, 30, 30],
})

# Show only the distinct rows (the repeated Mario rows collapse into one).
print(df.unique())

# Walk the frame one row at a time; each row is a tuple of column values,
# so row[0] is that row's 'Name'. iter_rows() yields rows lazily instead of
# building the full list of tuples first, as rows() does.
# NOTE(review): the original indentation was lost; both prints are assumed
# to belong to the loop body.
for row in df.iter_rows():
    print(row)
    print(row[0])
import matplotlib.pyplot as plt
# Daily price sample used for the line plot below.
df = pl.DataFrame(
    {
        'Date': [
            '1/1/2023', '1/2/2023', '1/3/2023', '1/4/2023', '1/5/2023',
            '1/6/2023', '1/7/2023', '1/8/2023', '1/9/2023', '1/10/2023',
        ],
        'Price': [15, 16, 16, 15, 14, 13, 14, 17, 16, 18],
    }
)

# Pull each column out as a plain Python list and hand both to matplotlib.
dates = df['Date'].to_list()
prices = df['Price'].to_list()
plt.plot(dates, prices)
plt.show()
# This operation requires that both pandas and pyarrow are installed.
# Convert the Polars frame to pandas and compare the two objects side by side.
x = df.to_pandas()
print(type(df))
print(type(x))
print(df)
print(x)

# Row filtering: first a single condition, then two conditions combined with &.
price = pl.col('Price')
print(df.filter(price > 15))
print(df.filter((price > 15) & (price < 17)))
# Two frames that share a 'key' column; only B, C and D appear in both.
df1 = pl.DataFrame({
    'key': ['A', 'B', 'C', 'D'],
    'value_left': [1, 2, 3, 4],
})
df2 = pl.DataFrame({
    'key': ['B', 'C', 'D', 'E'],
    # Named 'value_right' so the joined output has two clearly distinct value
    # columns; reusing 'value_left' here made Polars emit 'value_left_right'.
    'value_right': [5, 6, 7, 8],
})

# Inner join (the default 'how'): keeps only the keys present in both frames.
joined_df = df1.join(df2, on='key', how='inner')
print(joined_df)

# Left join: keeps every row of df1; keys missing from df2 get null values.
joined_df = df1.join(df2, on='key', how='left')
print(joined_df)
# Calorie log with several entries per person.
df = pl.DataFrame({
    'Name': ['Chris', 'Chris', 'Kyle', 'John', 'John', 'Chris', 'Kyle'],
    'Calories': [100, 200, 50, 100, 350, 150, 450],
})

# Total calories and number of entries per person. maintain_order=True keeps
# the groups in order of first appearance; without it the group order can
# differ from run to run, which makes the printed output unreproducible.
df2 = df.group_by('Name', maintain_order=True).agg(
    sum_=pl.col('Calories').sum(),
    count_=pl.col('Calories').count(),
)
print(df2)

# Sort by name, then by calories within each name.
print(df.sort(['Name', 'Calories']))

# rename() takes an {old_name: new_name} mapping and returns a new frame.
renamed_col = df.rename({'Name': 'ID', 'Calories': 'Consumption'})
print(renamed_col)

# Keep only a subset of the aggregated columns.
print(df2.select(['Name', 'count_']))
# Sample frame with a missing value in every column.
df = pl.DataFrame(
    {
        'Name': ['John', 'Steven', 'Chris', None],
        'Age': [20, 25, None, 32],
        'Score': [99, None, 100, None],
    }
)
print(df)

# Replace nulls with a string placeholder.
# NOTE(review): Polars appears to apply a fill value only to columns of a
# matching type, so the numeric columns likely keep their nulls - confirm.
fill_df = df.fill_null('NULL_STRING')
print(fill_df)

# Replace nulls with a numeric placeholder instead.
fill_df = df.fill_null(0)
print(fill_df)

# Drop every row that still contains a null, first on the raw frame and then
# on the zero-filled one.
print(df.drop_nulls())
print(fill_df.drop_nulls())
# Same price sample as before, with the dates still stored as strings.
df = pl.DataFrame(
    {
        'Date': [
            '1/1/2023', '1/2/2023', '1/3/2023', '1/4/2023', '1/5/2023',
            '1/6/2023', '1/7/2023', '1/8/2023', '1/9/2023', '1/10/2023',
        ],
        'Price': [15, 16, 16, 15, 14, 13, 14, 17, 16, 18],
    }
)

# Parse the strings into real dates. No format is passed, so Polars infers one.
# NOTE(review): values such as '1/2/2023' are ambiguous (day-first versus
# month-first); consider passing format= explicitly once the intent is confirmed.
df = df.with_columns(Date2=pl.col('Date').str.to_date())
print(df)

# Format the parsed date back into a month+year string.
df = df.with_columns(Date3=pl.col('Date2').dt.strftime('%m%Y'))
print(df)

# Cast Price to string via the Polars dtype, then via the Python type,
# and finally to a 64-bit float.
df = df.with_columns(Price=pl.col('Price').cast(pl.Utf8))
print(df)
df = df.with_columns(Price=pl.col('Price').cast(str))
print(df)
df = df.with_columns(Price=pl.col('Price').cast(pl.Float64))
print(df)

# Shift Date3 down by one row and label the resulting empty first slot.
shifted = pl.col('Date3').shift(1).fill_null('SHIFTED')
df = df.with_columns(Shifted=shifted)
print(df)
# The DataFrame can also be exported to other file formats, for example with
# write_excel and write_parquet.
# df.write_csv('df.csv')
# Read the first sheet.
# df = pl.read_excel('data2.xlsx')
# Read the second sheet called "Sheet2".
# df = pl.read_excel('data2.xlsx', sheet_name = 'Sheet2')
# Read a text file with |-delimited columns.
# df = pl.read_csv('pipe_sep.txt', separator = '|')