Python, pandas - Transpose values to columns

Using pandas, how can I transform the following DataFrame:

In [1]: import pandas as pd
In [2]: pd.DataFrame({'month': [1, 1, 1, 2, 2, 3, 3],
                      'type': ["T1", "T1", "T4", "T2", "T3", "T1", "T3"],
                      'value': [10, 40, 20, 30, 10, 40, 50]})
Out[2]: 
   month type  value
0      1   T1     10
1      1   T1     40
2      1   T4     20
3      2   T2     30
4      2   T3     10
5      3   T1     40
6      3   T3     50

      

to get the result below?

Out[3]: 
   T1  T2  T3  T4  month
0  10   0   0   0      1
1  40   0   0   0      1
2   0   0   0  20      1
3   0  30   0   0      2
4   0   0  10   0      2
5  40   0   0   0      3
6   0   0  50   0      3

      

+3


source to share


3 answers


pandas


Clever use of pd.get_dummies

# One-hot encode `type` into indicator columns, multiply each row by its
# `value` (axis 0 aligns the Series on the row index), then append `month`.
pd.get_dummies(df.type).mul(df.value, 0).join(df.month)

   T1  T2  T3  T4  month
0  10   0   0   0      1
1  40   0   0   0      1
2   0   0   0  20      1
3   0  30   0   0      2
4   0   0  10   0      2
5  40   0   0   0      3
6   0   0  50   0      3

      


numpy


Or the same idea, but supercharged



# u: sorted unique type labels; inv: for each row, the position of its label in u.
u, inv = np.unique(df.type.values, return_inverse=True)
# Identity matrix: row i is the one-hot vector for label u[i].
eye = np.eye(u.size, dtype=int)
v = df.value.values
m = df.month.values
pd.DataFrame(
    # eye[inv] picks each row's one-hot vector; v[:, None] broadcasts the row's
    # value across it. The month values are appended as the last column.
    np.column_stack([eye[inv] * v[:, None], m]),
    # Positional arguments: original row index, then column labels (types + 'month').
    df.index, np.append(u, 'month')
)

   T1  T2  T3  T4  month
0  10   0   0   0      1
1  40   0   0   0      1
2   0   0   0  20      1
3   0  30   0   0      2
4   0   0  10   0      2
5  40   0   0   0      3
6   0   0  50   0      3

      


Timing

%timeit pd.get_dummies(df.type).mul(df.value, 0).join(df.month)
1000 loops, best of 3: 1.1 ms per loop

%%timeit
u, inv = np.unique(df.type.values, return_inverse=True)
eye = np.eye(u.size, dtype=int)
v = df.value.values
m = df.month.values
pd.DataFrame(
    np.column_stack([eye[inv] * v[:, None], m]),
    df.index, np.append(u, 'month')
)
10000 loops, best of 3: 189 µs per loop

%%timeit
(df.set_index(['type'],append=True)['value']
   .unstack(fill_value=0)).join(df[['month']])
100 loops, best of 3: 1.92 ms per loop

%%timeit
d1 = df.set_index(['month','type'], append=True)['value'] \
       .unstack(fill_value=0) \
       .reset_index(level=1) \

cols = d1.columns[1:].tolist() + d1.columns[:1].tolist() 
d1 = d1.reindex_axis(cols, axis=1)
d1
100 loops, best of 3: 2.48 ms per loop

      

+4


source


You can use a combination of set_index and unstack to get the T1 - T4 columns, and then join the month column like this:



# Append `type` to the existing row index, take `value`, pivot the innermost
# index level (`type`) into columns filling gaps with 0, then join `month`
# back on the original row index.
(df.set_index(['type'],append=True)['value']
   .unstack(fill_value=0)).join(df[['month']])
#    T1  T2  T3  T4  month
# 0  10   0   0   0      1
# 1  40   0   0   0      1
# 2   0   0   0  20      1
# 3   0  30   0   0      2
# 4   0   0  10   0      2
# 5  40   0   0   0      3
# 6   0   0  50   0      3 

      

+3


source


You can use set_index, unstack and reset_index. Finally, to change the order of the columns, add reindex_axis:

# Move `month` and `type` into the index (keeping the original row index),
# pivot the innermost level (`type`) into columns with 0 for missing cells,
# then turn `month` (index level 1) back into a regular column.
# The chain is wrapped in parentheses so it can legally span several lines.
df = (df.set_index(['month', 'type'], append=True)['value']
        .unstack(fill_value=0)
        .reset_index(level=1))
# Reorder columns: reset_index puts `month` first, so move it to the end.
cols = df.columns[1:].tolist() + df.columns[:1].tolist()
# reindex_axis was removed in pandas 1.0; reindex(columns=...) is its replacement.
df = df.reindex(columns=cols)
print(df)
type  T1  T2  T3  T4  month
0     10   0   0   0      1
1     40   0   0   0      1
2      0   0   0  20      1
3      0  30   0   0      2
4      0   0  10   0      2
5     40   0   0   0      3
6      0   0  50   0      3

      

+2


source







All Articles