Conditionally color data in a plot with matplotlib through a loop
I have the following data:
import pandas as pd
import matplotlib.pyplot as plt
# One row per sample: [plate, sample id, log ratio, strength].
datas = [
    ['RAC1', 'CD0287', 1.52, 9.88],
    ['RAC1', 'CD0695', 2.08, 10.05],
    ['RAC1', 'CD0845', 2.01, 10.2],
    ['RAC3', 'CD0258', 1.91, 9.8],
    ['RAC3', 'CD471', 1.66, 9.6],
    ['RAC8', 'CD0558', 1.32, 9.3],
    ['RAC8', 'CD0968', 2.89, 10.01],
]
labels = ['Plate', 'Sample', 'LogRatio', 'Strength']

# The row labels are non-sequential integers, so label-based and
# position-based indexing give different rows on this frame.
df = pd.DataFrame(datas, index=[8, 3, 5, 4, 12, 44, 2], columns=labels)
print(df)
Plate Sample LogRatio Strength
8 RAC1 CD0287 1.52 9.88
3 RAC1 CD0695 2.08 10.05
5 RAC1 CD0845 2.01 10.20
4 RAC3 CD0258 1.91 9.80
12 RAC3 CD471 1.66 9.60
44 RAC8 CD0558 1.32 9.30
2 RAC8 CD0968 2.89 10.01
As you can see, my data is spread across different plates. I would like to create as many plots as I have different plates: 3 plots. And for each plot, I would like to color one plate red and the rest black.
The only way I have found so far is to do it manually, by writing code for each plate and changing which plate is red to generate the plots (I actually have over 30 plates, so it takes too long). I can show you my code if it helps you understand:
def getIndexPlates(df):
    """Group the row positions of *df* by plate.

    The index is reset before grouping, so the numbers returned are
    0-based row positions, not the original index labels of *df*.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing a "Plate" column.

    Returns
    -------
    list
        One ``[plate_name, row_positions]`` list per plate, in the order
        in which ``groupby`` yields the groups.
    """
    # Rebinding the local name only: reset_index() returns a new frame, so
    # the caller's DataFrame keeps its original index.
    df = df.reset_index()
    # Each entry is a two-element list: the plate name followed by the
    # positions of every sample on that plate.
    return [
        [name, group.index.tolist()]
        for name, group in df.groupby("Plate")
    ]
def plotting(df, listIndexAllPlates, highlight=0):
    """Scatter-plot LogRatio against Strength with one plate drawn in red.

    Every sample whose row position appears in ``listIndexAllPlates`` is
    drawn as an unfilled marker: red if it belongs to the highlighted
    plate, black otherwise. Samples listed under no plate are not drawn.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing "LogRatio" and "Strength" columns.
    listIndexAllPlates : list
        ``[plate_name, row_positions]`` entries, as produced by
        ``getIndexPlates``. Any number of plates is accepted.
    highlight : int, optional
        Position within ``listIndexAllPlates`` of the plate to draw in
        red. Defaults to 0 (the first plate).

    Raises
    ------
    IndexError
        If ``highlight`` is not a valid position in ``listIndexAllPlates``.
    """
    plt.clf()
    ax = plt.gca()
    # .values replaces DataFrame.as_matrix(), which was removed in pandas 1.0.
    datas = df[["LogRatio", "Strength"]].values

    # Sets give constant-time membership tests inside the per-sample loop.
    red_rows = set(listIndexAllPlates[highlight][1])
    known_rows = set().union(*(entry[1] for entry in listIndexAllPlates))

    for sample, (log_ratio, strength) in enumerate(datas):
        if sample in red_rows:
            edge_color = 'red'
        elif sample in known_rows:
            edge_color = 'black'
        else:
            # Row position not assigned to any plate: nothing to draw.
            continue
        ax.scatter(log_ratio, strength, alpha=0.8, facecolors='none',
                   edgecolors=edge_color)
    plt.show()
# Collect the row positions belonging to each plate, then draw the plot.
listIndexAllPlates = getIndexPlates(df)
plotting(df,listIndexAllPlates)
So I have my first plot with RAC1 in red and RAC3 and RAC8 in black, and now I would like to have a second plot with RAC3 in red (RAC1 and RAC8 in black) and a third plot with RAC8 in red (RAC1 and RAC3 in black). To do this, I manually change the color in my function, but I would like this to be done automatically. I know my approach is really bad and ugly; I just don't know how to do it better.
You can use `groupby` here, in conjunction with the `difference` method of the pandas `Index` object, to loop through your plates and get the indices for the current plate and for the rest of them:
# Visit each plate in turn and split the frame's index labels into
# "this plate" and "every other plate".
all_indices = df.index
for label, plate_df in df.groupby("Plate"):
    plate_indices = plate_df.index
    rest_indices = all_indices.difference(plate_indices)
    # do your plotting here accordingly
    print(label, plate_indices, rest_indices)
RAC1 Int64Index([8, 3, 5], dtype='int64') Int64Index([2, 4, 12, 44], dtype='int64')
RAC3 Int64Index([4, 12], dtype='int64') Int64Index([2, 3, 5, 8, 44], dtype='int64')
RAC8 Int64Index([44, 2], dtype='int64') Int64Index([3, 4, 5, 8, 12], dtype='int64')
Edit
To enable plotting, just include your matplotlib instructions:
# Marker styling shared by every scatter call: unfilled, slightly transparent.
plot_kwargs = {"alpha": 0.8, "facecolors": "none"}

# One figure per plate: the current plate in red, all other plates in black.
for label, plate_df in df.groupby("Plate"):
    plate_indices = plate_df.index
    rest_indices = df.index.difference(plate_indices)

    # create plot
    plt.clf()
    ax = plt.gca()
    # Red points are drawn first, then black, each as a single scatter call.
    for indices, edge_color in ((plate_indices, 'red'), (rest_indices, 'black')):
        subset = df.loc[indices]
        ax.scatter(subset["LogRatio"], subset["Strength"],
                   edgecolors=edge_color, **plot_kwargs)
    plt.show()