Separate halves of a split violin plot to compare tail data
Is there a way to physically separate the two halves of a "split" seaborn violin plot (or another type of violin plot)? I am trying to compare two different treatments, but there is a skinny tail and it is difficult (impossible) to tell whether one or both halves of the split violin extend to the end of the tail.
It seemed to me that if the two halves were slightly separated from each other, rather than immediately adjacent, it would be easy to read the data.
Here is my code:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import style
import seaborn as sns
# Read the LMP columns of each dispatch run from the same worksheet, tag every
# row with the run it came from, and discard rows with missing values.
# NOTE(review): `sheetname` and `parse_cols` are the keyword names used by
# older pandas releases; newer releases spell them `sheet_name` and `usecols`
# -- confirm against the installed pandas version.
run_sources = (('Scheduling', [0, 5]), ('Pricing', [7, 12]))
run_frames = []
for run_label, column_span in run_sources:
    run_frame = pd.read_excel(
        'Modeling analysis charts.xlsx',
        sheetname='lmps',
        parse_cols=column_span,
        skiprows=0,
        header=1,
    )
    run_frame['Run'] = run_label
    run_frames.append(run_frame.dropna(how='any'))
df1, df2 = run_frames

# Stack both runs into a single long-format frame for seaborn.
df = pd.concat([df1, df2])

# The stored LMPs have the opposite sign of the actual values; flip them.
df['LMP'] = -df['LMP']

fontsize = 10
style.use('fivethirtyeight')
fig, axes = plt.subplots()

# One split violin per scenario: one half per dispatch run.
sns.violinplot(
    x='Scenario',
    y='LMP',
    hue='Run',
    split=True,
    data=df,
    inner=None,
    scale='area',
    bw=0.2,
    cut=0,
    linewidth=0.5,
    ax=axes,
)

# Titles, labels and horizontal grid lines.
axes.set_title('Day Ahead Market')
# Optional manual y-limits:
# axes.set_ylim([-15,90])
axes.yaxis.grid(True)
axes.set_xlabel('Scenario')
axes.set_ylabel('LMP ($/MWh)')

# Optional export to PDF:
# plt.savefig('DAMarket.pdf', bbox_inches='tight')
plt.show()
source to share
EDIT: For historical reasons this is the accepted answer, but take a look at @conchoecia's answer for a newer and cleaner implementation.
Cool idea. The basic idea of my implementation is to draw the whole plot, grab the patches corresponding to the two half-violins, and then shift the paths of those patches to the left or right. The code is hopefully self-explanatory; otherwise let me know in the comments.
import numpy as np
import matplotlib.pyplot as plt;
import matplotlib.collections
import seaborn as sns
import pandas as pd
# Synthetic demo data: `n` normal samples spread over `c` integer classes,
# each sample randomly assigned to one of two sub-classes.
n = 10000  # number of samples
c = 5  # classes
y = np.random.randn(n)
x = np.random.randint(0, c, size=n)
z = np.random.rand(n) > 0.5  # sub-class
data = pd.DataFrame({'x': x, 'y': y, 'z': z})

# Start from a fresh axis: the shifting below picks halves purely by their
# position in `ax.collections`, so any other artist already on the axis
# would break the pattern.
fig, ax = plt.subplots(1, 1)

# Draw the split violins.
inner = None  # Note: 'box' is default
ax = sns.violinplot(data=data, x='x', y='y', hue='z', split=True,
                    inner=inner, ax=ax)

# Horizontal offset applied to each half, cycled over the collection index.
# Without an inner the collections alternate left/right; with an inner
# (e.g. 'box') every third collection is an extra artist that is left alone.
delta = 0.02
if not inner:
    offset_cycle = (-delta, delta)
else:
    offset_cycle = (-delta, delta, None)

for position, artist in enumerate(ax.collections):
    # The axis contains PolyCollections and PathCollections; only the
    # PolyCollections are handled here.
    if not isinstance(artist, matplotlib.collections.PolyCollection):
        continue
    # Each handled collection is expected to hold exactly one path.
    (outline,) = artist.get_paths()
    coords = outline.vertices
    shift = offset_cycle[position % len(offset_cycle)]
    if shift is not None:
        # Move the x-coordinates of the outline in place.
        coords[:, 0] += shift
source to share
I've expanded on @Paul's answer above and made it more robust. It now supports both vertical and horizontal orientation, and I've implemented it to work with inner='sticks' since that suits my application.
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.collections
import seaborn as sns
import pandas as pd
def offset_violinplot_halves(ax, delta, width, inner, direction):
    """
    Offset the two halves of split violinplots so they no longer touch.

    This is meant for violinplots drawn by Seaborn with `split=True`, to
    compare tails or to leave room for something else between the halves.
    Every half is moved away from its centre by `delta`, in place.

    For the stick lines this works on the assumption that Seaborn centres
    every violin on an integer position and that each stick runs between
    that centre and the outline of its half.

    Args:
      <ax>        The axis that contains the violinplots. Every line on it is
                  treated as a stick when `inner == 'sticks'`.
      <delta>     The distance each half is moved away from the centre.
      <width>     The total width of the violinplot, as passed to
                  sns.violinplot(). Accepted for interface compatibility;
                  the current implementation does not use it.
      <inner>     The `inner` option used for the plot. Only 'sticks' and
                  None are handled; any other value leaves the plot as is.
      <direction> Orientation of the violinplot, 'horizontal' or 'vertical'.

    Returns:
      - NA, modifies the <ax> directly
    """
    # --- stick lines -------------------------------------------------------
    if inner == 'sticks' and direction in ('horizontal', 'vertical'):
        for line in ax.get_lines():
            # Sticks extend along the categorical axis: y for horizontal
            # plots, x for vertical ones.
            if direction == 'horizontal':
                coords = line.get_ydata()
            else:
                coords = line.get_xdata()
            if len(coords) < 2:
                # Not a two-point stick; nothing sensible to classify.
                continue
            # The midpoint of a stick lies on the same side of the nearest
            # integer centre as the half it belongs to. Comparing the two
            # avoids the integer-ratio test, which divided by zero or
            # misclassified sticks for centres at or below zero.
            midpoint = (coords[0] + coords[1]) / 2.0
            center = round(midpoint)
            offset = -delta if midpoint < center else delta
            # Build a new array rather than shifting in place so that
            # integer-typed data does not raise on the float offset.
            shifted = coords + offset
            if direction == 'horizontal':
                line.set_ydata(shifted)
            else:
                line.set_xdata(shifted)

    # --- violin bodies -----------------------------------------------------
    if inner not in ('sticks', None):
        # Other inner styles are not handled; leave the bodies untouched.
        return

    # half type -> (vertex column to shift, signed offset)
    moves = {
        'top': (1, -delta),
        'bottom': (1, delta),
        'left': (0, -delta),
        'right': (0, delta),
    }
    for item in ax.collections:
        # axis contains PolyCollections and PathCollections
        if not isinstance(item, matplotlib.collections.PolyCollection):
            continue
        # each violin half is expected to consist of exactly one path
        path, = item.get_paths()
        vertices = path.vertices
        column, offset = moves[_wedge_dir(vertices, direction)]
        # shift the outline in place
        vertices[:, column] += offset
def _wedge_dir(vertices, direction):
    """
    Classify which half of a split violin a polygon outline belongs to.

    The decision is made by checking whether a short run of vertices shares
    one coordinate value:
      - 'horizontal': if the y-values of vertices 1..4 are all identical the
        half is 'bottom', otherwise 'top'.
      - 'vertical': if the x-values of the two vertices before the last one
        are identical the half is 'left', otherwise 'right'.
    NOTE(review): this presumably relies on the order in which Seaborn emits
    the outline vertices of each half -- confirm against the Seaborn version
    in use.

    Args:
      <vertices> The vertices from matplotlib.collections.PolyCollection,
                 indexed as a 2-D array with x in column 0 and y in column 1.
      <direction> Direction must be 'horizontal' or 'vertical' according to
                  how your plot is laid out.

    Returns:
      - a string in ['top', 'bottom', 'left', 'right'] that determines where
        the half of the violinplot is relative to the center.

    Raises:
      - ValueError if <direction> is neither 'horizontal' nor 'vertical'.
    """
    if direction == 'horizontal':
        probe = vertices[1:5, 1]
        when_constant, otherwise = 'bottom', 'top'
    elif direction == 'vertical':
        probe = vertices[-3:-1, 0]
        when_constant, otherwise = 'left', 'right'
    else:
        # Previously this fell through to an UnboundLocalError on `result`.
        raise ValueError(
            "direction must be 'horizontal' or 'vertical', got %r"
            % (direction,))
    # Exactly one distinct value means the probed run is constant.
    if len(set(probe)) == 1:
        return when_constant
    return otherwise
# Synthetic demo data: `n` normal samples, each assigned a random class name
# and a random boolean sub-class.
n = 100  # number of samples
c = ['cats', 'rats', 'bears', 'pears', 'snares']  # classes
y = np.random.randn(n)
x = np.random.choice(c, size=n)
z = np.random.rand(n) > 0.5  # sub-class
data = pd.DataFrame({'x': x, 'y': y, 'z': z})
print('done making data')

# Two stacked axes: a horizontal plot on top, a vertical plot below.
fig, (ax1, ax2) = plt.subplots(2)

inner = "sticks"  # Note: 'box' is default
width = 0.75
delta = 0.05
final_width = width - delta
print(data)

# Options shared by both plots; only the x/y roles and the target axis differ.
shared_options = dict(data=data, split=True, hue='z', inner=inner, bw=0.2)
sns.violinplot(x='y', y='x', ax=ax1, **shared_options)
sns.violinplot(x='x', y='y', ax=ax2, **shared_options)

# Pull the halves apart on each axis according to its orientation.
for target_axis, orientation in ((ax1, 'horizontal'), (ax2, 'vertical')):
    offset_violinplot_halves(target_axis, delta, final_width, inner,
                             orientation)

plt.show()
source to share