I have a dataset that looks like this:
I want to do the following:
- Make sure the bars do not overlap.
- Treat each bar like a separate dataset, i.e. the labels on the x axis should be separate: one for the yellow series and one for the red series. These labels should be the words. (I want to have two series of xtick labels in this chart: one for `words_2`, and one for `words_1`.)
Current code:
import matplotlib.pyplot as plt
import numpy as np
import copy
import random
from random import randint
# Grouped bar chart comparing word frequencies from two series, with one
# row of x tick labels per series: major ticks label the first (red)
# series, minor ticks label the second (yellow) series.

# Fixed seed so the randomly generated second series is reproducible.
random.seed(11)

word_freq_1 = [
    ('test', 510), ('Hey', 362), ("please", 753), ('take', 446),
    ('herbert', 325), ('live', 222), ('hate', 210), ('white', 191),
    ('simple', 175), ('harry', 172), ('woman', 170), ('basil', 153),
    ('things', 129), ('think', 126), ('bye', 124), ('thing', 120),
    ('love', 107), ('quite', 107), ('face', 107), ('eyes', 107),
    ('time', 106), ('himself', 105), ('want', 105), ('good', 105),
    ('really', 103), ('away', 100), ('did', 100), ('people', 99),
    ('came', 97), ('say', 97), ('cried', 95), ('looked', 94),
    ('tell', 92), ('look', 91), ('world', 89), ('work', 89),
    ('project', 88), ('room', 88), ('going', 87), ('answered', 87),
    ('mr', 87), ('little', 87), ('yes', 84), ('silly', 82),
    ('thought', 82), ('shall', 81), ('circle', 80), ('hallward', 80),
    ('told', 77), ('feel', 76), ('great', 74), ('art', 74),
    ('dear', 73), ('picture', 73), ('men', 72), ('long', 71),
    ('young', 70), ('lady', 69), ('let', 66), ('minute', 66),
    ('women', 66), ('soul', 65), ('door', 64), ('hand', 63),
    ('went', 63), ('make', 63), ('night', 62), ('asked', 61),
    ('old', 61), ('passed', 60), ('afraid', 60), ('night', 59),
    ('looking', 58), ('wonderful', 58), ('gutenberg-tm', 56),
    ('beauty', 55), ('sir', 55), ('table', 55), ('turned', 54),
    ('lips', 54), ("one's", 54), ('better', 54), ('got', 54),
    ('vane', 54), ('right', 53), ('left', 53), ('course', 52),
    ('hands', 52), ('portrait', 52), ('head', 51), ("can't", 49),
    ('true', 49), ('house', 49), ('believe', 49), ('black', 49),
    ('horrible', 48), ('oh', 48), ('knew', 47), ('curious', 47),
    ('myself', 47),
]

# Second series: the same words paired with a random count in [1, 500].
# One randint call per word, in list order, so the seeded sequence is
# consumed exactly as before. The loop variable no longer shadows the
# builtin `tuple`.
word_freq_2 = [(word, randint(1, 500)) for word, _ in word_freq_1]

N = 25  # number of leading entries of each series to plot
ind = np.arange(N)  # the x locations for the groups
width = 0.35  # the width of the bars

fig, ax = plt.subplots()

words_1 = [word for word, _ in word_freq_1[:N]]
values_1 = [int(count) for _, count in word_freq_1[:N]]
words_2 = [word for word, _ in word_freq_2[:N]]
values_2 = [int(count) for _, count in word_freq_2[:N]]
print(words_2)

# Both series get the same explicit width. Leaving it off the first call
# makes matplotlib fall back to its default bar width (0.8), which is what
# made the red bars overlap the yellow ones. align='center' is stated
# explicitly so each tick sits under the middle of its bar regardless of
# the matplotlib version's default alignment.
rects1 = ax.bar(ind, values_1, width, color='r', align='center')
rects2 = ax.bar(ind + width, values_2, width, color='y', align='center')

# add some text for labels, title and axes ticks
ax.set_ylabel('Words')
ax.set_title('Word Frequencies by Test and Training Set')

# Two independent rows of x tick labels: major ticks under the first
# series, minor ticks under the second.
ax.set_xticks(ind)
ax.set_xticklabels(words_1, rotation=90)
ax.set_xticks(ind + width, minor=True)
ax.set_xticklabels(words_2, minor=True, rotation=90)

ax.tick_params(axis='both', which='major', labelsize=6)
ax.tick_params(axis='both', which='minor', labelsize=6)

ax.legend((rects1[0], rects2[0]), ('Test', 'Train'))

# Run the layout pass last so it accounts for every artist added above.
fig.tight_layout()
plt.savefig('test.png')
I found a solution to this. The key is to set two types of `xticks`: `minor` and `major`. In addition, the overlapping bars were due to me not specifying the bar width for `rects1`.
This results in: