I have code that creates a histogram from data. The data contains the areas of "dots". Currently the histogram plots the number of dots in each category, but I want to calculate dots per square meter (done with the `calculate_dots_per_m2` function) and then plot a histogram where the y data is dots/m^2 instead of the original dots/mm^2. Here is my code:
import numpy as np
from matplotlib import pyplot as plt
# Named colours mapped to 3-component integer tuples.
# NOTE(review): the channel order looks like BGR (RED is (0, 0, 255)) — confirm.
color_dict = dict(
    RED=(0, 0, 255),
    GREEN=(0, 225, 0),
    BLUE=(255, 0, 0),
    ORANGE=(0, 165, 255),
    MAGENTA=(255, 0, 255),
)

# Parallel lists of colour names and colour tuples, in insertion order.
color_texts = [*color_dict]
colors = [color_dict[name] for name in color_texts]

# Ascending edges of the dot-size categories (labelled as mm^2 in the plot).
all_areas = [0.01, 0.1, 1, 10, 100]
def calculate_dots_per_m2(num_sizes, scale_factor=25):
    """Scale per-category dot counts to dots per square metre.

    Args:
        num_sizes: Iterable of dot counts, one entry per size category.
        scale_factor: Multiplier applied to every count. Defaults to 25,
            the value that used to be hard-coded in this function.
            NOTE(review): presumably the inspected area is 1/25 m^2 —
            confirm against the measurement setup.

    Returns:
        A tuple ``(num_of_dots_per_m2, total_dots_per_m2)``: the list of
        scaled counts in input order, and their sum.
    """
    num_of_dots_per_m2 = [count * scale_factor for count in num_sizes]
    return num_of_dots_per_m2, sum(num_of_dots_per_m2)
def make_histogram():
    """Plot a bar chart of how many example dots fall into each size category.

    The categories are delimited by the module-level ``all_areas`` edges,
    plus one open-ended category above the last edge. Bar heights come from
    ``np.histogram``; the counts shown in the tick labels come from a
    separate manual grouping.

    NOTE: the manual grouping uses strict inequalities, so a dot whose area
    equals an edge, or lies below the first edge, is not counted in the
    labels. ``np.histogram`` bins include their left edge.

    NOTE: the per-m^2 values are computed but not plotted; the bars show the
    raw counts even though the y-axis label reads "per mm^2".
    """
    all_dot_areas = [0.05, 1.4, 1.5, 1.9, 3.7, 11.5, 29.1, 99.0, 111.1, 4123.1]
    largest_edge = all_areas[-1]

    # Group the dot areas by category; the last group is "above the last edge".
    dots = [[] for _ in all_areas]
    for area in all_dot_areas:
        for idx, (lower, upper) in enumerate(zip(all_areas, all_areas[1:])):
            if lower < area < upper:
                dots[idx].append(area)
        if area > largest_edge:
            dots[-1].append(area)
    num_sizes = [len(group) for group in dots]

    # Scaled counts are calculated here but are not used by the plot below.
    num_of_dots_per_m2, total_dots_per_m2 = calculate_dots_per_m2(num_sizes)

    # A very large final edge turns the open-ended category into a real bin.
    bins = [*all_areas, 1e9]
    hist, _ = np.histogram(all_dot_areas, bins)

    fig, ax = plt.subplots()
    fig.set_size_inches(16, 9)
    plt.ylabel("Number of dots per mm^2")

    # One tick label per bounded category, then one for the open-ended one.
    ticklabels = [
        "Dots sized {}-{} mm^2: {}".format(lower, upper, count)
        for lower, upper, count in zip(all_areas, all_areas[1:], num_sizes)
    ]
    ticklabels.append(
        "Dots with size over {} mm^2: {}".format(largest_edge, num_sizes[-1])
    )

    # The lower-cased colour names are passed to matplotlib as bar colours.
    colours = [name.lower() for name in color_texts]
    ax.bar(
        range(len(hist)),
        hist,
        width=0.75,
        align="center",
        tick_label=ticklabels,
        color=colours,
        ec="black",
    )
    plt.show()
# Build and display the example histogram when the script runs.
make_histogram()
I want to change the code so that the x data stays the same (x is the number of dots in each category), but the y data is scaled to m^2 (number of dots per m^2). So in this simple example I would expect all bars to be 25 times higher than they are now. I tried changing `hist, bin_edges = np.histogram(all_dot_areas, bins)` to `hist, bin_edges = np.histogram(num_of_dots_per_m2, bins)`, but it did not help. I don't understand what I should do here.
You can use the `weights` parameter of `histogram` to scale the counts by 25: