Generate a histogram in R for an employee
Below is a dataset containing records of an employee's attendance:
date intime outtime 2 02/11/2015 10:21:27 17:58:12 3 03/11/2015 10:13:09 18:52:44 4 04/11/2015 10:11:52 18:40:36 5 05/11/2015 10:31:42 18:16:57 6 06/11/2015 10:13:13 18:36:15 10 10/11/2015 10:03:20 18:07:52 11 11/11/2015 09:40:20 18:42:20 12 12/11/2015 10:38:56 18:37:20 13 13/11/2015 10:45:26 18:09:54 16 16/11/2015 10:13:13 18:36:15 17 17/11/2015 10:11:43 18:36:15 18 18/11/2015 10:13:13 18:36:15 19 19/11/2015 10:13:13 18:36:15 20 20/11/2015 12:14:25 20:25:08 23 23/11/2015 10:08:08 17:57:35 24 24/11/2015 14:30:32 18:36:15
The total time served by the employee, in hours:
# Sum, over all rows of `newdata`, the span between the arrival time and the
# departure time after limiting both to the 08:00:00-18:00:00 window.
# NOTE(review): assumes `newdata` already exists and that `intime`/`outtime`
# are of a time class that can be compared with these strings - confirm.
total_time <- with(newdata, {
  capped_out <- pmin(outtime, "18:00:00")  # departure, no later than 18:00:00
  capped_in <- pmax(intime, "08:00:00")    # arrival, no earlier than 08:00:00
  sum(capped_out - capped_in)
})

# The numeric total is rounded down first and only then multiplied by 24.
total_time <- 24 * floor(as.numeric(total_time))

# NOTE(review): the next line looks like pasted console output - confirm.
"total time served employee : 96 hours"
I want to generate a histogram for each employee showing the hours served on a monthly basis, with a total of 5 bins.
I changed the data so that it has information for more months (for a better histogram):
library(data.table)
library(lubridate)
library(ggplot2)

# Attendance records: one row per working day with arrival and departure times.
df <- fread("date intime outtime
02/11/2015 10:21:27 17:58:12
03/11/2015 10:13:09 18:52:44
04/11/2015 10:11:52 18:40:36
05/11/2015 10:31:42 18:16:57
06/11/2015 10:13:13 18:36:15
10/11/2015 10:03:20 18:07:52
11/11/2015 09:40:20 18:42:20
12/11/2015 10:38:56 18:37:20
13/11/2015 10:45:26 18:09:54
16/11/2015 10:13:13 18:36:15
17/11/2015 10:11:43 18:36:15
18/11/2015 10:13:13 18:36:15
19/11/2015 10:13:13 18:36:15
20/11/2015 12:14:25 20:25:08
23/11/2015 10:08:08 17:57:35
24/11/2015 14:30:32 18:36:15")

# Parse an "HH:MM:SS" string into a POSIXct value. R is case-sensitive: the
# function is as.POSIXct and the format codes are upper-case %H, %M, %S.
# Every value is parsed in UTC so that differences between two clock times
# are never distorted by a local daylight-saving change.
parse_clock <- function(x) {
  as.POSIXct(x, format = "%H:%M:%S", tz = "UTC")
}

df[, intime := parse_clock(intime)]
df[, outtime := parse_clock(outtime)]

# Day of the month, extracted from the dd/mm/yyyy date string.
df[, day := day(dmy(date))]

# Hours served per day, counting only time inside the 08:00-18:00 window:
# departures after 18:00 are capped at 18:00, arrivals before 08:00 at 08:00.
window_start <- parse_clock("08:00:00")
window_end <- parse_clock("18:00:00")
df[, total_time := as.numeric(
  difftime(pmin(outtime, window_end), pmax(intime, window_start),
           units = "hours")
)]

# The heights are already computed, so draw them directly with geom_col()
# (bars with stat = "identity") instead of asking geom_histogram() to bin.
ggplot(df, aes(x = day, y = total_time)) +
  geom_col()
With 5 bins (only 4 bins appear in the plot because there is no data in the 24-30 day interval):
# Group the days of the month into intervals. max(df$day) is used as one of
# the cut points; sort() and unique() keep the break vector valid for cut()
# even when that value coincides with one of the fixed cut points.
day_breaks <- sort(unique(c(0, 5, 10, 15, max(df$day), 30)))
df$breaks <- cut(df$day, breaks = day_breaks)

# Total hours per interval. Base aggregate() is used so that no package
# beyond those already loaded is required. Intervals containing no rows do
# not appear in the result.
df1 <- aggregate(
  list(total_hr = df$total_time),
  by = list(breaks = df$breaks),
  FUN = sum
)

# One bar per interval; the heights are the pre-computed totals, so geom_col()
# is used rather than geom_histogram() with binning arguments.
ggplot(df1, aes(x = breaks, y = total_hr)) +
  geom_col()
Comments
Post a Comment