From d0f37f5c56533150252ea4be4a27c0a6cf8c9742 Mon Sep 17 00:00:00 2001 From: Egor Tensin Date: Sat, 18 Jun 2016 03:53:15 +0300 Subject: group online periods by date/user/etc. --- vk/utils/tracking/online_periods.py | 63 ++++++++++++++++++------- vk/utils/tracking/utils/how_much_online.py | 75 ++++++++++++++---------------- 2 files changed, 81 insertions(+), 57 deletions(-) (limited to 'vk/utils/tracking') diff --git a/vk/utils/tracking/online_periods.py b/vk/utils/tracking/online_periods.py index 9a99863..8bd2bda 100644 --- a/vk/utils/tracking/online_periods.py +++ b/vk/utils/tracking/online_periods.py @@ -2,45 +2,76 @@ # This file is licensed under the terms of the MIT License. # See LICENSE.txt for details. +from collections import OrderedDict from collections.abc import MutableMapping +from datetime import timedelta from vk.user import User class OnlinePeriodEnumerator(MutableMapping): def __init__(self): - self._records_by_user = {} + self._records = {} - def __getitem__(self, key): - return self._records_by_user[self._normalize_key(key)] + def __getitem__(self, user): + return self._records[user] - def __setitem__(self, key, value): - self._records_by_user[self._normalize_key(key)] = value + def __setitem__(self, user, record): + self._records[user] = record - def __delitem__(self, key): - del self._records_by_user[self._normalize_key(key)] + def __delitem__(self, user): + del self._records[user] def __iter__(self): - return iter(self._records_by_user) + return iter(self._records) def __len__(self): - return len(self._records_by_user) - - @staticmethod - def _normalize_key(key): - return key.get_uid() if isinstance(key, User) else key + return len(self._records) def enum(self, db_reader): for record in db_reader: period = self._insert_record(record) - #print(period) if period is not None: yield period + def duration_by_user(self, db_reader): + by_user = {} + for user, time_from, time_to in self.enum(db_reader): + if user not in by_user: + by_user[user] = timedelta() + by_user[user] += time_to - time_from + return by_user + + def duration_by_date(self, db_reader): + by_date = OrderedDict() + for _, time_from, time_to in self.enum(db_reader): + for date, duration in self._enum_dates_and_durations(time_from, time_to): + if date not in by_date: + by_date[date] = timedelta() + by_date[date] += duration + return by_date + + def duration_by_weekday(self, db_reader): + by_weekday = OrderedDict() + for weekday in range(7): + by_weekday[weekday] = timedelta() + for _, time_from, time_to in self.enum(db_reader): + for date, duration in self._enum_dates_and_durations(time_from, time_to): + by_weekday[date.weekday()] += duration + return by_weekday + + @staticmethod + def _enum_dates_and_durations(time_from, time_to): + while time_from.date() != time_to.date(): + next_day = time_from.date() + timedelta(days=1) + yield time_from.date(), next_day - time_from + time_from = next_day + yield time_to.date(), time_to - time_from + def _insert_record(self, record): return self._insert_user(record.to_user()) def _known_user(self, user): - return user.get_uid() in self._records_by_user + return user.get_uid() in self._records def _unknown_user(self, user): return not self._known_user(user) @@ -48,10 +79,8 @@ class OnlinePeriodEnumerator(MutableMapping): def _insert_user(self, user): if user not in self or self[user].is_offline(): self[user] = user - #print(2) return None if user.is_online(): - #print(3) print(user._fields) return None period = user, self[user].get_last_seen_time(), user.get_last_seen_time() diff --git a/vk/utils/tracking/utils/how_much_online.py b/vk/utils/tracking/utils/how_much_online.py index b40c357..6ab4d1e 100644 --- a/vk/utils/tracking/utils/how_much_online.py +++ b/vk/utils/tracking/utils/how_much_online.py @@ -12,19 +12,14 @@ import sys import matplotlib.pyplot as plt import numpy as np +from .. import OnlinePeriodEnumerator from ..db import Format as DatabaseFormat from vk.user import UserField def process_database(db_reader, writer): - from vk.utils.tracking import OnlinePeriodEnumerator - wasted_time_by_user = {} - for online_period in OnlinePeriodEnumerator().enum(db_reader): - user, time_from, time_to = online_period - if user not in wasted_time_by_user: - wasted_time_by_user[user] = timedelta() - wasted_time_by_user[user] = time_to - time_from - for user, wasted_time in wasted_time_by_user.items(): - writer.write_wasted_time(user, wasted_time) + by_user = OnlinePeriodEnumerator().duration_by_user(db_reader) + for user, duration in by_user.items(): + writer.add_user_duration(user, duration) class OutputFormat(Enum): CSV = 'csv' @@ -34,7 +29,7 @@ class OutputFormat(Enum): def __str__(self): return self.value -OUTPUT_USER_FIELDS = ( +_USER_FIELDS = ( UserField.UID, UserField.FIRST_NAME, UserField.LAST_NAME, @@ -51,42 +46,42 @@ class OutputWriterCSV: def __exit__(self, *args): pass - def write_wasted_time(self, user, wasted_time): - self._write_row(self._wasted_time_to_row(user, wasted_time)) + def add_user_duration(self, user, duration): + self._write_row(self._user_duration_to_row(user, duration)) def _write_row(self, row): self._writer.writerow(row) @staticmethod - def _wasted_time_to_row(user, wasted_time): + def _user_duration_to_row(user, duration): row = [] - for field in OUTPUT_USER_FIELDS: + for field in _USER_FIELDS: row.append(user[field]) - row.append(str(wasted_time)) + row.append(str(duration)) return row class OutputWriterJSON: def __init__(self, fd=sys.stdout): self._fd = fd - self._records = [] + self._array = [] def __enter__(self): return self def __exit__(self, *args): - self._fd.write(json.dumps(self._records, indent=3)) + self._fd.write(json.dumps(self._array, indent=3)) - def write_wasted_time(self, user, wasted_time): - self._records.append(self._wasted_time_to_record(user, wasted_time)) + def add_user_duration(self, user, duration): + self._array.append(self._user_duration_to_object(user, duration)) - _WASTED_TIME_FIELD = 'wasted_time' + _DURATION_FIELD = 'duration' @staticmethod - def _wasted_time_to_record(user, wasted_time): + def _user_duration_to_object(user, duration): record = OrderedDict() - for field in OUTPUT_USER_FIELDS: + for field in _USER_FIELDS: record[str(field)] = user[field] - record[OutputWriterJSON._WASTED_TIME_FIELD] = str(wasted_time) + record[OutputWriterJSON._DURATION_FIELD] = str(duration) return record class BarChartBuilder: @@ -169,7 +164,7 @@ class BarChartBuilder: class PlotBuilder: def __init__(self, fd=sys.stdout): - self._wasted_time_by_user = {} + self._duration_by_user = {} self._fd = fd pass @@ -177,22 +172,22 @@ class PlotBuilder: return self @staticmethod - def _format_user_name(user): + def _format_user(user): return '{}\n{}'.format(user.get_first_name(), user.get_last_name()) @staticmethod - def _format_wasted_time(seconds, _): + def _format_duration(seconds, _): return str(timedelta(seconds=seconds)) @staticmethod - def _wasted_time_to_seconds(td): + def _duration_to_seconds(td): return td.total_seconds() - def _get_user_names(self): - return tuple(map(self._format_user_name, self._wasted_time_by_user.keys())) + def _get_users(self): + return tuple(map(self._format_user, self._duration_by_user.keys())) - def _get_wasted_seconds(self): - return tuple(map(self._wasted_time_to_seconds, self._wasted_time_by_user.values())) + def _get_durations(self): + return tuple(map(self._duration_to_seconds, self._duration_by_user.values())) def __exit__(self, *args): bar_chart = BarChartBuilder() @@ -203,15 +198,15 @@ class PlotBuilder: bar_chart.set_integer_values_only() bar_chart.set_property(bar_chart.get_value_labels(), fontsize='small', rotation=30) - bar_chart.set_value_label_formatter(self._format_wasted_time) + bar_chart.set_value_label_formatter(self._format_duration) - users = self._get_user_names() - wasted_time = self._get_wasted_seconds() + users = self._get_users() + durations = self._get_durations() - if not self._wasted_time_by_user or not max(wasted_time): + if not self._duration_by_user or not max(durations): bar_chart.set_value_axis_limits(0) - bars = bar_chart.plot_bars(users, wasted_time) + bars = bar_chart.plot_bars(users, durations) bar_chart.set_property(bars, alpha=.33) if self._fd is sys.stdout: @@ -219,12 +214,12 @@ class PlotBuilder: else: bar_chart.save(self._fd) - def write_wasted_time(self, user, wasted_time): - #if len(self._wasted_time_by_user) >= 1: + def add_user_duration(self, user, duration): + #if len(self._duration_by_user) >= 1: # return - #if wasted_time.total_seconds(): + #if duration.total_seconds(): # return - self._wasted_time_by_user[user] = wasted_time # + timedelta(seconds=3) + self._duration_by_user[user] = duration # + timedelta(seconds=3) def open_output_writer_csv(fd): return OutputWriterCSV(fd) -- cgit v1.2.3