about | summary | refs | log | tree | commit | diff | stats | homepage
path: root/vk
diff options
context:
space:
mode:
author: Egor Tensin <Egor.Tensin@gmail.com> 2016-06-18 03:53:15 +0300
committer: Egor Tensin <Egor.Tensin@gmail.com> 2016-06-18 03:53:15 +0300
commit: d0f37f5c56533150252ea4be4a27c0a6cf8c9742 (patch)
tree: 43531ab480c274f9603ba06712f416ce200853fa /vk
parent: put format-specific db writers/readers together (diff)
download: vk-scripts-d0f37f5c56533150252ea4be4a27c0a6cf8c9742.tar.gz
vk-scripts-d0f37f5c56533150252ea4be4a27c0a6cf8c9742.zip
group online periods by date/user/etc.
Diffstat (limited to 'vk')
-rw-r--r-- vk/utils/tracking/online_periods.py 63
-rw-r--r-- vk/utils/tracking/utils/how_much_online.py 75
2 files changed, 81 insertions, 57 deletions
diff --git a/vk/utils/tracking/online_periods.py b/vk/utils/tracking/online_periods.py
index 9a99863..8bd2bda 100644
--- a/vk/utils/tracking/online_periods.py
+++ b/vk/utils/tracking/online_periods.py
@@ -2,45 +2,76 @@
# This file is licensed under the terms of the MIT License.
# See LICENSE.txt for details.
+from collections import OrderedDict
from collections.abc import MutableMapping
+from datetime import timedelta
from vk.user import User
class OnlinePeriodEnumerator(MutableMapping):
def __init__(self):
- self._records_by_user = {}
+ self._records = {}
- def __getitem__(self, key):
- return self._records_by_user[self._normalize_key(key)]
+ def __getitem__(self, user):
+ return self._records[user]
- def __setitem__(self, key, value):
- self._records_by_user[self._normalize_key(key)] = value
+ def __setitem__(self, user, record):
+ self._records[user] = record
- def __delitem__(self, key):
- del self._records_by_user[self._normalize_key(key)]
+ def __delitem__(self, user):
+ del self._records[user]
def __iter__(self):
- return iter(self._records_by_user)
+ return iter(self._records)
def __len__(self):
- return len(self._records_by_user)
-
- @staticmethod
- def _normalize_key(key):
- return key.get_uid() if isinstance(key, User) else key
+ return len(self._records)
def enum(self, db_reader):
for record in db_reader:
period = self._insert_record(record)
- #print(period)
if period is not None:
yield period
+ def duration_by_user(self, db_reader):
+ by_user = {}
+ for user, time_from, time_to in self.enum(db_reader):
+ if user not in by_user:
+ by_user[user] = timedelta()
+ by_user[user] += time_to - time_from
+ return by_user
+
+ def duration_by_date(self, db_reader):
+ by_date = OrderedDict()
+ for _, time_from, time_to in self.enum(db_reader):
+ for date, duration in self._enum_dates_and_durations(time_from, time_to):
+ if date not in by_date:
+ by_date[date] = timedelta()
+ by_date[date] += duration
+ return by_date
+
+ def duration_by_weekday(self, db_reader):
+ by_weekday = OrderedDict()
+ for weekday in range(7):
+ by_weekday[weekday] = timedelta()
+ for _, time_from, time_to in self.enum(db_reader):
+ for date, duration in self._enum_dates_and_durations(time_from, time_to):
+ by_weekday[date.weekday()] += duration
+ return by_weekday
+
+ @staticmethod
+ def _enum_dates_and_durations(time_from, time_to):
+ while time_from.date() != time_to.date():
+ next_day = time_from.date() + timedelta(days=1)
+ yield time_from.date(), next_day - time_from
+ time_from = next_day
+ yield time_to.date(), time_to - time_from
+
def _insert_record(self, record):
return self._insert_user(record.to_user())
def _known_user(self, user):
- return user.get_uid() in self._records_by_user
+ return user.get_uid() in self._records
def _unknown_user(self, user):
return not self._known_user(user)
@@ -48,10 +79,8 @@ class OnlinePeriodEnumerator(MutableMapping):
def _insert_user(self, user):
if user not in self or self[user].is_offline():
self[user] = user
- #print(2)
return None
if user.is_online():
- #print(3)
print(user._fields)
return None
period = user, self[user].get_last_seen_time(), user.get_last_seen_time()
diff --git a/vk/utils/tracking/utils/how_much_online.py b/vk/utils/tracking/utils/how_much_online.py
index b40c357..6ab4d1e 100644
--- a/vk/utils/tracking/utils/how_much_online.py
+++ b/vk/utils/tracking/utils/how_much_online.py
@@ -12,19 +12,14 @@ import sys
import matplotlib.pyplot as plt
import numpy as np
+from .. import OnlinePeriodEnumerator
from ..db import Format as DatabaseFormat
from vk.user import UserField
def process_database(db_reader, writer):
- from vk.utils.tracking import OnlinePeriodEnumerator
- wasted_time_by_user = {}
- for online_period in OnlinePeriodEnumerator().enum(db_reader):
- user, time_from, time_to = online_period
- if user not in wasted_time_by_user:
- wasted_time_by_user[user] = timedelta()
- wasted_time_by_user[user] = time_to - time_from
- for user, wasted_time in wasted_time_by_user.items():
- writer.write_wasted_time(user, wasted_time)
+ by_user = OnlinePeriodEnumerator().duration_by_user(db_reader)
+ for user, duration in by_user.items():
+ writer.add_user_duration(user, duration)
class OutputFormat(Enum):
CSV = 'csv'
@@ -34,7 +29,7 @@ class OutputFormat(Enum):
def __str__(self):
return self.value
-OUTPUT_USER_FIELDS = (
+_USER_FIELDS = (
UserField.UID,
UserField.FIRST_NAME,
UserField.LAST_NAME,
@@ -51,42 +46,42 @@ class OutputWriterCSV:
def __exit__(self, *args):
pass
- def write_wasted_time(self, user, wasted_time):
- self._write_row(self._wasted_time_to_row(user, wasted_time))
+ def add_user_duration(self, user, duration):
+ self._write_row(self._user_duration_to_row(user, duration))
def _write_row(self, row):
self._writer.writerow(row)
@staticmethod
- def _wasted_time_to_row(user, wasted_time):
+ def _user_duration_to_row(user, duration):
row = []
- for field in OUTPUT_USER_FIELDS:
+ for field in _USER_FIELDS:
row.append(user[field])
- row.append(str(wasted_time))
+ row.append(str(duration))
return row
class OutputWriterJSON:
def __init__(self, fd=sys.stdout):
self._fd = fd
- self._records = []
+ self._array = []
def __enter__(self):
return self
def __exit__(self, *args):
- self._fd.write(json.dumps(self._records, indent=3))
+ self._fd.write(json.dumps(self._array, indent=3))
- def write_wasted_time(self, user, wasted_time):
- self._records.append(self._wasted_time_to_record(user, wasted_time))
+ def add_user_duration(self, user, duration):
+ self._array.append(self._user_duration_to_object(user, duration))
- _WASTED_TIME_FIELD = 'wasted_time'
+ _DURATION_FIELD = 'duration'
@staticmethod
- def _wasted_time_to_record(user, wasted_time):
+ def _user_duration_to_object(user, duration):
record = OrderedDict()
- for field in OUTPUT_USER_FIELDS:
+ for field in _USER_FIELDS:
record[str(field)] = user[field]
- record[OutputWriterJSON._WASTED_TIME_FIELD] = str(wasted_time)
+ record[OutputWriterJSON._DURATION_FIELD] = str(duration)
return record
class BarChartBuilder:
@@ -169,7 +164,7 @@ class BarChartBuilder:
class PlotBuilder:
def __init__(self, fd=sys.stdout):
- self._wasted_time_by_user = {}
+ self._duration_by_user = {}
self._fd = fd
pass
@@ -177,22 +172,22 @@ class PlotBuilder:
return self
@staticmethod
- def _format_user_name(user):
+ def _format_user(user):
return '{}\n{}'.format(user.get_first_name(), user.get_last_name())
@staticmethod
- def _format_wasted_time(seconds, _):
+ def _format_duration(seconds, _):
return str(timedelta(seconds=seconds))
@staticmethod
- def _wasted_time_to_seconds(td):
+ def _duration_to_seconds(td):
return td.total_seconds()
- def _get_user_names(self):
- return tuple(map(self._format_user_name, self._wasted_time_by_user.keys()))
+ def _get_users(self):
+ return tuple(map(self._format_user, self._duration_by_user.keys()))
- def _get_wasted_seconds(self):
- return tuple(map(self._wasted_time_to_seconds, self._wasted_time_by_user.values()))
+ def _get_durations(self):
+ return tuple(map(self._duration_to_seconds, self._duration_by_user.values()))
def __exit__(self, *args):
bar_chart = BarChartBuilder()
@@ -203,15 +198,15 @@ class PlotBuilder:
bar_chart.set_integer_values_only()
bar_chart.set_property(bar_chart.get_value_labels(),
fontsize='small', rotation=30)
- bar_chart.set_value_label_formatter(self._format_wasted_time)
+ bar_chart.set_value_label_formatter(self._format_duration)
- users = self._get_user_names()
- wasted_time = self._get_wasted_seconds()
+ users = self._get_users()
+ durations = self._get_durations()
- if not self._wasted_time_by_user or not max(wasted_time):
+ if not self._duration_by_user or not max(durations):
bar_chart.set_value_axis_limits(0)
- bars = bar_chart.plot_bars(users, wasted_time)
+ bars = bar_chart.plot_bars(users, durations)
bar_chart.set_property(bars, alpha=.33)
if self._fd is sys.stdout:
@@ -219,12 +214,12 @@ class PlotBuilder:
else:
bar_chart.save(self._fd)
- def write_wasted_time(self, user, wasted_time):
- #if len(self._wasted_time_by_user) >= 1:
+ def add_user_duration(self, user, duration):
+ #if len(self._duration_by_user) >= 1:
# return
- #if wasted_time.total_seconds():
+ #if duration.total_seconds():
# return
- self._wasted_time_by_user[user] = wasted_time # + timedelta(seconds=3)
+ self._duration_by_user[user] = duration # + timedelta(seconds=3)
def open_output_writer_csv(fd):
return OutputWriterCSV(fd)