about summary refs log tree commit diff stats homepage
diff options
context:
space:
mode:
author Egor Tensin <Egor.Tensin@gmail.com> 2016-07-19 23:17:51 +0300
committer Egor Tensin <Egor.Tensin@gmail.com> 2016-07-19 23:17:51 +0300
commit 5fc68a09752e47e61b0134146ff6795c7295d131 (patch)
tree d8c76d3c2f1653eb68241ed762e7b00ee9c8c0f9
parent docs: update (diff)
download vk-scripts-5fc68a09752e47e61b0134146ff6795c7295d131.tar.gz
vk-scripts-5fc68a09752e47e61b0134146ff6795c7295d131.zip
make the terminology more consistent
"Online sessions" instead of "online periods/streaks/durations", "time ranges" instead of "date ranges", etc.
Diffstat (limited to '')
-rw-r--r-- bin/online_duration.py 39
-rw-r--r-- docs/online_duration.md 7
-rw-r--r-- vk/tracking/__init__.py 4
-rw-r--r-- vk/tracking/online_sessions.py 133
-rw-r--r-- vk/tracking/online_streaks.py 133
5 files changed, 157 insertions, 159 deletions
diff --git a/bin/online_duration.py b/bin/online_duration.py
index d5b8f35..ac9251b 100644
--- a/bin/online_duration.py
+++ b/bin/online_duration.py
@@ -13,7 +13,7 @@ import sys
import matplotlib.pyplot as plt
import numpy as np
-from vk.tracking import OnlineStreakEnumerator
+from vk.tracking import OnlineSessionEnumerator
from vk.tracking.db import Format as DatabaseFormat
from vk.user import UserField
@@ -23,8 +23,8 @@ class GroupBy(Enum):
WEEKDAY = 'weekday'
HOUR = 'hour'
- def enum_durations(self, db_reader, date_from=None, date_to=None):
- online_streaks = OnlineStreakEnumerator(date_from, date_to)
+ def group(self, db_reader, time_from=None, time_to=None):
+ online_streaks = OnlineSessionEnumerator(time_from, time_to)
if self is GroupBy.USER:
return online_streaks.group_by_user(db_reader)
elif self is GroupBy.DATE:
@@ -80,8 +80,8 @@ class OutputWriterCSV:
raise NotImplementedError('unsupported grouping: ' + str(group_by))
return OutputWriterCSV._CONVERT_KEY[group_by](key)
- def process_database(self, group_by, db_reader, date_from=None, date_to=None):
- for key, duration in group_by.enum_durations(db_reader, date_from, date_to).items():
+ def process_database(self, group_by, db_reader, time_from=None, time_to=None):
+ for key, duration in group_by.group(db_reader, time_from, time_to).items():
row = self._key_to_row(group_by, key)
row.append(str(duration))
self._write_row(row)
@@ -148,9 +148,9 @@ class OutputWriterJSON:
self._fd.write(json.dumps(x, indent=3, ensure_ascii=False))
self._fd.write('\n')
- def process_database(self, group_by, db_reader, date_from=None, date_to=None):
+ def process_database(self, group_by, db_reader, time_from=None, time_to=None):
arr = []
- for key, duration in group_by.enum_durations(db_reader, date_from, date_to).items():
+ for key, duration in group_by.group(db_reader, time_from, time_to).items():
obj = self._key_to_object(group_by, key)
obj[self._DURATION_FIELD] = str(duration)
arr.append(obj)
@@ -300,10 +300,9 @@ class OutputWriterPlot:
return tuple(map(OutputWriterPlot._duration_to_seconds, durations.values()))
def process_database(
- self, group_by, db_reader, date_from=None, date_to=None):
+ self, group_by, db_reader, time_from=None, time_to=None):
- durations = group_by.enum_durations(
- db_reader, date_from, date_to)
+ durations = group_by.group(db_reader, time_from, time_to)
bar_chart = BarChartBuilder()
@@ -394,7 +393,7 @@ def _parse_args(args=sys.argv):
type=_parse_group_by,
choices=GroupBy,
default=GroupBy.USER,
- help='group online streaks by user/date/etc.')
+ help='group online sessions by user/date/etc.')
parser.add_argument('-i', '--input-format', dest='db_fmt',
type=_parse_database_format,
default=DatabaseFormat.CSV,
@@ -405,12 +404,12 @@ def _parse_args(args=sys.argv):
choices=OutputFormat,
default=OutputFormat.CSV,
help='specify output format')
- parser.add_argument('-a', '--from', dest='date_from',
+ parser.add_argument('-a', '--from', dest='time_from',
type=_parse_date_range_limit, default=None,
- help='set the date to process database records from')
- parser.add_argument('-b', '--to', dest='date_to',
+ help='discard online activity prior to this moment')
+ parser.add_argument('-b', '--to', dest='time_to',
type=_parse_date_range_limit, default=None,
- help='set the date to process database record to')
+ help='discard online activity after this moment')
return parser.parse_args(args[1:])
@@ -418,16 +417,16 @@ def write_online_duration(
db_fd, db_fmt=DatabaseFormat.CSV,
fd=sys.stdout, fmt=OutputFormat.CSV,
group_by=GroupBy.USER,
- date_from=None, date_to=None):
+ time_from=None, time_to=None):
- if date_from is not None and date_to is not None:
- if date_from > date_to:
- date_from, date_to = date_to, date_from
+ if time_from is not None and time_to is not None:
+ if time_from > time_to:
+ time_from, time_to = time_to, time_from
with db_fmt.create_reader(db_fd) as db_reader:
output_writer = fmt.create_writer(fd)
output_writer.process_database(
- group_by, db_reader, date_from=date_from, date_to=date_to)
+ group_by, db_reader, time_from=time_from, time_to=time_to)
def main(args=sys.argv):
args = _parse_args(args)
diff --git a/docs/online_duration.md b/docs/online_duration.md
index 81e5e48..f5c703b 100644
--- a/docs/online_duration.md
+++ b/docs/online_duration.md
@@ -12,7 +12,7 @@ Run from the top-level directory using `python -m`:
> python -m bin.online_duration -h
usage: online_duration.py [-h] [-g {user,date,weekday,hour}]
[-i {csv,log,null}] [-o {csv,json,plot}]
- [-a DATE_FROM] [-b DATE_TO]
+ [-a TIME_FROM] [-b TIME_TO]
input [output]
```
@@ -141,10 +141,9 @@ a file path to write the image to.
![hour.png]
You can limit the scope of the database by supplying a time range.
-Only online durations that are within the supplied range shall then be
-processed.
+Only online sessions that overlap with this range shall then be processed.
Set the range by specifying both or one of the `--from` and `--to` parameters.
-Values must be in the `%Y-%m-%dT%H:%M:%SZ` format (a subset of ISO 8601).
+The values must be in the `%Y-%m-%dT%H:%M:%SZ` format (a subset of ISO 8601).
All dates and times are in UTC.
diff --git a/vk/tracking/__init__.py b/vk/tracking/__init__.py
index cfc80ea..0404094 100644
--- a/vk/tracking/__init__.py
+++ b/vk/tracking/__init__.py
@@ -2,7 +2,7 @@
# This file is licensed under the terms of the MIT License.
# See LICENSE.txt for details.
-from .online_streaks import OnlineStreakEnumerator, Weekday
+from .online_sessions import OnlineSessionEnumerator, Weekday
from .status_tracker import StatusTracker
-__all__ = 'online_streaks', 'status_tracker',
+__all__ = 'online_sessions', 'status_tracker',
diff --git a/vk/tracking/online_sessions.py b/vk/tracking/online_sessions.py
new file mode 100644
index 0000000..debf1e6
--- /dev/null
+++ b/vk/tracking/online_sessions.py
@@ -0,0 +1,133 @@
+# Copyright 2016 Egor Tensin <Egor.Tensin@gmail.com>
+# This file is licensed under the terms of the MIT License.
+# See LICENSE.txt for details.
+
+from collections import OrderedDict
+from collections.abc import MutableMapping
+from datetime import timedelta
+from enum import Enum
+
+class Weekday(Enum):
+ MONDAY = 0
+ TUESDAY = 1
+ WEDNESDAY = 2
+ THURSDAY = 3
+ FRIDAY = 4
+ SATURDAY = 5
+ SUNDAY = 6
+
+ def __str__(self):
+ return self.name[0] + self.name[1:].lower()
+
+class OnlineSessionEnumerator(MutableMapping):
+ def __init__(self, time_from=None, time_to=None):
+ self._records = {}
+ self._time_from = time_from
+ self._time_to = time_to
+
+ def __getitem__(self, user):
+ return self._records[user]
+
+ def __setitem__(self, user, record):
+ self._records[user] = record
+
+ def __delitem__(self, user):
+ del self._records[user]
+
+ def __iter__(self):
+ return iter(self._records)
+
+ def __len__(self):
+ return len(self._records)
+
+ def _trim_or_drop_session(self, session):
+ user, started_at, ended_at = session
+ if self._time_from is not None:
+ if ended_at < self._time_from:
+ return None
+ if started_at < self._time_from:
+ started_at = self._time_from
+ if self._time_to is not None:
+ if started_at > self._time_to:
+ return None
+ if ended_at > self._time_to:
+ ended_at = self._time_to
+ return user, started_at, ended_at
+
+ def read_database(self, db_reader):
+ for record in db_reader:
+ session = self._process_database_record(record)
+ if session is not None:
+ session = self._trim_or_drop_session(session)
+ if session is not None:
+ yield session
+
+ def group_by_user(self, db_reader):
+ by_user = {}
+ for user, started_at, ended_at in self.read_database(db_reader):
+ if user not in by_user:
+ by_user[user] = timedelta()
+ by_user[user] += ended_at - started_at
+ return by_user
+
+ def group_by_date(self, db_reader):
+ by_date = {}
+ for _, started_at, ended_at in self.read_database(db_reader):
+ for date, duration in self._split_into_days(started_at, ended_at):
+ if date not in by_date:
+ by_date[date] = timedelta()
+ by_date[date] += duration
+ return by_date
+
+ def group_by_weekday(self, db_reader):
+ by_weekday = OrderedDict()
+ for weekday in Weekday:
+ by_weekday[weekday] = timedelta()
+ for _, started_at, ended_at in self.read_database(db_reader):
+ for date, duration in self._split_into_days(started_at, ended_at):
+ by_weekday[Weekday(date.weekday())] += duration
+ return by_weekday
+
+ def group_by_hour(self, db_reader):
+ by_hour = OrderedDict()
+ for i in range(24):
+ by_hour[i] = timedelta()
+ for _, started_at, ended_at in self.read_database(db_reader):
+ for hour, duration in self._split_into_hours(started_at, ended_at):
+ by_hour[hour] += duration
+ return by_hour
+
+ @staticmethod
+ def _split_into_days(a, b):
+ while a.date() != b.date():
+ next_day = a.date() + timedelta(days=1)
+ yield a.date(), next_day - a
+ a = next_day
+ yield b.date(), b - a
+
+ @staticmethod
+ def _split_into_hours(a, b):
+ while a.date() != b.date() or a.hour != b.hour:
+ next_hour = a.replace(minute=0, second=0) + timedelta(hours=1)
+ yield a.hour, next_hour - a
+ a = next_hour
+ yield b.hour, b - a
+
+ def _process_database_record(self, record):
+ return self._close_user_session(record.to_user())
+
+ def _known_user(self, user):
+ return user.get_uid() in self._records
+
+ def _unknown_user(self, user):
+ return not self._known_user(user)
+
+ def _close_user_session(self, user):
+ if user not in self or self[user].is_offline():
+ self[user] = user
+ return None
+ if user.is_online():
+ return None
+ session = user, self[user].get_last_seen_time(), user.get_last_seen_time()
+ self[user] = user
+ return session
diff --git a/vk/tracking/online_streaks.py b/vk/tracking/online_streaks.py
deleted file mode 100644
index db24053..0000000
--- a/vk/tracking/online_streaks.py
+++ /dev/null
@@ -1,133 +0,0 @@
-# Copyright 2016 Egor Tensin <Egor.Tensin@gmail.com>
-# This file is licensed under the terms of the MIT License.
-# See LICENSE.txt for details.
-
-from collections import OrderedDict
-from collections.abc import MutableMapping
-from datetime import timedelta
-from enum import Enum
-
-class Weekday(Enum):
- MONDAY = 0
- TUESDAY = 1
- WEDNESDAY = 2
- THURSDAY = 3
- FRIDAY = 4
- SATURDAY = 5
- SUNDAY = 6
-
- def __str__(self):
- return self.name[0] + self.name[1:].lower()
-
-class OnlineStreakEnumerator(MutableMapping):
- def __init__(self, date_from=None, date_to=None):
- self._records = {}
- self._date_from = date_from
- self._date_to = date_to
-
- def __getitem__(self, user):
- return self._records[user]
-
- def __setitem__(self, user, record):
- self._records[user] = record
-
- def __delitem__(self, user):
- del self._records[user]
-
- def __iter__(self):
- return iter(self._records)
-
- def __len__(self):
- return len(self._records)
-
- def _cut_period(self, streak):
- user, time_from, time_to = streak
- if self._date_from is not None:
- if time_to < self._date_from:
- return None
- if time_from < self._date_from:
- time_from = self._date_from
- if self._date_to is not None:
- if time_from > self._date_to:
- return None
- if time_to > self._date_to:
- time_to = self._date_to
- return user, time_from, time_to
-
- def enum(self, db_reader):
- for record in db_reader:
- streak = self._insert_record(record)
- if streak is not None:
- streak = self._cut_period(streak)
- if streak is not None:
- yield streak
-
- def group_by_user(self, db_reader):
- by_user = {}
- for user, time_from, time_to in self.enum(db_reader):
- if user not in by_user:
- by_user[user] = timedelta()
- by_user[user] += time_to - time_from
- return by_user
-
- def group_by_date(self, db_reader):
- by_date = OrderedDict()
- for _, time_from, time_to in self.enum(db_reader):
- for date, duration in self._enum_dates_and_durations(time_from, time_to):
- if date not in by_date:
- by_date[date] = timedelta()
- by_date[date] += duration
- return by_date
-
- def group_by_weekday(self, db_reader):
- by_weekday = OrderedDict()
- for weekday in Weekday:
- by_weekday[weekday] = timedelta()
- for _, time_from, time_to in self.enum(db_reader):
- for date, duration in self._enum_dates_and_durations(time_from, time_to):
- by_weekday[Weekday(date.weekday())] += duration
- return by_weekday
-
- def group_by_hour(self, db_reader):
- by_hour = OrderedDict()
- for i in range(24):
- by_hour[i] = timedelta()
- for _, time_from, time_to in self.enum(db_reader):
- for hour, duration in self._enum_hours_and_durations(time_from, time_to):
- by_hour[hour] += duration
- return by_hour
-
- @staticmethod
- def _enum_dates_and_durations(time_from, time_to):
- while time_from.date() != time_to.date():
- next_day = time_from.date() + timedelta(days=1)
- yield time_from.date(), next_day - time_from
- time_from = next_day
- yield time_to.date(), time_to - time_from
-
- @staticmethod
- def _enum_hours_and_durations(time_from, time_to):
- while time_from.date() != time_to.date() or time_from.hour != time_to.hour:
- next_hour = time_from.replace(minute=0, second=0) + timedelta(hours=1)
- yield time_from.hour, next_hour - time_from
- time_from = next_hour
- yield time_to.hour, time_to - time_from
-
- def _insert_record(self, record):
- return self._insert_user(record.to_user())
-
- def _known_user(self, user):
- return user.get_uid() in self._records
-
- def _unknown_user(self, user):
- return not self._known_user(user)
-
- def _insert_user(self, user):
- if user not in self or self[user].is_offline():
- self[user] = user
- return None
- if user.is_online():
- return None
- streak = user, self[user].get_last_seen_time(), user.get_last_seen_time()
- self[user] = user
- return streak