python/oskopek/mvae/mt/visualization/read_log.py

read_log.py
# Copyright 2019 Ondrej Skopek.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

import datetime
import os
from numbers import Number
from typing import Dict, List, Tuple

import pandas as pd


def from_iso_format(time: str) -> datetime.datetime:
    """Parse a lower-cased ISO-8601 style timestamp into a naive datetime.

    The date/time separator in the format is a lowercase "t" because
    `log_to_pd` lower-cases every log line before handing the timestamp over.

    Args:
        time: Timestamp such as "2019-05-06t07:08:09.123456". The fractional
            seconds part is optional, since `datetime.isoformat()` omits it
            when the microsecond component is zero.

    Returns:
        The parsed `datetime.datetime` (no timezone information attached).

    Raises:
        ValueError: If `time` does not match the expected layout.
    """
    # Pick the layout by the presence of a fractional part so that whole-second
    # timestamps parse instead of failing on the missing ".%f" field.
    if "." in time:
        fmt = "%Y-%m-%dt%H:%M:%S.%f"
    else:
        fmt = "%Y-%m-%dt%H:%M:%S"
    return datetime.datetime.strptime(time, fmt)


def log_to_pd(logfile: str) -> Tuple[str, datetime.datetime, pd.DataFrame, pd.DataFrame]:
    """Parse a training log file into evaluation and training statistics tables.

    Every line is stripped and lower-cased before it is inspected, so the
    returned model name and all column names are lowercase.

    Two kinds of lines are recognised:

    * A header line containing "vae model", made of ";"-separated fields.
      The model name is the text after the last ":" of the first field, the
      timestamp is the text after the first ":" of the third field, and an
      optional fourth field marks fixed curvature when its value is "true".
      A header with fewer than four fields is treated as fixed curvature.
      "-fixed" is appended to the name for fixed-curvature models. If several
      header lines occur, the last one wins.
    * Statistics lines that start with "epoch " (evaluation) or "trainepoch "
      (training), end with "}", and carry a dict literal after the first ":".
      The epoch number is added to the record under the key "epoch".

    Both tables share the same set of columns. If a record lacks a key that
    another record has, or a key shows up for the first time mid-log, the
    missing cells are filled with NaN so that all columns stay equally long.

    Args:
        logfile: Path of the log file to read.

    Returns:
        A tuple `(name, time, eval_stats, train_stats)` where the last two are
        DataFrames with one row per parsed "epoch" / "trainepoch" line.

    Raises:
        ValueError: If `logfile` does not exist, or if no header line with a
            model name and timestamp was found.
    """
    if not os.path.exists(logfile):
        raise ValueError(f"Cannot read logs from non-existent file: '{logfile}'")
    d: Dict[str, List[Number]] = {}
    d_train: Dict[str, List[Number]] = {}
    # Number of rows already stored per table; needed to pad columns for keys
    # that are first seen after some rows were recorded.
    rows = 0
    rows_train = 0
    missing = float("nan")
    name = None
    time = None
    with open(logfile, "r") as f:
        for line in f:
            line = line.strip().lower()
            if "vae model" in line:
                split = line.split(";")
                name = split[0].split(":")[-1].strip()
                # maxsplit=1: the timestamp itself contains ":" characters.
                str_time = split[2].split(":", maxsplit=1)[-1].strip()
                time = from_iso_format(str_time)

                if len(split) < 4:
                    # No curvature field in the header: treated as fixed.
                    fixed_curvature = True
                else:
                    fixed_curvature = split[3].split(":", maxsplit=1)[-1].strip() == "true"
                if fixed_curvature:
                    name += "-fixed"

                continue

            if not line.endswith("}"):
                continue

            if line.startswith("epoch "):
                train = False
            elif line.startswith("trainepoch "):
                train = True
            else:
                continue

            idx = line.find(":")
            epoch = int(line[:idx].strip().split(" ")[1])
            # SECURITY: eval() executes arbitrary expressions found in the log
            # file. Only run this on logs from a trusted source; if the stats
            # dicts only ever contain literals, ast.literal_eval is the safer
            # replacement.
            dictionary = eval(line[idx + 1:].strip())
            dictionary["epoch"] = epoch
            for key in dictionary:
                if key not in d:
                    # Back-fill earlier rows so every column keeps the same
                    # length as the rest of its table.
                    d[key] = [missing] * rows
                    d_train[key] = [missing] * rows_train
            target = d_train if train else d
            for key in d:
                # .get(): a record may lack keys that other records provided.
                target[key].append(dictionary.get(key, missing))
            if train:
                rows_train += 1
            else:
                rows += 1

    if name is None or time is None:
        raise ValueError(f"Unable to parse name ({name}) or time ({time}) from logs.")
    return name, time, pd.DataFrame(d), pd.DataFrame(d_train)