Coverage for subcell_pipeline/analysis/wall_clock_time/log_data.py: 0%
15 statements
« prev ^ index » next coverage.py v7.5.3, created at 2024-08-29 15:14 +0000
1"""Methods for analyzing simulation logs."""
3import re
5import pandas as pd
6from io_collection.load.load_text import load_text
def get_wall_clock_time_from_logs(
    bucket: str,
    series_name: str,
    condition_keys: list[str],
    random_seeds: list[int],
    job_arns: dict,
    pattern: str,
) -> pd.DataFrame:
    """
    Extract wall clock times from log files.

    One log is loaded for every (condition, seed) pair from the key
    ``{series_name}/logs/{series_key}_{job_arn}:{index}.log``, where ``index``
    is the position of the seed in ``random_seeds`` and ``series_key`` is the
    series name, suffixed with the condition key when that key is non-empty.
    The first match of ``pattern`` in each log is converted with ``int``.

    Parameters
    ----------
    bucket
        Name of S3 bucket for input and output files.
    series_name
        Name of simulation series.
    condition_keys
        List of condition keys.
    random_seeds
        Random seeds for simulations.
    job_arns
        Map of conditions to job ARNs.
    pattern
        Regex pattern to find wall clock time in log text. The first result
        of ``re.findall`` is passed to ``int``, so the pattern should contain
        at most one capturing group and match an integer literal.

    Returns
    -------
    :
        Dataframe of wall clock times with columns ``condition``, ``seed``,
        and ``time``. The columns are present even when no logs are read.

    Raises
    ------
    KeyError
        If a condition key has no entry in ``job_arns``.
    IndexError
        If ``pattern`` does not match anywhere in a log file.
    """

    columns = ["condition", "seed", "time"]

    # Compile once up front; the same pattern is applied to every log.
    regex = re.compile(pattern)

    wall_clock_times: list[dict] = []

    for condition_key in condition_keys:
        series_key = f"{series_name}_{condition_key}" if condition_key else series_name
        job_arn = job_arns[condition_key]

        # The log suffix is the seed's position in the list, not the seed value.
        for index, seed in enumerate(random_seeds):
            log_key = f"{series_name}/logs/{series_key}_{job_arn}:{index}.log"
            print(f"Extracting wall clock time from [ {log_key} ]")

            log = load_text(bucket, log_key)
            matches = regex.findall(log)

            # Keep the IndexError type an unguarded `[0]` would raise, but say
            # which log and which pattern failed.
            if not matches:
                raise IndexError(
                    f"Pattern [ {pattern} ] not found in log [ {log_key} ]"
                )

            wall_clock_times.append(
                {
                    "condition": condition_key,
                    "seed": seed,
                    "time": int(matches[0]),
                }
            )

    # Explicit columns keep the schema stable when no logs were processed.
    return pd.DataFrame(wall_clock_times, columns=columns)