Coverage for subcell_pipeline/analysis/wall_clock_time/log_data.py: 0%

15 statements  

« prev     ^ index     » next       coverage.py v7.5.3, created at 2024-08-29 15:14 +0000

1"""Methods for analyzing simulation logs.""" 

2 

3import re 

4 

5import pandas as pd 

6from io_collection.load.load_text import load_text 

7 

8 

def get_wall_clock_time_from_logs(
    bucket: str,
    series_name: str,
    condition_keys: list[str],
    random_seeds: list[int],
    job_arns: dict,
    pattern: str,
) -> pd.DataFrame:
    """
    Extract wall clock times from log files.

    One log file is loaded per (condition, seed) pair. The log key is built as
    ``{series_name}/logs/{series_key}_{job_arn}:{index}.log``, where ``index``
    is the position of the seed in ``random_seeds`` (not the seed value) and
    ``series_key`` is ``{series_name}_{condition_key}``, or just
    ``series_name`` when the condition key is empty.

    Parameters
    ----------
    bucket
        Name of S3 bucket for input and output files.
    series_name
        Name of simulation series.
    condition_keys
        List of condition keys.
    random_seeds
        Random seeds for simulations.
    job_arns
        Map of conditions to job ARNs.
    pattern
        Regex pattern to find wall clock time in log text. The first match
        is converted with ``int``, so the pattern should yield a single
        integer string per match (at most one capturing group).

    Returns
    -------
    :
        Dataframe of wall clock times with columns ``condition``, ``seed``,
        and ``time``. The columns are present even when no logs are read.

    Raises
    ------
    KeyError
        If a condition key is missing from ``job_arns``.
    IndexError
        If the pattern does not match anything in a log file.
    ValueError
        If the first match cannot be converted to an integer.
    """

    columns = ["condition", "seed", "time"]
    wall_clock_times: list[dict] = []

    for condition_key in condition_keys:
        series_key = f"{series_name}_{condition_key}" if condition_key else series_name
        job_arn = job_arns[condition_key]

        # The log suffix is the seed's position in the list, not its value.
        for index, seed in enumerate(random_seeds):
            log_key = f"{series_name}/logs/{series_key}_{job_arn}:{index}.log"
            print(f"Extracting wall clock time from [ {log_key} ]")

            log = load_text(bucket, log_key)
            matches = re.findall(pattern, log)

            # Keep the IndexError type callers would have seen from `[0]`,
            # but say which log and pattern failed.
            if not matches:
                raise IndexError(
                    f"Pattern [ {pattern} ] not found in log [ {log_key} ]"
                )

            wall_clock_times.append(
                {
                    "condition": condition_key,
                    "seed": seed,
                    "time": int(matches[0]),
                }
            )

    # Explicit columns keep the schema stable when no logs were processed.
    return pd.DataFrame(wall_clock_times, columns=columns)