# Coverage report for subcell_pipeline/simulation/cytosim/post_processing.py:
# 0% of 54 statements covered.
# Generated by coverage.py v7.5.3, created at 2024-08-29 15:14 +0000.

"""Methods for processing Cytosim simulations."""

2 

from typing import Union

import numpy as np
import pandas as pd
from io_collection.keys.check_key import check_key
from io_collection.load.load_text import load_text
from io_collection.save.save_dataframe import save_dataframe

10 

# Order matters: the parser builds each row as the whitespace-separated fields
# of a data line followed by the current time and the point index, and then
# labels the columns positionally with this list.
COLUMN_NAMES: list[str] = [
    "fiber_id",
    "xpos",
    "ypos",
    "zpos",
    "xforce",
    "yforce",
    "zforce",
    "segment_curvature",
    "time",
    "fiber_point",
]
"""Parsed tidy data column names."""

24 

# Maps every parsed column name to the Python type the dataframe column is
# cast to; the two identifier/index columns are integers, the rest are floats.
COLUMN_DTYPES: dict[str, Union[type[float], type[int]]] = {
    "fiber_id": int,
    "xpos": float,
    "ypos": float,
    "zpos": float,
    "xforce": float,
    "yforce": float,
    "zforce": float,
    "segment_curvature": float,
    "time": float,
    "fiber_point": int,
}
"""Parsed tidy data column data types."""

38 

# Multiplier applied to the parsed x/y/z position columns.
CYTOSIM_SCALE_FACTOR: int = 1000
"""Default Cytosim position scaling factor."""

41 

# Default rigidity used when converting segment curvature to segment energy.
CYTOSIM_RIGIDITY: float = 0.041
"""Default Cytosim rigidity."""

44 

45 

def parse_cytosim_simulation_data(
    bucket: str,
    series_name: str,
    condition_keys: list[str],
    random_seeds: list[int],
) -> None:
    """
    Parse Cytosim simulation data for select conditions and seeds.

    For every condition/seed pair, the fiber segment curvature output is
    loaded, parsed into a tidy dataframe, and saved as a CSV. Pairs whose
    dataframe file already exists are skipped.

    Parameters
    ----------
    bucket
        Name of S3 bucket for input and output files.
    series_name
        Name of simulation series.
    condition_keys
        List of condition keys.
    random_seeds
        Random seeds for simulations.
    """

    for condition_key in condition_keys:
        # An empty condition key means the series has no per-condition suffix.
        series_key = f"{series_name}_{condition_key}" if condition_key else series_name

        for index, seed in enumerate(random_seeds):
            dataframe_key = f"{series_name}/data/{series_key}_{seed:06d}.csv"

            # Skip if dataframe file already exists.
            if check_key(bucket, dataframe_key):
                print(f"Dataframe [ { dataframe_key } ] already exists. Skipping.")
                continue

            print(f"Parsing data for [ {condition_key} ] seed [ {seed} ]")

            # The outputs folder is keyed by the seed's position in
            # `random_seeds`, while the saved dataframe is keyed by the seed
            # value itself.
            output_key_template = f"{series_name}/outputs/{series_key}_{index}/%s"
            segment_curvature = load_text(
                bucket, output_key_template % "fiber_segment_curvature.txt"
            )
            data = parse_cytosim_simulation_curvature_data(segment_curvature)

            save_dataframe(bucket, dataframe_key, data, index=False)

87 

88 

def parse_cytosim_simulation_curvature_data(
    data: str,
    rigidity: float = CYTOSIM_RIGIDITY,
    scale_factor: int = CYTOSIM_SCALE_FACTOR,
) -> pd.DataFrame:
    """
    Parse Cytosim fiber segment curvature data into tidy data format.

    Lines starting with ``%`` are treated as headers: ``% time`` sets the
    current timepoint and ``% end`` resets the fiber and point counters. Every
    other non-empty line is one fiber point, split on whitespace.

    Parameters
    ----------
    data
        Output data from Cytosim report fiber:segment_energy.
    rigidity
        Fiber rigidity used to calculate segment energy.
    scale_factor
        Scaling factor for fiber points.

    Returns
    -------
    :
        Data for individual fiber points.

    Raises
    ------
    ValueError
        If a data row appears before any ``% time`` header, or if a time
        value cannot be converted to a float.
    """

    point_data: list[list[Union[str, int, float]]] = []

    # No timepoint is known until the first "% time" header has been read.
    time: Union[float, None] = None
    fiber_index = 0
    point_index = 0

    # Iterate through each row of the data and extract data for each point.
    for line in data.splitlines():
        line_split = line.strip().split()

        if line.startswith("%"):
            if line.startswith("% time"):
                # Use the whitespace-split tokens so trailing whitespace or
                # tab separators do not break the float conversion.
                time = float(line_split[-1])
                print(f"Parsing timepoint [ {time} ]")
            elif line.startswith("% end"):
                fiber_index = 0
                point_index = 0
        elif line_split:
            if time is None:
                raise ValueError(
                    "Encountered fiber point data before any '% time' header."
                )

            # Rows for the same fiber are consecutive: a matching id continues
            # the current fiber, anything else starts the next one.
            if fiber_index == int(line_split[0]):
                point_index += 1
            else:
                point_index = 0
                fiber_index += 1

            point_data.append([*line_split, time, point_index])

    # Combine all data into dataframe and update data types.
    dataframe = pd.DataFrame(point_data, columns=COLUMN_NAMES)
    dataframe = dataframe.astype(dtype=COLUMN_DTYPES)

    dataframe["xpos"] = dataframe["xpos"] * scale_factor
    dataframe["ypos"] = dataframe["ypos"] * scale_factor
    dataframe["zpos"] = dataframe["zpos"] * scale_factor

    # Calculate force magnitude
    dataframe["force_magnitude"] = np.sqrt(
        np.square(dataframe["xforce"])
        + np.square(dataframe["yforce"])
        + np.square(dataframe["zforce"])
    )

    # Calculate segment bending energy in pN nm
    dataframe["segment_energy"] = dataframe["segment_curvature"] * rigidity * 1000

    return dataframe

154 

155 return dataframe