-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathprocess_hmi.py
221 lines (182 loc) · 9.02 KB
/
process_hmi.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
import argparse
import pprint
import sys
import datetime
import os
from tqdm import tqdm
from tqdm.contrib.concurrent import process_map
from sunpy.coordinates import sun
from sunpy.map import Map
import skimage.transform
import numpy as np
import matplotlib.pyplot as plt
from glob import glob
# Mask to remove text printed lower left
# Example: http://jsoc.stanford.edu/data/hmi/images/2024/01/01/20240101_000000_M_1k.jpg
mask = np.ones((1024,1024))
mask[990:,:300] = 0.
def read_hmi_jpg(file_name):
x = plt.imread(file_name)
x = x.mean(axis=2)
x *= mask
x /= 255.
return x
def find_sun_ratio(data):
# Returns the ratio: diameter of the solar disk / length of the image side
if data.shape[0] != data.shape[1]:
raise ValueError('Expecting square image')
size = data.shape[0]
mid_i = size // 2
color = data[mid_i, 0]
space_left = 0
for i in range(1, mid_i):
if data[mid_i, i] == color:
color = data[mid_i, i]
space_left += 1
else:
break
color = data[0, mid_i]
space_top = 0
for i in range(1, mid_i):
if data[i, mid_i] == color:
color = data[i, mid_i]
space_top += 1
else:
break
space = (space_left + space_top)/2.
ratio = (size - 2*space)/size
return ratio
# HMI postprocessing based on SDOML code, with some modifications
# https://github.com/SDOML/SDOML/blob/bea846347b2cd64d81fdcf1baf88a245a1bcb429/hmi_fits_to_np.py
def process(args):
source_file, target_file, resolution = args
try:
X = read_hmi_jpg(source_file)
print('\nSource: {}'.format(source_file))
except Exception as e:
print('Error: {}'.format(e))
return False
# Fail if the file is too small, indicates an empty image (all black with just the text label)
# There are 91 such files between in 2010 May and July
if os.path.getsize(source_file) < 30000:
return False
# The HMI data product we use is not in FITS format and does not provide the metadata RSUN_OBS, so we need to estimate the scale factor
# Original scale factor calculation which we cannot do
# rad = Xd.meta['RSUN_OBS']
# scale_factor = trgtAS/rad
# Method 1: Use the angular radius of the Sun as seen from Earth (instead of SDO, but it should be close enough))
# Based on code: https://github.com/sunpy/sunpy/blob/934a4439d420a6edf0196cc9325e770121db3d39/sunpy/coordinates/sun.py#L53
# trgtAS = 976.0
# hmi_basename = os.path.basename(source_file)
# date = datetime.datetime.strptime(hmi_basename[:13], '%Y%m%d_%H%M')
# rad = sun.angular_radius(date).to('arcsec').value
# scale_factor = trgtAS/rad
# Method 2: Use the ratio of the diameter of the solar disk to the length of the image side
# target_sun_ratio = 0.8 # This is the end result of the AIA scaling (AIA images end up having 10% length on each side of the solar disk)
# ratio = find_sun_ratio(X)
# scale_factor = target_sun_ratio / ratio
# Method 3: Use the RSUN_OBS metadata from corresponding AIA FITS files which we have. There seems to be a simple relationship between AIA RSUN_OBS and HMI RSUN_OBS which we assume to be constant.
# aia_rsun_obs = Map(aia_file).meta['RSUN_OBS']
# trgtAS = 976.0
# aia_scale_factor = trgtAS / aia_rsun_obs
# scale_factor = aia_scale_factor * 0.85
hmi_basename = os.path.basename(source_file)
hmi_dir = os.path.dirname(source_file)
date = datetime.datetime.strptime(hmi_basename[:13], '%Y%m%d_%H%M')
# Try to find a very close AIA file
aia_found = False
if date.minute == 15:
date = date.replace(minute=14)
elif date.minute == 45:
date = date.replace(minute=44)
aia_files_pattern_prefix = datetime.datetime.strftime(date, 'AIA%Y%m%d_%H%M')
aia_files_pattern = aia_files_pattern_prefix + '*.fits'
aia_files_found = glob(os.path.join(hmi_dir, aia_files_pattern))
if len(aia_files_found) > 0:
aia_file_preferences = [os.path.join(hmi_dir, aia_files_pattern_prefix + '_' + postfix + '.fits') for postfix in ['0131','0171','0193','0211','0094','1600','1700']]
aia_files = [file for file in aia_file_preferences if file in aia_files_found]
if len(aia_files) > 0:
aia_file = aia_files[0]
aia_found = True
# If no close AIA file is found, use any AIA file from the same day
if not aia_found:
aia_files_found = glob(os.path.join(hmi_dir, 'AIA*.fits'))
if len(aia_files_found) > 0:
aia_file = aia_files_found[0]
aia_found = True
if aia_found:
print('Using AIA file metadata for RSUN_OBS: {}'.format(os.path.basename(aia_file)))
try:
aia_rsun_obs = Map(aia_file).meta['RSUN_OBS']
trgtAS = 976.0
aia_scale_factor = trgtAS / aia_rsun_obs
scale_factor = aia_scale_factor * 0.85 # This is a factor determined empirically by inspecting some images for various dates
print('AIA scale factor : {}'.format(aia_scale_factor))
print('Scale factor (based on AIA) : {}'.format(scale_factor))
except Exception as e:
print('Error: {}'.format(e))
print('Failed to read AIA file metadata: {}'.format(aia_file))
aia_found = False
# If no AIA files are found, fall back to Method 2 (should not happen often)
if not aia_found:
print('No AIA files found for HMI file : {}'.format(source_file))
target_sun_ratio = 0.8 # This is the end result of the AIA scaling (AIA images end up having 10% length on each side of the solar disk)
ratio = find_sun_ratio(X)
scale_factor = target_sun_ratio / ratio
print('Scale factor (not based on AIA) : {}'.format(scale_factor))
#fix the translation
t = (X.shape[0]/2.0)-scale_factor*(X.shape[0]/2.0)
#rescale and keep center
XForm = skimage.transform.SimilarityTransform(scale=scale_factor,translation=(t,t))
Xr = skimage.transform.warp(X,XForm.inverse,preserve_range=True,mode='edge',output_shape=(X.shape[0],X.shape[0]))
#figure out the integer factor to downsample by mean
divideFactor = int(X.shape[0] / resolution)
Xr = skimage.transform.downscale_local_mean(Xr,(divideFactor,divideFactor))
#cast to fp32
Xr = Xr.astype('float32')
os.makedirs(os.path.dirname(target_file), exist_ok=True)
np.save(target_file, Xr)
print('Target: {}'.format(target_file))
return True
def main():
description = 'SDOML-lite, SDO HMI data processor'
parser = argparse.ArgumentParser(description=description)
parser.add_argument('--source_dir', type=str, help='Source directory', required=True)
parser.add_argument('--target_dir', type=str, help='Destination directory', required=True)
parser.add_argument('--max_workers', type=int, default=1, help='Max workers')
parser.add_argument('--worker_chunk_size', type=int, default=1, help='Chunk size per worker')
parser.add_argument('--resolution', type=int, default=512, help='Pixel resolution of processed images. Should be a divisor of 1024.')
parser.add_argument('--degradation_dir', type=str, default='./degradation/v9', help='Directory with degradation correction files')
args = parser.parse_args()
print(description)
start_time = datetime.datetime.now()
print('Start time: {}'.format(start_time))
print('Arguments:\n{}'.format(' '.join(sys.argv[1:])))
print('Config:')
pprint.pprint(vars(args), depth=2, width=50)
# walk through the source directory with glob, find all .jpg files, and create a corresponding file name ending in .npy in the target dir, keeping the directory structure
# set the source and target directories, strip final slash if present
source_dir = args.source_dir.rstrip('/')
target_dir = args.target_dir.rstrip('/')
# get all .fits files in the source directory
jpg_files = glob(os.path.join(source_dir, '**', '*.jpg'), recursive=True)
if len(jpg_files) == 0:
print('No files found in source directory: {}'.format(source_dir))
return
# create a list of tuples with the source and target file names
# be careful to strip or add slashes as needed
file_names = []
for source_file in jpg_files:
target_file = source_file.replace(source_dir, target_dir).replace('.jpg', '.npy')
target_file = target_file.replace('00_M_1k', '_M')
target_file = target_file.replace(os.path.basename(target_file), 'HMI' + os.path.basename(target_file))
file_names.append((source_file, target_file, args.resolution))
# process the files
results = process_map(process, file_names, max_workers=args.max_workers, chunksize=args.worker_chunk_size)
print('Files processed: {}'.format(results.count(True)))
print('Files failed : {}'.format(results.count(False)))
print('Files total : {}'.format(len(results)))
print('End time: {}'.format(datetime.datetime.now()))
print('Duration: {}'.format(datetime.datetime.now() - start_time))
if __name__ == '__main__':
main()