Radial Velocity 3D Isosurface for 33 Million Stars From GAIA DR3 Data Visualization

by matt392 in Circuits > Computers

15 Views, 0 Favorites, 0 Comments

Radial Velocity 3D Isosurface for 33 Million Stars From GAIA DR3 Data Visualization

Radial-velocity-33m-stars-#3.png
Radial-velocity-33m-stars-#2.png
Radial-velocity-33m-stars.png

Radial Velocity 3D Isosurface for 33 Million Stars From GAIA DR3 Data Visualization

Created using Python and Claude.ai

30 Second Video Rotation: https://youtu.be/ZUqtpMxYCYE

"""
3D Stellar Density Radial Velocity Isosurface — Streaming Large Dataset Version
GAIA DR3 | ~33M stars
Streaming chunked loading — no all_chunks list — RAM-safe for any file size
"""

import os
import pandas as pd
import numpy as np
from scipy.ndimage import gaussian_filter
import pyvista as pv
import multiprocessing as mp
import time
import imageio

os.chdir(r'C:\temp')

# ╔══════════════════════════════════════════════════════════════╗
# ║ SETTINGS ║
# ╚══════════════════════════════════════════════════════════════╝

CSV_FILES = [
'33m-stars-radial-velocity.csv'
]

RA_COL = 'ra'
DEC_COL = 'dec'
PAR_COL = 'parallax'

MAX_DIST_LY = 32600
GRID_SIZE = 400
SIGMA = 1

N_CORES = 4
CHUNK_SIZE = 500_000

RING_DISTANCES = [500, 1000, 2000, 3262, 5000, 8000, 10000, 13000, 16000]
LABELS = []
OUTPUT_IMAGE = 'isosurface_1B_still.png'

RV_COL = 'radial_velocity'
ARROW_STRIDE = 8
RV_THRESHOLD = None # set after d_max is known — handled automatically

# ╔══════════════════════════════════════════════════════════════╗
# ║ MULTIPROCESSING WORKER ║
# ╚══════════════════════════════════════════════════════════════╝

def process_chunk_worker(args):
chunk, x_range, y_range, z_range, max_dist = args

chunk = chunk.copy()
chunk = chunk.dropna(subset=[RA_COL, DEC_COL, PAR_COL])
chunk = chunk[chunk[PAR_COL] > 0]
chunk['distance_ly'] = 3262.0 / chunk[PAR_COL]
chunk = chunk[np.isfinite(chunk['distance_ly'])]
chunk = chunk[(chunk['distance_ly'] > 0) & (chunk['distance_ly'] <= max_dist)]

gs = len(x_range) - 1
if len(chunk) == 0:
return np.zeros((gs, gs, gs), dtype=np.float32), 0

ra_rad = np.radians(chunk[RA_COL].values)
dec_rad = np.radians(chunk[DEC_COL].values)
dist = chunk['distance_ly'].values

xs = dist * np.cos(dec_rad) * np.cos(ra_rad)
ys = dist * np.cos(dec_rad) * np.sin(ra_rad)
zs = dist * np.sin(dec_rad)

mask = (np.abs(xs) < max_dist) & (np.abs(ys) < max_dist) & (np.abs(zs) < max_dist)
xs, ys, zs = xs[mask], ys[mask], zs[mask]

if len(xs) == 0:
return np.zeros((gs, gs, gs), dtype=np.float32), 0

H, _ = np.histogramdd(
np.column_stack([xs, ys, zs]),
bins=(x_range, y_range, z_range),
)
return H.astype(np.float32), int(mask.sum())


# ╔══════════════════════════════════════════════════════════════╗
# ║ STREAMING CHUNK GENERATOR ║
# ╚══════════════════════════════════════════════════════════════╝

def chunk_generator(csv_path, x_range, y_range, z_range, max_dist):
"""Yields one chunk at a time — never loads full file into RAM."""
for chunk in pd.read_csv(
csv_path,
usecols=[RA_COL, DEC_COL, PAR_COL],
dtype={RA_COL: 'float32', DEC_COL: 'float32', PAR_COL: 'float32'},
chunksize=CHUNK_SIZE,
low_memory=False,
):
yield (chunk, x_range, y_range, z_range, max_dist)


# ╔══════════════════════════════════════════════════════════════╗
# ║ MAIN ║
# ╚══════════════════════════════════════════════════════════════╝

if __name__ == '__main__':
print("Script started!")
t0 = time.time()

print("=" * 65)
print(" 3D Stellar Density Isosurface Radial Velocity — Streaming 33M Stars")
print("=" * 65)
print(f" Files: {CSV_FILES}")
print(f" Max dist: {MAX_DIST_LY:,} ly")
print(f" Grid: {GRID_SIZE}³ = {GRID_SIZE**3:,} cells")
print(f" Cores: {N_CORES}")
print(f" Chunk size: {CHUNK_SIZE:,}")
print("=" * 65)

# ── Build grid edges ──────────────────────────────────────
lim = MAX_DIST_LY + 5
x_range = np.linspace(-lim, lim, GRID_SIZE)
y_range = np.linspace(-lim, lim, GRID_SIZE)
z_range = np.linspace(-lim, lim, GRID_SIZE)

H = np.zeros((GRID_SIZE - 1, GRID_SIZE - 1, GRID_SIZE - 1), dtype=np.float32)
total_stars = 0

# ── Stream & process each file ────────────────────────────
for csv_path in CSV_FILES:
if not os.path.exists(csv_path):
print(f"\nWARNING: File not found — {csv_path} (skipping)")
continue

file_size_gb = os.path.getsize(csv_path) / 1e9
print(f"\nStreaming: {csv_path} ({file_size_gb:.2f} GB)")
file_stars = 0
chunk_count = 0

with mp.Pool(processes=N_CORES) as pool:
gen = chunk_generator(csv_path, x_range, y_range, z_range, MAX_DIST_LY)
for h_part, count in pool.imap(process_chunk_worker, gen, chunksize=1):
H += h_part
file_stars += count
chunk_count += 1
if chunk_count % 10 == 0:
elapsed = time.time() - t0
rate = file_stars / elapsed if elapsed > 0 else 0
print(f" Chunk {chunk_count:>5} | Stars: {file_stars:,} | "
f"{elapsed:.0f}s | {rate/1e6:.2f}M stars/sec")

print(f" File total: {file_stars:,} stars")
total_stars += file_stars

print(f"\n{'='*65}")
print(f"All files loaded. Total valid stars: {total_stars:,}")
print(f"Elapsed: {(time.time()-t0)/60:.1f} min")

# ── Smooth ────────────────────────────────────────────────
print(f"\nApplying Gaussian smoothing (sigma={SIGMA})...")
H_smooth = gaussian_filter(H, sigma=SIGMA)
del H

d_min, d_max, d_mean = H_smooth.min(), H_smooth.max(), H_smooth.mean()
print(f"Density stats: min={d_min:.4f}, max={d_max:.4f}, mean={d_mean:.4f}")
print(f"Non-zero cells: {np.sum(H_smooth > 0):,} / {H_smooth.size:,}")

# ── Diagnostic: top 20 densest cells ─────────────────────
print("\n── Top 20 Densest Grid Cells ──")
x_centers = (x_range[:-1] + x_range[1:]) / 2
y_centers = (y_range[:-1] + y_range[1:]) / 2
z_centers = (z_range[:-1] + z_range[1:]) / 2

flat_idx = np.argsort(H_smooth.ravel())[-20:][::-1]
ix, iy, iz = np.unravel_index(flat_idx, H_smooth.shape)

print(f"{'Rank':<5} {'Density':<10} {'X(ly)':<9} {'Y(ly)':<9} {'Z(ly)':<9}"
f" {'RA(deg)':<9} {'Dec(deg)':<10} {'Dist(ly)':<9}")
for rank, (i, j, k) in enumerate(zip(ix, iy, iz), 1):
cx, cy, cz = x_centers[i], y_centers[j], z_centers[k]
dist_c = np.sqrt(cx**2 + cy**2 + cz**2)
ra_d = np.degrees(np.arctan2(cy, cx)) % 360
dec_d = np.degrees(np.arcsin(np.clip(cz / dist_c, -1, 1))) if dist_c > 0 else 0
print(f"{rank:<5} {H_smooth[i,j,k]:<10.2f} {cx:<9.1f} {cy:<9.1f} {cz:<9.1f}"
f" {ra_d:<9.1f} {dec_d:<10.1f} {dist_c:<9.1f}")

print("\nHint: Sco-Cen ~ RA 269, Dec -35 | Orion OB1 ~ RA 84, Dec 0")
print(" Local Bubble boundary ~ 300-1000 ly | Pleiades ~ RA 56, Dec +24\n")

# ── PyVista grid ──────────────────────────────────────────
print("Building PyVista grid...")
grid = pv.ImageData()
grid.dimensions = np.array(H_smooth.shape)
spacing_val = (2 * lim) / (GRID_SIZE - 1)
grid.spacing = (spacing_val, spacing_val, spacing_val)
grid.origin = (-lim, -lim, -lim)
grid.point_data['density'] = H_smooth.flatten(order='F')

# ── Auto-scaled thresholds ────────────────────────────────
RV_THRESHOLD = d_max * 0.02
thresholds = [
(d_max * 0.005, 'darkblue', 0.15, 'Ultra sparse (0.5%)'),
(d_max * 0.01, 'blue', 0.20, 'Very sparse (1%)'),
(d_max * 0.02, 'cyan', 0.30, 'Sparse (2%)'),
(d_max * 0.05, 'green', 0.45, 'Medium (5%)'),
(d_max * 0.10, 'yellow', 0.55, 'Dense (10%)'),
(d_max * 0.20, 'orange', 0.70, 'Very dense (20%)'),
(d_max * 0.40, 'red', 0.85, 'Maximum (40%)'),
]

print(f"Auto-scaled thresholds (max density = {d_max:.2f}):")
for t in thresholds:
print(f" {t[3]}: {t[0]:.4f}")

# ── Plotter ───────────────────────────────────────────────
plotter = pv.Plotter()
plotter.set_background('black')

surfaces_added = 0
for threshold, color, opacity, label in thresholds:
try:
contour = grid.contour([threshold], scalars='density')
if contour.n_points > 0:
plotter.add_mesh(contour, color=color, opacity=opacity,
show_edges=False, smooth_shading=True, label=label)
surfaces_added += 1
print(f" Surface [{label}] @ {threshold:.4f}: {contour.n_points:,} points")
else:
print(f" No surface at {threshold:.4f}")
except Exception as e:
print(f" Failed at {threshold:.4f}: {e}")

print(f"Total surfaces added: {surfaces_added}")

# ── Accumulate all 3 vector components ───────────────────────
RV_vx = np.zeros((GRID_SIZE-1,)*3, dtype=np.float32)
RV_vy = np.zeros((GRID_SIZE-1,)*3, dtype=np.float32)
RV_vz = np.zeros((GRID_SIZE-1,)*3, dtype=np.float32)
RV_count = np.zeros((GRID_SIZE-1,)*3, dtype=np.float32)

for csv_path in CSV_FILES:
chunk_num = 0
for chunk in pd.read_csv(
csv_path,
usecols=[RA_COL, DEC_COL, PAR_COL, RV_COL],
dtype={RA_COL:'float32', DEC_COL:'float32',
PAR_COL:'float32', RV_COL:'float32'},
chunksize=CHUNK_SIZE,
low_memory=False,
):
chunk = chunk.dropna(subset=[RA_COL, DEC_COL, PAR_COL, RV_COL])
chunk = chunk[chunk[PAR_COL] > 0]
chunk['distance_ly'] = 3262.0 / chunk[PAR_COL]
chunk = chunk[np.isfinite(chunk['distance_ly'])]
chunk = chunk[(chunk['distance_ly'] > 0) & (chunk['distance_ly'] <= MAX_DIST_LY)]
if len(chunk) == 0:
continue

ra_rad = np.radians(chunk[RA_COL].values)
dec_rad = np.radians(chunk[DEC_COL].values)
dist = chunk['distance_ly'].values
rv = chunk[RV_COL].values

xs = dist * np.cos(dec_rad) * np.cos(ra_rad)
ys = dist * np.cos(dec_rad) * np.sin(ra_rad)
zs = dist * np.sin(dec_rad)

# Line-of-sight unit vector scaled by radial velocity
norm = np.maximum(dist, 1e-6)
vx = (xs / norm) * rv
vy = (ys / norm) * rv
vz = (zs / norm) * rv

gs = GRID_SIZE - 1
xi = np.clip(np.searchsorted(x_range, xs) - 1, 0, gs-1)
yi = np.clip(np.searchsorted(y_range, ys) - 1, 0, gs-1)
zi = np.clip(np.searchsorted(z_range, zs) - 1, 0, gs-1)

np.add.at(RV_vx, (xi, yi, zi), vx)
np.add.at(RV_vy, (xi, yi, zi), vy)
np.add.at(RV_vz, (xi, yi, zi), vz)
np.add.at(RV_count, (xi, yi, zi), 1)

chunk_num += 1
if chunk_num % 10 == 0:
print(f" RV chunk {chunk_num}")

# ── Mean velocity per voxel ───────────────────────────────────
valid_cells = RV_count > 0
RV_vx[valid_cells] /= RV_count[valid_cells]
RV_vy[valid_cells] /= RV_count[valid_cells]
RV_vz[valid_cells] /= RV_count[valid_cells]

# ── Build arrow positions and vectors ────────────────────────
x_centers = (x_range[:-1] + x_range[1:]) / 2
y_centers = (y_range[:-1] + y_range[1:]) / 2
z_centers = (z_range[:-1] + z_range[1:]) / 2

# Apply density threshold + stride
gs = GRID_SIZE - 1
ix = np.arange(0, gs, ARROW_STRIDE)
iy = np.arange(0, gs, ARROW_STRIDE)
iz = np.arange(0, gs, ARROW_STRIDE)
IX, IY, IZ = np.meshgrid(ix, iy, iz, indexing='ij')
IX, IY, IZ = IX.ravel(), IY.ravel(), IZ.ravel()

density_vals = H_smooth[IX, IY, IZ]
mask = density_vals > RV_THRESHOLD

IX, IY, IZ = IX[mask], IY[mask], IZ[mask]

arrow_x = x_centers[IX]
arrow_y = y_centers[IY]
arrow_z = z_centers[IZ]

arrow_vx = RV_vx[IX, IY, IZ]
arrow_vy = RV_vy[IX, IY, IZ]
arrow_vz = RV_vz[IX, IY, IZ]

print(f"Arrows to render: {len(arrow_x):,}")

# ── Add to plotter ────────────────────────────────────────────
arrow_points = np.column_stack([arrow_x, arrow_y, arrow_z])
arrow_vectors = np.column_stack([arrow_vx, arrow_vy, arrow_vz])

# Color by RV magnitude (+ = receding, - = approaching)
rv_magnitude = np.sqrt(arrow_vx**2 + arrow_vy**2 + arrow_vz**2)

pdata = pv.PolyData(arrow_points)
pdata['vectors'] = arrow_vectors
pdata['rv_speed'] = rv_magnitude

arrows = pdata.glyph(orient='vectors', scale='rv_speed',
factor=80, # scale arrow size — adjust as needed
geom=pv.Arrow())

plotter.add_mesh(arrows, scalars='rv_speed', cmap='coolwarm',
show_scalar_bar=True, scalar_bar_args={
'title': 'Radial Velocity (km/s)',
'color': 'white'
})

print("Vector field added!")


# ── Annotations ───────────────────────────────────────────
plotter.add_text(
f'3D Stellar Density Isosurfaces Radial Velocity\n{total_stars:,} Stars | GAIA DR3',
position='upper_edge', font_size=12, color='white'
)
plotter.add_text(
'Thin shells near Earth = Stellar Desert\nThick shells at distance = Dense regions',
position='lower_left', font_size=9, color='white'
)
plotter.show_axes()

# ── Earth marker ──────────────────────────────────────────
origin_sphere = pv.Sphere(radius=MAX_DIST_LY * 0.012, center=(0, 0, 0))
plotter.add_mesh(origin_sphere, color='white', opacity=0.9)
plotter.add_point_labels(
[[0, 0, 0]], ['Earth'], font_size=12,
text_color='white', point_color='white',
point_size=0, always_visible=True, shape_opacity=0
)

# ── Legend ────────────────────────────────────────────────
legend_entries = [
['Ultra sparse (0.5%)', 'darkblue'],
['Very sparse (1%)', 'blue'],
['Sparse (2%)', 'cyan'],
['Medium (5%)', 'green'],
['Dense (10%)', 'yellow'],
['Very dense (20%)', 'orange'],
['Maximum (40%)', 'red'],
]
plotter.add_legend(legend_entries, bcolor=(0.1, 0.1, 0.1),
border=True, loc='center left', size=(0.18, 0.22))

# ── Show ──────────────────────────────────────────────────
# ── Show settings ─────────────────────────────────────────
INTERACTIVE = True
MAKE_VIDEO = True

plotter.camera_position = 'iso'
plotter.camera.zoom(1.2)

if INTERACTIVE:
plotter.show()

if MAKE_VIDEO:
# Rebuild plotter for video (PyVista closes renderer after show())
plotter2 = pv.Plotter(off_screen=True)
plotter2.set_background('black')

# Re-add isosurfaces
for threshold, color, opacity, label in thresholds:
try:
contour = grid.contour([threshold], scalars='density')
if contour.n_points > 0:
plotter2.add_mesh(contour, color=color, opacity=opacity,
show_edges=False, smooth_shading=True)
except:
pass

# Re-add arrows
plotter2.add_mesh(arrows, scalars='rv_speed', cmap='coolwarm',
show_scalar_bar=True, scalar_bar_args={
'title': 'Radial Velocity (km/s)',
'color': 'white'
})

# Re-add annotations
plotter2.add_text(
f'3D Stellar Density Isosurfaces Radial Velocity\n{total_stars:,} Stars | GAIA DR3',
position='upper_edge', font_size=12, color='white'
)
plotter2.add_text(
'Thin shells near Earth = Stellar Desert\nThick shells at distance = Dense regions',
position='lower_left', font_size=9, color='white'
)
plotter2.show_axes()

# Re-add Earth marker
origin_sphere = pv.Sphere(radius=MAX_DIST_LY * 0.012, center=(0, 0, 0))
plotter2.add_mesh(origin_sphere, color='white', opacity=0.9)
plotter2.add_point_labels(
[[0, 0, 0]], ['Earth'], font_size=12,
text_color='white', point_color='white',
point_size=0, always_visible=True, shape_opacity=0
)

# Re-add legend
legend_entries = [
['Ultra sparse (0.5%)', 'darkblue'],
['Very sparse (1%)', 'blue'],
['Sparse (2%)', 'cyan'],
['Medium (5%)', 'green'],
['Dense (10%)', 'yellow'],
['Very dense (20%)', 'orange'],
['Maximum (40%)', 'red'],
]
plotter2.add_legend(legend_entries, bcolor=(0.1, 0.1, 0.1),
border=True, loc='center left', size=(0.18, 0.22))

# Video
plotter2.camera_position = 'iso'
plotter2.camera.zoom(1.2)
plotter2.show(auto_close=False, interactive=False)

print("Creating rotation + zoom-out video...")
plotter2.open_movie('isosurface_radial_velocity_33M_rotation.mp4', framerate=30)
plotter2.render()

n_frames_zoom = 360
path_zoom = plotter2.generate_orbital_path(n_points=n_frames_zoom, shift=0, viewup=[0,0,1], factor=2.5)
plotter2.camera.zoom(2.5)
zoom_per_frame = 1.0 - (1.5 / n_frames_zoom)
for i in range(n_frames_zoom):
plotter2.camera.position = path_zoom.points[i]
plotter2.camera.zoom(zoom_per_frame)
plotter2.write_frame()

n_frames_full = 180 * 3
path_full = plotter2.generate_orbital_path(n_points=n_frames_full, shift=0, viewup=[0,0,1], factor=2.5)
for i in range(n_frames_full):
plotter2.camera.position = path_full.points[i]
plotter2.write_frame()

plotter2.close()
print("Video saved!")

total_time = time.time() - t0
print(f"\n{'='*65}")
print(f" Done! Total runtime: {total_time/60:.1f} minutes")
print(f" Stars: {total_stars:,}")
print(f" Grid: {GRID_SIZE}³ = {GRID_SIZE**3:,} cells")
print(f" Surfaces: {surfaces_added}")
print("=" * 65)

Supplies

LibrariesImage.png
  1. Python
  2. pandas
  3. numpy
  4. scipy
  5. pyvista
  6. multiprocessing
  7. imageio

Radial Velocity 3D Isosurface for 33 Million Stars From GAIA DR3 Data Visualization

Radial-velocity-33m-stars-#3.png
Radial-velocity-33m-stars-#2.png
Radial-velocity-33m-stars.png

Radial Velocity 3D Isosurface for 33 Million Stars From GAIA DR3 Data Visualization

Created using Python and Claude.ai

30 Second Video Rotation: https://youtu.be/ZUqtpMxYCYE

"""
3D Stellar Density Radial Velocity Isosurface — Streaming Large Dataset Version
GAIA DR3 | ~33M stars
Streaming chunked loading — no all_chunks list — RAM-safe for any file size
"""

import os
import pandas as pd
import numpy as np
from scipy.ndimage import gaussian_filter
import pyvista as pv
import multiprocessing as mp
import time
import imageio

os.chdir(r'C:\temp')

# ╔══════════════════════════════════════════════════════════════╗
# ║ SETTINGS ║
# ╚══════════════════════════════════════════════════════════════╝

CSV_FILES = [
'33m-stars-radial-velocity.csv'
]

RA_COL = 'ra'
DEC_COL = 'dec'
PAR_COL = 'parallax'

MAX_DIST_LY = 32600
GRID_SIZE = 400
SIGMA = 1

N_CORES = 4
CHUNK_SIZE = 500_000

RING_DISTANCES = [500, 1000, 2000, 3262, 5000, 8000, 10000, 13000, 16000]
LABELS = []
OUTPUT_IMAGE = 'isosurface_1B_still.png'

RV_COL = 'radial_velocity'
ARROW_STRIDE = 8
RV_THRESHOLD = None # set after d_max is known — handled automatically

# ╔══════════════════════════════════════════════════════════════╗
# ║ MULTIPROCESSING WORKER ║
# ╚══════════════════════════════════════════════════════════════╝

def process_chunk_worker(args):
chunk, x_range, y_range, z_range, max_dist = args

chunk = chunk.copy()
chunk = chunk.dropna(subset=[RA_COL, DEC_COL, PAR_COL])
chunk = chunk[chunk[PAR_COL] > 0]
chunk['distance_ly'] = 3262.0 / chunk[PAR_COL]
chunk = chunk[np.isfinite(chunk['distance_ly'])]
chunk = chunk[(chunk['distance_ly'] > 0) & (chunk['distance_ly'] <= max_dist)]

gs = len(x_range) - 1
if len(chunk) == 0:
return np.zeros((gs, gs, gs), dtype=np.float32), 0

ra_rad = np.radians(chunk[RA_COL].values)
dec_rad = np.radians(chunk[DEC_COL].values)
dist = chunk['distance_ly'].values

xs = dist * np.cos(dec_rad) * np.cos(ra_rad)
ys = dist * np.cos(dec_rad) * np.sin(ra_rad)
zs = dist * np.sin(dec_rad)

mask = (np.abs(xs) < max_dist) & (np.abs(ys) < max_dist) & (np.abs(zs) < max_dist)
xs, ys, zs = xs[mask], ys[mask], zs[mask]

if len(xs) == 0:
return np.zeros((gs, gs, gs), dtype=np.float32), 0

H, _ = np.histogramdd(
np.column_stack([xs, ys, zs]),
bins=(x_range, y_range, z_range),
)
return H.astype(np.float32), int(mask.sum())


# ╔══════════════════════════════════════════════════════════════╗
# ║ STREAMING CHUNK GENERATOR ║
# ╚══════════════════════════════════════════════════════════════╝

def chunk_generator(csv_path, x_range, y_range, z_range, max_dist):
"""Yields one chunk at a time — never loads full file into RAM."""
for chunk in pd.read_csv(
csv_path,
usecols=[RA_COL, DEC_COL, PAR_COL],
dtype={RA_COL: 'float32', DEC_COL: 'float32', PAR_COL: 'float32'},
chunksize=CHUNK_SIZE,
low_memory=False,
):
yield (chunk, x_range, y_range, z_range, max_dist)


# ╔══════════════════════════════════════════════════════════════╗
# ║ MAIN ║
# ╚══════════════════════════════════════════════════════════════╝

if __name__ == '__main__':
print("Script started!")
t0 = time.time()

print("=" * 65)
print(" 3D Stellar Density Isosurface Radial Velocity — Streaming 33M Stars")
print("=" * 65)
print(f" Files: {CSV_FILES}")
print(f" Max dist: {MAX_DIST_LY:,} ly")
print(f" Grid: {GRID_SIZE}³ = {GRID_SIZE**3:,} cells")
print(f" Cores: {N_CORES}")
print(f" Chunk size: {CHUNK_SIZE:,}")
print("=" * 65)

# ── Build grid edges ──────────────────────────────────────
lim = MAX_DIST_LY + 5
x_range = np.linspace(-lim, lim, GRID_SIZE)
y_range = np.linspace(-lim, lim, GRID_SIZE)
z_range = np.linspace(-lim, lim, GRID_SIZE)

H = np.zeros((GRID_SIZE - 1, GRID_SIZE - 1, GRID_SIZE - 1), dtype=np.float32)
total_stars = 0

# ── Stream & process each file ────────────────────────────
for csv_path in CSV_FILES:
if not os.path.exists(csv_path):
print(f"\nWARNING: File not found — {csv_path} (skipping)")
continue

file_size_gb = os.path.getsize(csv_path) / 1e9
print(f"\nStreaming: {csv_path} ({file_size_gb:.2f} GB)")
file_stars = 0
chunk_count = 0

with mp.Pool(processes=N_CORES) as pool:
gen = chunk_generator(csv_path, x_range, y_range, z_range, MAX_DIST_LY)
for h_part, count in pool.imap(process_chunk_worker, gen, chunksize=1):
H += h_part
file_stars += count
chunk_count += 1
if chunk_count % 10 == 0:
elapsed = time.time() - t0
rate = file_stars / elapsed if elapsed > 0 else 0
print(f" Chunk {chunk_count:>5} | Stars: {file_stars:,} | "
f"{elapsed:.0f}s | {rate/1e6:.2f}M stars/sec")

print(f" File total: {file_stars:,} stars")
total_stars += file_stars

print(f"\n{'='*65}")
print(f"All files loaded. Total valid stars: {total_stars:,}")
print(f"Elapsed: {(time.time()-t0)/60:.1f} min")

# ── Smooth ────────────────────────────────────────────────
print(f"\nApplying Gaussian smoothing (sigma={SIGMA})...")
H_smooth = gaussian_filter(H, sigma=SIGMA)
del H

d_min, d_max, d_mean = H_smooth.min(), H_smooth.max(), H_smooth.mean()
print(f"Density stats: min={d_min:.4f}, max={d_max:.4f}, mean={d_mean:.4f}")
print(f"Non-zero cells: {np.sum(H_smooth > 0):,} / {H_smooth.size:,}")

# ── Diagnostic: top 20 densest cells ─────────────────────
print("\n── Top 20 Densest Grid Cells ──")
x_centers = (x_range[:-1] + x_range[1:]) / 2
y_centers = (y_range[:-1] + y_range[1:]) / 2
z_centers = (z_range[:-1] + z_range[1:]) / 2

flat_idx = np.argsort(H_smooth.ravel())[-20:][::-1]
ix, iy, iz = np.unravel_index(flat_idx, H_smooth.shape)

print(f"{'Rank':<5} {'Density':<10} {'X(ly)':<9} {'Y(ly)':<9} {'Z(ly)':<9}"
f" {'RA(deg)':<9} {'Dec(deg)':<10} {'Dist(ly)':<9}")
for rank, (i, j, k) in enumerate(zip(ix, iy, iz), 1):
cx, cy, cz = x_centers[i], y_centers[j], z_centers[k]
dist_c = np.sqrt(cx**2 + cy**2 + cz**2)
ra_d = np.degrees(np.arctan2(cy, cx)) % 360
dec_d = np.degrees(np.arcsin(np.clip(cz / dist_c, -1, 1))) if dist_c > 0 else 0
print(f"{rank:<5} {H_smooth[i,j,k]:<10.2f} {cx:<9.1f} {cy:<9.1f} {cz:<9.1f}"
f" {ra_d:<9.1f} {dec_d:<10.1f} {dist_c:<9.1f}")

print("\nHint: Sco-Cen ~ RA 269, Dec -35 | Orion OB1 ~ RA 84, Dec 0")
print(" Local Bubble boundary ~ 300-1000 ly | Pleiades ~ RA 56, Dec +24\n")

# ── PyVista grid ──────────────────────────────────────────
print("Building PyVista grid...")
grid = pv.ImageData()
grid.dimensions = np.array(H_smooth.shape)
spacing_val = (2 * lim) / (GRID_SIZE - 1)
grid.spacing = (spacing_val, spacing_val, spacing_val)
grid.origin = (-lim, -lim, -lim)
grid.point_data['density'] = H_smooth.flatten(order='F')

# ── Auto-scaled thresholds ────────────────────────────────
RV_THRESHOLD = d_max * 0.02
thresholds = [
(d_max * 0.005, 'darkblue', 0.15, 'Ultra sparse (0.5%)'),
(d_max * 0.01, 'blue', 0.20, 'Very sparse (1%)'),
(d_max * 0.02, 'cyan', 0.30, 'Sparse (2%)'),
(d_max * 0.05, 'green', 0.45, 'Medium (5%)'),
(d_max * 0.10, 'yellow', 0.55, 'Dense (10%)'),
(d_max * 0.20, 'orange', 0.70, 'Very dense (20%)'),
(d_max * 0.40, 'red', 0.85, 'Maximum (40%)'),
]

print(f"Auto-scaled thresholds (max density = {d_max:.2f}):")
for t in thresholds:
print(f" {t[3]}: {t[0]:.4f}")

# ── Plotter ───────────────────────────────────────────────
plotter = pv.Plotter()
plotter.set_background('black')

surfaces_added = 0
for threshold, color, opacity, label in thresholds:
try:
contour = grid.contour([threshold], scalars='density')
if contour.n_points > 0:
plotter.add_mesh(contour, color=color, opacity=opacity,
show_edges=False, smooth_shading=True, label=label)
surfaces_added += 1
print(f" Surface [{label}] @ {threshold:.4f}: {contour.n_points:,} points")
else:
print(f" No surface at {threshold:.4f}")
except Exception as e:
print(f" Failed at {threshold:.4f}: {e}")

print(f"Total surfaces added: {surfaces_added}")

# ── Accumulate all 3 vector components ───────────────────────
RV_vx = np.zeros((GRID_SIZE-1,)*3, dtype=np.float32)
RV_vy = np.zeros((GRID_SIZE-1,)*3, dtype=np.float32)
RV_vz = np.zeros((GRID_SIZE-1,)*3, dtype=np.float32)
RV_count = np.zeros((GRID_SIZE-1,)*3, dtype=np.float32)

for csv_path in CSV_FILES:
chunk_num = 0
for chunk in pd.read_csv(
csv_path,
usecols=[RA_COL, DEC_COL, PAR_COL, RV_COL],
dtype={RA_COL:'float32', DEC_COL:'float32',
PAR_COL:'float32', RV_COL:'float32'},
chunksize=CHUNK_SIZE,
low_memory=False,
):
chunk = chunk.dropna(subset=[RA_COL, DEC_COL, PAR_COL, RV_COL])
chunk = chunk[chunk[PAR_COL] > 0]
chunk['distance_ly'] = 3262.0 / chunk[PAR_COL]
chunk = chunk[np.isfinite(chunk['distance_ly'])]
chunk = chunk[(chunk['distance_ly'] > 0) & (chunk['distance_ly'] <= MAX_DIST_LY)]
if len(chunk) == 0:
continue

ra_rad = np.radians(chunk[RA_COL].values)
dec_rad = np.radians(chunk[DEC_COL].values)
dist = chunk['distance_ly'].values
rv = chunk[RV_COL].values

xs = dist * np.cos(dec_rad) * np.cos(ra_rad)
ys = dist * np.cos(dec_rad) * np.sin(ra_rad)
zs = dist * np.sin(dec_rad)

# Line-of-sight unit vector scaled by radial velocity
norm = np.maximum(dist, 1e-6)
vx = (xs / norm) * rv
vy = (ys / norm) * rv
vz = (zs / norm) * rv

gs = GRID_SIZE - 1
xi = np.clip(np.searchsorted(x_range, xs) - 1, 0, gs-1)
yi = np.clip(np.searchsorted(y_range, ys) - 1, 0, gs-1)
zi = np.clip(np.searchsorted(z_range, zs) - 1, 0, gs-1)

np.add.at(RV_vx, (xi, yi, zi), vx)
np.add.at(RV_vy, (xi, yi, zi), vy)
np.add.at(RV_vz, (xi, yi, zi), vz)
np.add.at(RV_count, (xi, yi, zi), 1)

chunk_num += 1
if chunk_num % 10 == 0:
print(f" RV chunk {chunk_num}")

# ── Mean velocity per voxel ───────────────────────────────────
valid_cells = RV_count > 0
RV_vx[valid_cells] /= RV_count[valid_cells]
RV_vy[valid_cells] /= RV_count[valid_cells]
RV_vz[valid_cells] /= RV_count[valid_cells]

# ── Build arrow positions and vectors ────────────────────────
x_centers = (x_range[:-1] + x_range[1:]) / 2
y_centers = (y_range[:-1] + y_range[1:]) / 2
z_centers = (z_range[:-1] + z_range[1:]) / 2

# Apply density threshold + stride
gs = GRID_SIZE - 1
ix = np.arange(0, gs, ARROW_STRIDE)
iy = np.arange(0, gs, ARROW_STRIDE)
iz = np.arange(0, gs, ARROW_STRIDE)
IX, IY, IZ = np.meshgrid(ix, iy, iz, indexing='ij')
IX, IY, IZ = IX.ravel(), IY.ravel(), IZ.ravel()

density_vals = H_smooth[IX, IY, IZ]
mask = density_vals > RV_THRESHOLD

IX, IY, IZ = IX[mask], IY[mask], IZ[mask]

arrow_x = x_centers[IX]
arrow_y = y_centers[IY]
arrow_z = z_centers[IZ]

arrow_vx = RV_vx[IX, IY, IZ]
arrow_vy = RV_vy[IX, IY, IZ]
arrow_vz = RV_vz[IX, IY, IZ]

print(f"Arrows to render: {len(arrow_x):,}")

# ── Add to plotter ────────────────────────────────────────────
arrow_points = np.column_stack([arrow_x, arrow_y, arrow_z])
arrow_vectors = np.column_stack([arrow_vx, arrow_vy, arrow_vz])

# Color by RV magnitude (+ = receding, - = approaching)
rv_magnitude = np.sqrt(arrow_vx**2 + arrow_vy**2 + arrow_vz**2)

pdata = pv.PolyData(arrow_points)
pdata['vectors'] = arrow_vectors
pdata['rv_speed'] = rv_magnitude

arrows = pdata.glyph(orient='vectors', scale='rv_speed',
factor=80, # scale arrow size — adjust as needed
geom=pv.Arrow())

plotter.add_mesh(arrows, scalars='rv_speed', cmap='coolwarm',
show_scalar_bar=True, scalar_bar_args={
'title': 'Radial Velocity (km/s)',
'color': 'white'
})

print("Vector field added!")


# ── Annotations ───────────────────────────────────────────
plotter.add_text(
f'3D Stellar Density Isosurfaces Radial Velocity\n{total_stars:,} Stars | GAIA DR3',
position='upper_edge', font_size=12, color='white'
)
plotter.add_text(
'Thin shells near Earth = Stellar Desert\nThick shells at distance = Dense regions',
position='lower_left', font_size=9, color='white'
)
plotter.show_axes()

# ── Earth marker ──────────────────────────────────────────
origin_sphere = pv.Sphere(radius=MAX_DIST_LY * 0.012, center=(0, 0, 0))
plotter.add_mesh(origin_sphere, color='white', opacity=0.9)
plotter.add_point_labels(
[[0, 0, 0]], ['Earth'], font_size=12,
text_color='white', point_color='white',
point_size=0, always_visible=True, shape_opacity=0
)

# ── Legend ────────────────────────────────────────────────
legend_entries = [
['Ultra sparse (0.5%)', 'darkblue'],
['Very sparse (1%)', 'blue'],
['Sparse (2%)', 'cyan'],
['Medium (5%)', 'green'],
['Dense (10%)', 'yellow'],
['Very dense (20%)', 'orange'],
['Maximum (40%)', 'red'],
]
plotter.add_legend(legend_entries, bcolor=(0.1, 0.1, 0.1),
border=True, loc='center left', size=(0.18, 0.22))

# ── Show ──────────────────────────────────────────────────
# ── Show settings ─────────────────────────────────────────
INTERACTIVE = True
MAKE_VIDEO = True

plotter.camera_position = 'iso'
plotter.camera.zoom(1.2)

if INTERACTIVE:
plotter.show()

if MAKE_VIDEO:
# Rebuild plotter for video (PyVista closes renderer after show())
plotter2 = pv.Plotter(off_screen=True)
plotter2.set_background('black')

# Re-add isosurfaces
for threshold, color, opacity, label in thresholds:
try:
contour = grid.contour([threshold], scalars='density')
if contour.n_points > 0:
plotter2.add_mesh(contour, color=color, opacity=opacity,
show_edges=False, smooth_shading=True)
except:
pass

# Re-add arrows
plotter2.add_mesh(arrows, scalars='rv_speed', cmap='coolwarm',
show_scalar_bar=True, scalar_bar_args={
'title': 'Radial Velocity (km/s)',
'color': 'white'
})

# Re-add annotations
plotter2.add_text(
f'3D Stellar Density Isosurfaces Radial Velocity\n{total_stars:,} Stars | GAIA DR3',
position='upper_edge', font_size=12, color='white'
)
plotter2.add_text(
'Thin shells near Earth = Stellar Desert\nThick shells at distance = Dense regions',
position='lower_left', font_size=9, color='white'
)
plotter2.show_axes()

# Re-add Earth marker
origin_sphere = pv.Sphere(radius=MAX_DIST_LY * 0.012, center=(0, 0, 0))
plotter2.add_mesh(origin_sphere, color='white', opacity=0.9)
plotter2.add_point_labels(
[[0, 0, 0]], ['Earth'], font_size=12,
text_color='white', point_color='white',
point_size=0, always_visible=True, shape_opacity=0
)

# Re-add legend
legend_entries = [
['Ultra sparse (0.5%)', 'darkblue'],
['Very sparse (1%)', 'blue'],
['Sparse (2%)', 'cyan'],
['Medium (5%)', 'green'],
['Dense (10%)', 'yellow'],
['Very dense (20%)', 'orange'],
['Maximum (40%)', 'red'],
]
plotter2.add_legend(legend_entries, bcolor=(0.1, 0.1, 0.1),
border=True, loc='center left', size=(0.18, 0.22))

# Video
plotter2.camera_position = 'iso'
plotter2.camera.zoom(1.2)
plotter2.show(auto_close=False, interactive=False)

print("Creating rotation + zoom-out video...")
plotter2.open_movie('isosurface_radial_velocity_33M_rotation.mp4', framerate=30)
plotter2.render()

n_frames_zoom = 360
path_zoom = plotter2.generate_orbital_path(n_points=n_frames_zoom, shift=0, viewup=[0,0,1], factor=2.5)
plotter2.camera.zoom(2.5)
zoom_per_frame = 1.0 - (1.5 / n_frames_zoom)
for i in range(n_frames_zoom):
plotter2.camera.position = path_zoom.points[i]
plotter2.camera.zoom(zoom_per_frame)
plotter2.write_frame()

n_frames_full = 180 * 3
path_full = plotter2.generate_orbital_path(n_points=n_frames_full, shift=0, viewup=[0,0,1], factor=2.5)
for i in range(n_frames_full):
plotter2.camera.position = path_full.points[i]
plotter2.write_frame()

plotter2.close()
print("Video saved!")

total_time = time.time() - t0
print(f"\n{'='*65}")
print(f" Done! Total runtime: {total_time/60:.1f} minutes")
print(f" Stars: {total_stars:,}")
print(f" Grid: {GRID_SIZE}³ = {GRID_SIZE**3:,} cells")
print(f" Surfaces: {surfaces_added}")
print("=" * 65)