diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ba0430d --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +__pycache__/ \ No newline at end of file diff --git a/README.md b/README.md index 6201b51..16f51ae 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,19 @@ # melspectrum-video -Make a video showing a horizontally rolling mel spectrogram of the audio input. \ No newline at end of file +Make a video showing a horizontally rolling mel spectrogram of the audio input. + +## About + +Copyright for media included in media/ +[Alouette.ogg](https://en.wikipedia.org/wiki/File:Alouette.mid) is available under Creative Commons CC0 1.0 +[Zonotrichia.ogg](https://en.wikipedia.org/wiki/File:Voice_of_Zonotrichia_albicollis.ogg) is available under Creative Commons Attribution 3.0 Unported license. + +## How to run + +Python 3.11, `pip install -r requirements.txt` + +To produce the example video: + +`python -m movie Alouette.ogg Alouette.mp4 --offset 18 --duration 23 --height 800 --window 3 --fps 24 --fmax 4000` + +`python -m movie Zonotrichia.ogg Zonotrichia.mp4 --window 1 --pps 600` \ No newline at end of file diff --git a/media/Alouette.mp4 b/media/Alouette.mp4 new file mode 100644 index 0000000..5a34eb2 Binary files /dev/null and b/media/Alouette.mp4 differ diff --git a/media/Alouette.ogg b/media/Alouette.ogg new file mode 100644 index 0000000..f9bfc58 Binary files /dev/null and b/media/Alouette.ogg differ diff --git a/media/Zonotrichia.mp4 b/media/Zonotrichia.mp4 new file mode 100644 index 0000000..24e4234 Binary files /dev/null and b/media/Zonotrichia.mp4 differ diff --git a/media/Zonotrichia.ogg b/media/Zonotrichia.ogg new file mode 100644 index 0000000..6a5f6e3 Binary files /dev/null and b/media/Zonotrichia.ogg differ diff --git a/movie.py b/movie.py new file mode 100644 index 0000000..67fe8bf --- /dev/null +++ b/movie.py @@ -0,0 +1,92 @@ +import matplotlib.pyplot as plt +import soundfile as sf +import librosa +import librosa.display +import numpy as 
import numpy as np

# Colour offset added to the centre columns of every frame (red up, green and
# blue down) so the current playback position shows as a vertical marker.
_MARKER_TINT = np.array([100, -100, -100], dtype=np.int16)
# Half-width, in pixels, of that centre marker.
_MARKER_HALF_WIDTH = 2


def make_mel(offset_load, duration_load, pps, height, fmax, audio_path, mel_path):
    """
    Render the mel spectrogram of an audio file to an image file.

    offset_load: start reading the audio after this many seconds
    duration_load: only load this many seconds of audio (None loads everything)
    pps: pixels of x axis per second of audio
    height: requested image height in pixels
    fmax: max Mel frequency (Hz); clamped to half the sample rate
    audio_path: audio file to read
    mel_path: where the spectrogram image is saved

    Returns (duration, audio_path). ``duration`` is the length in seconds of
    the audio that was loaded. ``audio_path`` is the input path, or the path
    of a new '<audio_path>.cut.wav' file holding just the loaded excerpt when
    an offset or a duration was requested.

    Raises ValueError when no audio samples were loaded.
    """
    y, sr = librosa.load(audio_path, sr=None, offset=offset_load, duration=duration_load)
    if offset_load != 0 or duration_load is not None:
        print(y.shape)
        # Save the excerpt so the video soundtrack matches the spectrogram.
        audio_path = audio_path + '.cut.wav'
        sf.write(audio_path, y, sr, 'PCM_24')
    duration = librosa.get_duration(y=y, sr=sr)
    print("y.shape:" + str(y.shape))
    print("sample rate:" + str(sr))
    print("duration:%.4f s" % duration)
    if duration <= 0:
        raise ValueError("no audio samples were loaded from %r" % (audio_path,))

    # A mel filter bank cannot reach past the Nyquist frequency.
    fmax = min(fmax, sr / 2)

    # With dpi=1 the figure size in "inches" is the image size in pixels.
    fig = plt.figure(figsize=(duration * pps, height), dpi=1)
    try:
        plt.axis('off')
        # fmax is given to both calls so that the frequency axis used for
        # drawing matches the frequency range of the computed mel bands.
        spect = librosa.feature.melspectrogram(
            y=y, sr=sr, n_fft=2048, hop_length=512, fmax=fmax)
        mel_spect = librosa.power_to_db(spect, ref=np.max)
        librosa.display.specshow(mel_spect, sr=sr, y_axis='mel', fmax=fmax, x_axis='time')
        plt.savefig(mel_path, bbox_inches='tight', dpi=1)
    finally:
        # Release the figure; pyplot keeps figures alive until closed.
        plt.close(fig)
    return duration, audio_path


def _crop_with_padding(img, start, width):
    """Return columns [start, start + width) of img, zero-filled outside it."""
    total = img.shape[1]
    out = np.zeros((img.shape[0], width) + img.shape[2:], dtype=img.dtype)
    lo = max(start, 0)
    hi = min(start + width, total)
    if hi > lo:
        out[:, lo - start:hi - start] = img[:, lo:hi]
    return out


def render_frame(img, t, duration, window, pps, screen_w):
    """
    Build the video frame shown at time t.

    img: (height, width, 3) uint8 spectrogram image
    t: time of the frame in seconds
    duration: length of the audio in seconds
    window: half of the time span (seconds) covered by one frame
    pps: pixels of x axis per second of audio
    screen_w: frame width in pixels (must be positive)

    The frame is a screen_w wide slice of img positioned so that time t falls
    on the centre marker; parts of the slice that lie before the start or
    after the end of the image are black. The result is always a
    (height, screen_w, 3) uint8 array, including when the image is narrower
    than the frame.
    """
    if t < window:
        # Lead-in: the image enters from the right edge of the frame.
        start = int((t + window) * pps) - screen_w
        visible = img
    elif t > duration - window:
        # Tail: round the left edge up so the slice never exceeds screen_w.
        final = int(duration * pps)
        start = min(int((t - window) * pps) + 1, final)
        visible = img[:, :final]
    else:
        start = int((t - window) * pps)
        visible = img

    # Work in a signed type so the marker tint can push values outside 0..255
    # and be clipped, instead of wrapping around in an 8-bit frame.
    frame = _crop_with_padding(visible, start, screen_w).astype(np.int16)
    mid = screen_w // 2
    frame[:, max(mid - _MARKER_HALF_WIDTH, 0):mid + _MARKER_HALF_WIDTH] += _MARKER_TINT
    return np.clip(frame, 0, 255).astype(np.uint8)


def make_video(audio_path, mel_path, video_path, duration, window, fps):
    """
    Write a video that scrolls the spectrogram image in sync with the audio.

    audio_path: audio file used as the soundtrack
    mel_path: spectrogram image to scroll
    video_path: output video file
    duration: length in seconds of the audio the image represents
    window: half of the time span (seconds) visible in one frame
    fps: frames per second of the output video

    Raises ValueError when window is too small to give a frame at least one
    pixel wide.
    """
    # Imported here so render_frame stays importable without the video stack.
    from moviepy.editor import VideoClip, AudioFileClip
    from PIL import Image

    with Image.open(mel_path) as picture:
        img = np.array(picture.convert('RGB'))
    print(img.shape)
    screen_h, total_width, _ = img.shape

    # Recalculate pixels per second from the saved image: saving with a
    # tight bounding box may have changed its width.
    pps = total_width / duration
    print("totalwidth:", total_width)
    print("duration:", duration)
    print("pps:", pps)
    screen_w = int(2 * window * pps)
    if screen_w < 1:
        raise ValueError("window=%r is too small to give a visible frame" % (window,))
    print(f"Generating a {screen_w} x {screen_h} video")

    def make_frame(t):
        return render_frame(img, t, duration, window, pps, screen_w)

    # Create the audio and video clip
    audio_clip = AudioFileClip(audio_path)
    try:
        video_clip = VideoClip(make_frame, duration=audio_clip.duration)
        video_clip = video_clip.set_audio(audio_clip)
        video_clip.write_videofile(video_path, fps=fps)
    finally:
        # Release the audio reader even when encoding fails.
        audio_clip.close()


def main():
    """Parse the command line, then build the spectrogram image and the video."""
    import argparse
    import os

    parser = argparse.ArgumentParser(
        description='Make a video showing a horizontally rolling mel spectrogram '
                    'of the audio input')
    parser.add_argument('audio_path')
    parser.add_argument('video_path')
    parser.add_argument('--pps', type=int, default=200, help="horizontal pixels per second")
    parser.add_argument('--fmax', type=float, default=8000, help='max Mel frequency (Hz)')
    parser.add_argument('--window', type=float, default=5,
                        help='2*window size is the duration (seconds) '
                             'of the audio segment represented by every frame')
    parser.add_argument('--height', type=int, default=1000,
                        help='height of the mel spectrogram and the video')
    parser.add_argument('--fps', type=int, default=24, help="fps of the output video")
    parser.add_argument('--offset', type=float, default=0,
                        help="start reading after this time (seconds)")
    parser.add_argument('--duration', type=float, nargs="?",
                        help="only load up to 'duration' seconds "
                             "of the original audio")
    args = parser.parse_args()
    if args.window <= 0:
        parser.error("--window must be positive")

    # Bare file names are looked up in (and written to) the media directory.
    media_dir = './media/'
    os.makedirs(media_dir, exist_ok=True)
    audio_path = args.audio_path if os.path.dirname(args.audio_path) else media_dir + args.audio_path
    video_path = args.video_path if os.path.dirname(args.video_path) else media_dir + args.video_path
    mel_path = media_dir + os.path.basename(args.audio_path) + '.png'

    duration, audio_path = make_mel(args.offset, args.duration, args.pps, args.height,
                                    args.fmax, audio_path, mel_path)
    make_video(audio_path, mel_path, video_path, duration, args.window, args.fps)


if __name__ == "__main__":
    main()
0000000..ce27b63 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,5 @@ +matplotlib==3.7.1 +soundfile==0.12.1 +librosa==0.10.0 +moviepy==1.0.3 +pillow==9.3.0 \ No newline at end of file