add files
parent 22971f7fc9
commit e8a894eaa0

.gitignore (vendored, new file, +1)
@@ -0,0 +1 @@
__pycache__/
README.md (+16)
@@ -1,3 +1,19 @@
# melspectrum-video

Make a video showing a horizontally rolling mel spectrogram of the audio input.

## About

Copyright for media included in media/:

[Alouette.ogg](https://en.wikipedia.org/wiki/File:Alouette.mid) is available under the Creative Commons CC0 1.0 license.

[Zonotrichia.ogg](https://en.wikipedia.org/wiki/File:Voice_of_Zonotrichia_albicollis.ogg) is available under the Creative Commons Attribution 3.0 Unported license.

## How to run

Python 3.11 is required; install the dependencies with `pip install -r requirements.txt`.

To produce the example videos:

`python -m movie Alouette.ogg Alouette.mp4 --offset 18 --duration 23 --height 800 --window 3 --fps 24 --fmax 4000`

`python -m movie Zonotrichia.ogg Zonotrichia.mp4 --window 1 --pps 600`
BIN  media/Alouette.mp4 (new file, binary file not shown)
BIN  media/Alouette.ogg (new file, binary file not shown)
BIN  media/Zonotrichia.mp4 (new file, binary file not shown)
BIN  media/Zonotrichia.ogg (new file, binary file not shown)
movie.py (new file, +92)
@@ -0,0 +1,92 @@
import matplotlib.pyplot as plt
import soundfile as sf
import librosa
import librosa.display
import numpy as np
from moviepy.editor import VideoClip, AudioFileClip
from PIL import Image

def make_mel(offset_load, duration_load, pps, height, fmax, audio_path, mel_path):
    """
    Make a mel spectrogram image.
    pps: horizontal pixels per second of audio
    fmax: max mel frequency (Hz)
    """
    y, sr = librosa.load(audio_path, sr=None, offset=offset_load, duration=duration_load)
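    # The trimmed segment is written to a side file so that the video soundtrack
    # later matches exactly the audio rendered in the spectrogram.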
    if offset_load != 0 or duration_load is not None:
        print(y.shape)
        audio_path = audio_path + '.cut.wav'
        sf.write(audio_path, y, sr, 'PCM_24')
    duration = librosa.get_duration(y=y, sr=sr)
    print("y.shape: " + str(y.shape))
    print("sample rate: " + str(sr))
    print("duration: %.4f s" % duration)
    plt.figure(figsize=(duration * pps, height), dpi=1)
    plt.axis('off')
    spect = librosa.feature.melspectrogram(y=y, sr=sr, n_fft=2048, hop_length=512)
    mel_spect = librosa.power_to_db(spect, ref=np.max)
    librosa.display.specshow(mel_spect, sr=sr, y_axis='mel', fmax=fmax, x_axis='time')
    plt.savefig(mel_path, bbox_inches='tight', dpi=1)
    return duration, audio_path


def make_video(audio_path, mel_path, video_path, duration, window, fps):
    img = Image.open(mel_path)
    img = np.array(img.convert('RGB'))
    print(img.shape)
    screen_h, total_width, _ = img.shape

    pps = total_width / duration  # recalculate pixels per second
    print("totalwidth:", total_width)
    print("duration:", duration)
    print("pps:", pps)
    screen_w = int(2 * window * pps)
    print(f"Generating a {screen_w} x {screen_h} video")
    filler = np.zeros((screen_h, screen_w, 3))
    superimpose = np.zeros((screen_h, screen_w, 3))
    superimpose[:, screen_w // 2 - 2:screen_w // 2 + 2, :] = np.array([100.0, -100.0, -100.0])
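
    # Each frame is the slice of the image centred on time t: padded with filler
    # during the first `window` seconds, padded again near the end, and a plain
    # slice of width 2*window*pps in between.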
    def make_frame(t):
        if t < window:
            idx = int((t + window) * pps)
            ret = np.concatenate((filler[:, 0:screen_w - idx, :], img[:, :idx, :]), axis=1)
        elif t > duration - window:
            idx = int((t - window) * pps)
            final = int(duration * pps)
            idx = min(idx + 1, final)  # round up and clamp so that screen_w-(final-idx) >= 0
            ret = np.concatenate((img[:, idx:final, :], filler[:, 0:screen_w - (final - idx), :]), axis=1)
        else:
            idx = int((t - window) * pps)
            ret = img[:, idx:idx + screen_w, :]
        return ret + superimpose

    # Create the audio and video clip
    audio_clip = AudioFileClip(audio_path)
    video_clip = VideoClip(make_frame, duration=audio_clip.duration)
    video_clip = video_clip.set_audio(audio_clip)
    video_clip.write_videofile(video_path, fps=fps)


if __name__ == "__main__":
    import argparse
    parser = argparse.ArgumentParser(
        description='Make a video showing a horizontally rolling mel spectrogram of the audio input')
    parser.add_argument('audio_path')
    parser.add_argument('video_path')
    parser.add_argument('--pps', type=int, default=200, help="horizontal pixels per second")
    parser.add_argument('--fmax', type=float, default=8000, help='max Mel frequency (Hz)')
    parser.add_argument('--window', type=float, default=5,
                        help='2*window is the duration (seconds) of the audio segment represented by every frame')
    parser.add_argument('--height', type=int, default=1000, help='height of the mel spectrogram and the video')
    parser.add_argument('--fps', type=int, default=24, help="fps of the output video")
    parser.add_argument('--offset', type=float, default=0, help="start reading after this time (seconds)")
    parser.add_argument('--duration', type=float, nargs="?",
                        help="only load up to 'duration' seconds of the original audio")
    args = parser.parse_args()
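
    # Bare file names (with no '/') are resolved inside ./media/, and the
    # spectrogram PNG is written there as '<audio file name>.png'.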
    audio_path = args.audio_path if '/' in args.audio_path else ('./media/' + args.audio_path)
    video_path = args.video_path if '/' in args.video_path else ('./media/' + args.video_path)
    mel_path = './media/' + args.audio_path.split('/')[-1] + '.png'

    duration, audio_path = make_mel(args.offset, args.duration, args.pps, args.height, args.fmax, audio_path, mel_path)
    make_video(audio_path, mel_path, video_path, duration, args.window, args.fps)
requirements.txt (new file, +5)
@@ -0,0 +1,5 @@
matplotlib==3.7.1
soundfile==0.12.1
librosa==0.10.0
moviepy==1.0.3
pillow==9.3.0