This topic was automatically generated from Slack. You can find the original thread here.
- you’re so good at your work.
I am now writing my first ever Python step in Pipedream - you're going to have to hold my hand!!
Here’s the code:
import json
import pandas as pd
import xlsxwriter # Add this import
# Show every row when a DataFrame is printed (no truncation).
pd.set_option('display.max_rows', None)

# Load the JSON data for beats and lyrics.
# The code below reads beat['time'] from each beat and
# lyric['words'][n]['start' / 'end' / 'word'] from each lyric entry.
with open("beatMap.json", "r", encoding="utf-8") as f:
    beats = json.load(f)
with open("lyrics.json", "r", encoding="utf-8") as f:
    lyrics = json.load(f)

# Preprocess lyrics to add midpoints and calculate durations for each word
for lyric in lyrics:
    for word in lyric['words']:
        word['midpoint'] = (word['start'] + word['end']) / 2
        word['duration'] = word['end'] - word['start']

# Group beats into bars of four and format beat dictionaries correctly.
# `bars` maps a 1-based bar number to the list of beat dicts in that bar;
# `bar_times` collects the time of the first beat of each bar.
bars = {}
bar_times = []
current_bar = 1
for i, beat in enumerate(beats):
    # Every fourth beat (except the very first) opens a new bar.
    if (i % 4) == 0 and i != 0:
        current_bar += 1
    if current_bar not in bars:
        bars[current_bar] = []
        bar_times.append(beat['time'])

    # Initialize beat dictionary with 'start', 'end', and 'lyric' keys.
    # A beat ends where the next one starts; the final beat has no
    # successor, so its 'end' is None.
    beat_dict = {
        'time': beat['time'],
        'start': beat['time'],
        'end': beats[i + 1]['time'] if i + 1 < len(beats) else None,
        'lyric': [],
    }
    bars[current_bar].append(beat_dict)
def allocate_words_to_bars(bar_times, lyrics, bars):
    """Assign each lyric word to the beat it overlaps best, bar by bar.

    For every bar, the 'lyric' list of each beat is reset, each word is
    scored against the bar's beats, and the word is appended to the
    highest-scoring beat if that score exceeds 0.3. Beats left without any
    word receive their 1-based position within the bar as a string.

    Args:
        bar_times: Accepted for interface compatibility; not used here.
        lyrics: Iterable of dicts, each with a 'words' list whose items
            carry 'word', 'start' and 'end'.
        bars: Mapping of bar number to a list of beat dicts with 'start',
            'end' (may be None) and 'lyric' keys. Mutated in place.

    Returns:
        None. The result is written into the beat dicts inside ``bars``.
    """
    # NOTE(review): matching is done independently per bar, so a word that
    # straddles a bar boundary can be appended in both bars -- confirm
    # whether that is intended.
    for beats in bars.values():
        # Clear lyrics to ensure a fresh start on repeated calls.
        for beat in beats:
            beat['lyric'] = []

        for lyric in lyrics:
            for word in lyric['words']:
                best_fit = None
                max_overlap = 0
                for beat in beats:
                    # Skip beats without an end time and beats that start
                    # after the word has already finished.
                    if beat['end'] is None or word['end'] < beat['start']:
                        continue
                    overlap = (min(beat['end'], word['end'])
                               - max(beat['start'], word['start']))
                    if overlap > 0:
                        # Fraction of the word's duration inside this beat.
                        # overlap > 0 implies the word has non-zero length.
                        overlap_percentage = overlap / (word['end'] - word['start'])
                        if beat['start'] <= word['start'] < beat['end']:
                            # Increase weight for starting alignment.
                            overlap_percentage += 0.5
                        if overlap_percentage > max_overlap:
                            max_overlap = overlap_percentage
                            best_fit = beat
                # Adjusted threshold: ignore weak matches.
                if best_fit is not None and max_overlap > 0.3:
                    best_fit['lyric'].append(word['word'])

        # Fill empty beats with their beat number.
        for i, beat in enumerate(beats):
            if not beat['lyric']:
                beat['lyric'].append(str(i + 1))
# Allocate lyric words to beats. This must run after `bars` is defined and
# before the DataFrame is created. One call is enough: the function clears
# every beat's 'lyric' list before filling it.
allocate_words_to_bars(bar_times, lyrics, bars)

# Prompt for the file name
file_name = input("Enter the file name for the Excel output (without extension): ") + ".xlsx"

# One spreadsheet column per beat position within a bar.
BEAT_COLUMNS = ['1', '2', '3', '4']
# Single source of truth for the sheet name, so the sheet that is written
# and the sheet that is looked up afterwards cannot drift apart.
SHEET_NAME = 'Beats'

# Collect one row per bar, then build the DataFrame in a single step.
rows = []
for bar, beats in bars.items():
    # Convert the start time of the first beat to "00:00" format
    start_time = beats[0]['start']
    minutes = int(start_time // 60)
    seconds = int(start_time % 60)
    row = {'Time': f"{minutes:02}:{seconds:02}", 'Bar': bar}
    # Pre-fill every beat column so a bar with fewer than four beats yields
    # empty strings instead of NaN cells.
    row.update(dict.fromkeys(BEAT_COLUMNS, ''))
    for i, beat in enumerate(beats):
        row[str(i + 1)] = ' '.join(beat['lyric'])
    rows.append(row)
df_beats = pd.DataFrame(rows, columns=['Time', 'Bar'] + BEAT_COLUMNS)

# Write the DataFrame to an Excel file using XlsxWriter
with pd.ExcelWriter(file_name, engine='xlsxwriter') as writer:
    df_beats.to_excel(writer, index=False, sheet_name=SHEET_NAME)
    worksheet = writer.sheets[SHEET_NAME]

    # Alignment has to be supplied as a workbook Format object in the
    # cell_format argument; a plain dict in the options argument is ignored.
    centered = writer.book.add_format({'align': 'center'})
    worksheet.set_column('A:A', 5, centered)
    worksheet.set_column('B:B', 3.33, centered)
    worksheet.set_column('C:F', 13.33)

    # Ensure numeric columns are written as numbers.
    # Row 0 holds the header, so data rows start at 1.
    for col_num, value in enumerate(df_beats.columns.values):
        if value != 'Time' and value != 'Bar':
            for row_num, cell_value in enumerate(df_beats[value], start=1):
                if isinstance(cell_value, (int, float)):
                    worksheet.write_number(row_num, col_num, cell_value)
                else:
                    worksheet.write_string(row_num, col_num, cell_value)
A few things about it:
The input files are saved as /tmp/lyrics.json and /tmp/beatMap.json.
Re:
# Prompt for the file name
file_name = input("Enter the file name for the Excel output (without extension): ") + ".xlsx"
Instead of prompting, I want to use “steps.Moisis_Pause_For_Processing_Compeptition.jobDetails.name”. However, this will be an audio file name, for example “Crash Charli XCX.mp3”, so I need to strip the “.mp3” extension before adding “.xlsx”.