# Source code for asmd.convert_from_file

import csv
import os
import re
from copy import deepcopy
from functools import wraps

import numpy as np
import pretty_midi
import scipy.io

from . import utils


def convert(exts, no_dot=True, remove_player=False):
    """
    This function is designed to be used as decorators for functions which
    converts from a filetype to our JSON format.

    The decorated function is called with a path derived from `input_fn`
    (see `change_ext`): the candidates built from `exts` are tried in order
    and the first one which exists on disk is used. If none of them exists,
    the candidate built from the last extension is passed anyway, so that the
    decorated function can decide how to handle the missing file.

    If the decorated function returns a `dict`, it is wrapped in a
    one-element list; any other return value is passed through unchanged.

    Example of usage:

    >>> @convert(['.myext'], no_dot=True, remove_player=False)
    ... def function_which_converts(...):
    ...     pass

    Parameters
    ---
    * exts : list of str
        the possible extensions of the ground-truths to be converted, e.g.
        ['.mid', '.midi']. You can also use this parameter to remove exceeding
        parts at the end of the filename (see `from_bach10_mat` and
        `from_bach10_f0` source code)

    * no_dot : boolean
        if True, don't add a dot before of the extension, if False, add it
        if not present; this is useful if you are using the extension to remove
        other parts in the file name (see `exts`).

    * remove_player : boolean
        if True, remove the name of the player in the last part of the file
        name: use this for the `traditional_flute` dataset; it will remove the
        part after the last '_'.

    Raises
    ---
    * ValueError
        when the decorated function is called and `exts` yields no extension
    """
    def _convert(user_convert):
        @wraps(user_convert)
        def func(input_fn, *args, **kwargs):
            new_fn = None
            for ext in exts:
                new_fn = change_ext(input_fn, ext, no_dot, remove_player)
                if os.path.exists(new_fn):
                    break

            if new_fn is None:
                # without this check an empty `exts` would surface as an
                # obscure unbound-variable error
                raise ValueError(
                    "convert: `exts` must contain at least one extension")

            out = user_convert(new_fn, *args, **kwargs)

            # converters may return a single ground-truth; always hand back
            # a list of ground-truths in that case
            if isinstance(out, dict):
                out = [out]
            return out

        return func

    return _convert


prototype_gt = {
    "precise_alignment": {
        "onsets": [],
        "offsets": [],
        "pitches": [],
        "notes": [],
        "velocities": []
    },
    "misaligned": {
        "onsets": [],
        "offsets": [],
        "pitches": [],
        "notes": [],
        "velocities": []
    },
    "score": {
        "onsets": [],
        "offsets": [],
        "pitches": [],
        "notes": [],
        "velocities": [],
        "beats": []
    },
    "broad_alignment": {
        "onsets": [],
        "offsets": [],
        "pitches": [],
        "notes": [],
        "velocities": []
    },
    "missing": [],
    "extra": [],
    "f0": [],
    "soft": {
        "values": [],
        "times": []
    },
    "sostenuto": {
        "values": [],
        "times": []
    },
    "sustain": {
        "values": [],
        "times": []
    },
    "instrument": 255,
}
"""
The dictionary prototype for containing the ground_truth.
Always work on a deep copy, so that the nested lists of the shared prototype
are never modified. Use:

>>> from copy import deepcopy
>>> from convert_from_file import prototype_gt
>>> gt = deepcopy(prototype_gt)

>>> gt
{
    "precise_alignment": {
        "onsets": [],
        "offsets": [],
        "pitches": [],
        "notes": [],
        "velocities": []
    },
    "misaligned": {
        "onsets": [],
        "offsets": [],
        "pitches": [],
        "notes": [],
        "velocities": []
    },
    "score": {
        "onsets": [],
        "offsets": [],
        "pitches": [],
        "notes": [],
        "velocities": [],
        "beats": []
    },
    "broad_alignment": {
        "onsets": [],
        "offsets": [],
        "pitches": [],
        "notes": [],
        "velocities": []
    },
    "missing": [],
    "extra": [],
    "f0": [],
    "soft": {
        "values": [],
        "times": []
    },
    "sostenuto": {
        "values": [],
        "times": []
    },
    "sustain": {
        "values": [],
        "times": []
    },
    "instrument": 255,
}

Note: ``pitches``, ``velocities``, ``sustain``, ``sostenuto``, ``soft``, and
(if available) ``instrument`` must be in range [0, 128)

Note: the default ``instrument`` value (255) lies outside that range;
presumably it marks an instrument which is not available (to be confirmed).
"""


def change_ext(input_fn, new_ext, no_dot=False, remove_player=False):
    """
    Return the input path `input_fn` with `new_ext` as extension and the part
    after the last '-' removed.
    If `no_dot` is True, it will not add a dot before of the extension,
    otherwise it will add it if not present.
    `remove_player` can be used to remove the name of the player in the last
    part of the file name: use this for the `traditional_flute` dataset;
    it will remove the last part after '_'.

    If `input_fn` contains no '-' (or, with `remove_player`, no '_'), nothing
    is removed for that separator and `new_ext` is simply appended.

    N.B. the separators are searched in the whole string, directory names
    included.
    """

    # `rpartition` reports whether the separator was found; slicing with the
    # result of `rfind` would drop the last character when it is missing
    head, sep, _ = input_fn.rpartition('-')
    root = head if sep else input_fn

    if remove_player:
        head, sep, _ = root.rpartition('_')
        if sep:
            root = head

    if not no_dot and not new_ext.startswith('.'):
        new_ext = '.' + new_ext

    return root + new_ext


[docs]def _sort_lists(*lists):
    """
    Sort multiple lists in-place with reference to the first one
    """

    idx = list(range(len(lists[0])))
    idx.sort(key=lists[0].__getitem__)
    for i in range(len(lists)):
        if len(lists[i]) > 0:
            lists[i][:] = map(lists[i].__getitem__, idx)


def _sort_alignment(alignment, data):
    """
    Sort `data` in `alignment` (in-place)

    `alignment` is the key of `data` to be sorted (e.g. 'score'); its
    'pitches', 'offsets', 'velocities' and 'notes' lists are reordered
    following the ascending order of its 'onsets'.
    """

    level = data[alignment]
    # 'onsets' goes first because it is the reference of the sorting
    _sort_lists(level['onsets'], level['pitches'], level['offsets'],
                level['velocities'], level['notes'])


def _sort_pedal(data):
    """
    Sort pedal for `data` (in-place)

    For each one of 'soft', 'sustain' and 'sostenuto', the 'values' are
    reordered following the ascending order of the corresponding 'times'.
    """
    for cc_name in ('soft', 'sustain', 'sostenuto'):
        pedal = data[cc_name]
        _sort_lists(pedal['times'], pedal['values'])


def from_midi(midi_fn,
              alignment='precise_alignment',
              pitches=True,
              velocities=True,
              merge=True,
              beats=False):
    """
    Open a midi file `midi_fn` and convert it to our ground_truth
    representation. This fills velocities, pitches, beats, sustain, soft,
    sostenuto and alignment (default: `precise_alignment`). Returns a list
    of dictionaries. `alignment` can also be `None` or `False`, in that
    case no alignment is filled (pitches and velocities included, since they
    are stored inside the alignment) and only the pedals are collected.
    If `merge` is True, all the tracks are merged in one dictionary and the
    returned list contains only that dictionary; if it is False, the returned
    list will contain a dictionary for each track. Beats are filled only if
    `beats` is True and `alignment` is 'score'; they are taken from
    `pretty_midi.PrettyMIDI.get_beats`.

    This functions is decorated with 3 different sets of parameters:

    * `from_midi` is the decorated version with `remove_player=False`
    * `from_midi_remove_player` is the decorated version with
      `remove_player=True`
    * `from_midi_asap` is the decorated version which accept extension
      '.score.mid' which is used in the script to import scores from ASAP

    N.B. To allow having some annotation for subgroups of a dataset, this
    function returns None when it cannot find the specified midi file; in this
    way, that file is not taken into account while merging the various
    annotations (e.g. asap group inside Maestro dataset)
    """
    try:
        pm = pretty_midi.PrettyMIDI(midi_fn)
    except FileNotFoundError:
        return None

    # control change numbers of the pedals which are collected
    pedal_names = {64: 'sustain', 66: 'sostenuto', 67: 'soft'}

    def _finalize(data):
        # sort everything by time once a dictionary is complete
        _sort_pedal(data)
        if alignment:
            _sort_alignment(alignment, data)
        return data

    out = list()

    if merge:
        data = deepcopy(prototype_gt)

    for track in pm.instruments:
        if not merge:
            data = deepcopy(prototype_gt)

        for cc in track.control_changes:
            cc_name = pedal_names.get(cc.number)
            if cc_name is None:
                continue
            data[cc_name]['values'].append(cc.value)
            data[cc_name]['times'].append(cc.time)

        # without an alignment there is no place where notes can be stored
        if alignment:
            level = data[alignment]
            for note in track.notes:
                if pitches:
                    level["pitches"].append(note.pitch)
                if velocities:
                    level["velocities"].append(note.velocity)
                level["onsets"].append(float(note.start))
                level["offsets"].append(float(note.end))

        if beats and alignment == 'score':
            data[alignment]["beats"] = sorted(pm.get_beats().tolist())

        if not merge:
            out.append(_finalize(data))

    if merge:
        out.append(_finalize(data))

    return out


# Public converters obtained by wrapping the plain `from_midi` function with
# `convert`, each one using a different set of parameters.

# Variant which passes `remove_player=True` to `convert`.
from_midi_remove_player = convert(['.mid', '.midi'],
                                  remove_player=True)(from_midi)
# Variant which looks for ground-truth files ending with '.score.mid'.
from_midi_asap = convert(['.score.mid'], remove_player=False)(from_midi)

# N.B. this rebinding of `from_midi` must be the last one: the assignments
# above have to wrap the undecorated function, not the already decorated one!
from_midi = convert(['.mid', '.midi'], remove_player=False)(from_midi)


@convert(['.txt'])
def from_phenicx_txt(txt_fn):
    """
    Open a txt file `txt_fn` in the PHENICX format and convert it to our
    ground_truth representation. This fills: `broad_alignment`.

    Each non-blank line is read as comma-separated fields in which the first
    is the onset, the second is the offset and the third is the note name.
    Blank lines are skipped. Returns a list containing only one dict.
    """
    out = deepcopy(prototype_gt)
    level = out["broad_alignment"]

    with open(txt_fn) as f:
        for line in f:
            if not line.strip():
                # e.g. a trailing empty line at the end of the file
                continue
            # stripping removes the line terminator and stray spaces, which
            # would make the note name unparsable
            fields = [field.strip() for field in line.split(',')]
            level["notes"].append(fields[2])
            level["pitches"].append(
                pretty_midi.note_name_to_number(fields[2]))
            level["onsets"].append(float(fields[0]))
            level["offsets"].append(float(fields[1]))

    _sort_alignment("broad_alignment", out)

    return [out]


@convert(['-GTNotes.mat'], no_dot=True)
def from_bach10_mat(mat_fn, sources=range(4)):
    """
    Open a matlab mat file `mat_fn` in the MIREX format (Bach10) and convert
    it to our ground_truth representation. This fills: `precise_alignment`,
    `pitches`.
    `sources` is an iterable containing the indices of the  sources to be
    considered, where the first source is 0. Returns a list of dictionary, one
    per source.
    """
    out_list = list()

    mat = scipy.io.loadmat(mat_fn)['GTNotes']
    # only the requested sources are converted, as in `from_bach10_f0`
    for i in sources:
        out = deepcopy(prototype_gt)
        level = out["precise_alignment"]
        source = mat[i, 0]
        for j in range(len(source)):
            note = source[j, 0]
            # row 1 of a note is summarized by the median of its rounded
            # values, which is taken as the pitch of the whole note
            level["pitches"].append(float(np.median(np.rint(note[1, :]))))
            # first and last value of row 0 are turned into seconds
            # NOTE(review): presumably frame indices with a 10 ms hop and a
            # 2 frames offset -- confirm against the Bach10 documentation
            level["onsets"].append(float((note[0, 0] - 2) * 10 / 1000.))
            level["offsets"].append(float((note[0, -1] - 2) * 10 / 1000.))
        _sort_alignment("precise_alignment", out)
        out_list.append(out)

    return out_list


@convert(['-GTF0s.mat'], no_dot=True)
def from_bach10_f0(nmat_fn, sources=range(4)):
    """
    Open a matlab mat file `nmat_fn` in the MIREX format (Bach10) for frame
    evaluation and convert it to our ground_truth representation. This fills:
    `f0`.  `sources` is an iterable containing the indices of the  sources to
    be considered, where the first source is 0.  Returns a list of dictionary,
    one per source.
    """
    out_list = list()

    f0s = scipy.io.loadmat(nmat_fn)['GTF0s']
    for source in sources:
        out = deepcopy(prototype_gt)
        # one entry of `GTF0s` per source, stored as a plain python list
        out["f0"] = f0s[source].tolist()
        out_list.append(out)

    return out_list


@convert(['.csv'])
def from_musicnet_csv(csv_fn, sr=44100.0):
    """
    Open a csv file `csv_fn` and convert it to our ground_truth representation.
    This fills: `broad_alignment`, `score`, `pitches`.
    This returns only one dict, which the `convert` decorator wraps in a
    list. `sr` is the samplerate of the audio files (MusicNet csv contains
    the sample number as onset and offsets of each note) and it should be a
    float.

    The first line of the file is skipped as header; empty rows are ignored.
    `score` beats are one per unit of score time, from 0 up to the integer
    part of the last score offset.

    N.B. MusicNet contains wav files at 44100 Hz as samplerate.
    N.B. Lowest in pitch in musicnet is 21, so we assume that they count pitch
    starting with 0 as in midi.org standard.
    N.B. `score` times are provided with BPM 60 for all the scores
    """
    out = deepcopy(prototype_gt)
    broad = out["broad_alignment"]
    score = out["score"]

    # the context manager closes the file even if a row cannot be parsed
    with open(csv_fn, newline='') as f:
        data = csv.reader(f, delimiter=',')

        # skipping first line
        next(data, None)

        for row in data:
            if not row:
                continue
            # converting everything to float, except the last one that is
            # the duration name as string
            row = list(map(float, row[:-1]))

            broad["onsets"].append(int(row[0]) / sr)
            broad["offsets"].append(int(row[1]) / sr)
            out["instrument"] = int(row[2])
            broad["pitches"].append(int(row[3]))
            score["pitches"].append(int(row[3]))
            score["onsets"].append(float(row[4]))
            score["offsets"].append(float(row[4]) + float(row[5]))

    # computed once, after all the rows have been read; with no notes the
    # beats stay empty
    if score["offsets"]:
        score["beats"] = list(range(int(max(score["offsets"])) + 1))

    _sort_alignment('score', out)
    _sort_alignment('broad_alignment', out)
    return out


@convert(['.gt'])
def from_sonic_visualizer(gt_fn, alignment='precise_alignment'):
    """
    Takes a filename of a sonic visualizer output file  exported as 'csv' and
    fills the 'alignment' specified

    Each row is read as: onset, frequency, duration. Rows whose frequency is
    lower than the frequency of midi pitch 0 are discarded, and empty rows
    are ignored. This returns only one dict, which the `convert` decorator
    wraps in a list.
    """

    min_midi_freq = utils.midi_pitch_to_f0(0)
    out = deepcopy(prototype_gt)
    level = out[alignment]

    # the context manager closes the file even if a row cannot be parsed
    with open(gt_fn, newline='') as f:
        for row in csv.reader(f, delimiter=','):
            if not row:
                continue
            p = float(row[1])
            if p < min_midi_freq:
                continue
            onset = float(row[0])
            level["onsets"].append(onset)
            level["offsets"].append(onset + float(row[2]))
            level["pitches"].append(utils.f0_to_midi_pitch(p))

    _sort_alignment(alignment, out)
    return out