· 6 years ago · Jul 24, 2019, 12:14 PM
1from scipy import signal
2from scipy.io import wavfile
3import numpy as np
4import matplotlib.pyplot as plt
5import soundfile as sf
6import os,shutil
7import csv
8
# NOTE: raw strings are used so that every backslash in the Windows paths is
# literal; the previous mix of '\\' and bare '\p' / '\D' relied on invalid
# escape sequences being passed through unchanged.

# Folder that contains the audio recordings.
baseAudioPath = r'C:\Users\pavlo\Downloads\british-birdsong-dataset\songs\songs'

# Parent folder of the audio, spectrogram, train and validation folders.
basePath = r'C:\Users\pavlo\Downloads\british-birdsong-dataset\songs'

# CSV file with the metadata about the audio recordings.
CSVMetaDataFile = r'C:\Users\pavlo\Downloads\british-birdsong-dataset\birdsong_metadata.csv'

# Folder that receives the generated spectrogram images.
SpectogramPathFile = r'C:\Users\pavlo\Downloads\british-birdsong-dataset\songs\spectograms'

# Folder that receives the training images.
trainPath = r'C:\Users\pavlo\Downloads\british-birdsong-dataset\songs\train'

# Folder that receives the validation images.
validationPath = r'C:\Users\pavlo\Downloads\british-birdsong-dataset\songs\validation'
38
def createDirectory(path):
    """Create the directory ``path`` if it does not exist yet.

    Missing parent directories are created as well; an existing directory is
    left untouched.

    Args:
        path: Directory to create.

    Raises:
        FileExistsError: If ``path`` exists but is not a directory.
    """
    # makedirs(exist_ok=True) replaces the isdir() + mkdir() pair: no
    # check-then-create race, and it also works when the parent is missing.
    os.makedirs(path, exist_ok=True)
45
def readCSVFile(path=None):
    """Build the lookup table from recording id to label.

    The first row of the CSV file is treated as a header and skipped. For
    every other row the key is ``'xc'`` followed by the first column and the
    value is the third column (the genus, per the original author's note).
    Rows with fewer than three columns, such as blank lines, are ignored.

    Args:
        path: CSV file to read. Defaults to the module-level
            ``CSVMetaDataFile``.

    Returns:
        dict mapping ``'xc<first column>'`` to the third column of that row.
    """
    if path is None:
        path = CSVMetaDataFile
    # The csv module asks for newline='' so that line breaks inside quoted
    # fields are handled by the reader and not by the file object.
    with open(path, newline='') as file:
        csvReader = csv.reader(file, delimiter=',')
        next(csvReader, None)  # skip the header row (no-op on an empty file)
        return {'xc' + row[0]: row[2] for row in csvReader if len(row) > 2}
61
62
# Module-level state shared by the functions below; both values are computed
# once, when the script is loaded.

# Lookup table produced by readCSVFile() from the metadata CSV.
birdDictionary = readCSVFile()

# Names of all entries found in the audio folder.
audioList = os.listdir(baseAudioPath)

# Report how many metadata entries were loaded.
print(len(birdDictionary))
66
def processSpectogram(file, path, type):
    """Cut the log-spectrogram of one audio file into overlapping images.

    The audio is read with ``soundfile``, converted to a spectrogram (Hann
    window, 511-sample segments, 192-sample overlap) and log-scaled. A window
    of 255 frequency bins by 255 time frames is then moved along the time
    axis in steps of roughly 0.1 s, and each window position is saved as
    ``<type>_<n>.jpg`` inside ``path``. Numbering continues after the number
    of entries already present in ``path``. Nothing is written when the
    spectrogram has 255 time frames or fewer.

    Args:
        file: Path of the audio file to read.
        path: Existing directory that receives the images.
        type: Label used as file-name prefix. The name shadows the builtin
            but is kept so that existing callers keep working.
    """
    data, rate = sf.read(file)
    if len(data) == 0:
        return
    if data.ndim > 1:
        # soundfile returns (frames, channels) for multi-channel audio, while
        # spectrogram() works along the last axis; mix down to mono first.
        data = data.mean(axis=1)

    # 'hann' is the canonical window name; the legacy 'hanning' alias is not
    # accepted by newer SciPy releases.
    _, times, Sx = signal.spectrogram(data, fs=rate, window='hann',
                                      nperseg=511, noverlap=192,
                                      detrend=False, scaling='spectrum')
    if len(times) == 0:
        return

    audioLength = len(data) / rate
    secondsPerFrame = audioLength / len(times)
    # Step of about 0.1 s expressed in frames; never 0, otherwise the window
    # would not advance and the loop below would not terminate.
    windowOffset = max(1, round(0.1 / secondsPerFrame))

    # Clamp exact zeros (digital silence) so the logarithm never yields -inf.
    Sx = np.log(np.maximum(np.abs(Sx), np.finfo(Sx.dtype).tiny))

    frameWidth = 255
    # Count the existing entries once instead of re-listing the directory for
    # every image that is written.
    nextIndex = len(os.listdir(path)) + 1
    for start in range(0, len(times) - frameWidth, windowOffset):
        fileName = '{}_{}.jpg'.format(type, nextIndex)
        target = os.path.join(path, fileName)
        print(target)
        plt.imsave(target, Sx[0:255, start:start + frameWidth])
        nextIndex += 1
98
99
100
101'''
102!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
103'''
104
105
106
107
108
109
110
111
112
def CreateData():
    """Generate the spectrogram images for every entry of ``audioList``.

    The label of an audio file is looked up in ``birdDictionary`` by the
    file's base name without extension. A folder named after the label is
    created below ``SpectogramPathFile`` and ``processSpectogram`` writes the
    images for that file into it. Files without a label are reported and
    skipped.
    """
    # The per-label folders live below this root, so it has to exist first.
    createDirectory(SpectogramPathFile)

    for fileName in audioList:
        recordingId = os.path.splitext(os.path.basename(fileName))[0]
        label = birdDictionary.get(recordingId)
        if not label:
            # Without a label no target folder can be derived
            # (os.path.join would raise TypeError on None).
            print('No metadata entry for {}, skipping'.format(fileName))
            continue

        labelDir = os.path.join(SpectogramPathFile, label)
        createDirectory(labelDir)
        processSpectogram(os.path.join(baseAudioPath, fileName), labelDir, label)
125
126
127
def createModelDir():
    """Mirror the label folders of the spectrogram folder for the model data.

    For every sub-directory of ``SpectogramPathFile`` a directory with the
    same name is created below ``trainPath`` and below ``validationPath``.
    Entries that are not directories are ignored.
    """
    # Make sure the two roots exist before creating the per-label folders.
    createDirectory(trainPath)
    createDirectory(validationPath)

    for label in os.listdir(SpectogramPathFile):
        if not os.path.isdir(os.path.join(SpectogramPathFile, label)):
            continue
        createDirectory(os.path.join(trainPath, label))
        createDirectory(os.path.join(validationPath, label))
135
def CalcData(spectogramPath=None):
    """Split the image names of every label folder into train and validation.

    Each label folder is expected to hold files named ``<label>_<i>.jpg``
    with ``i`` running from 1 to the number of files in the folder. The first
    80% (rounded down) of those names go to training, the rest to validation.
    Entries of the spectrogram folder that are not directories are ignored.

    Args:
        spectogramPath: Folder that holds one sub-folder per label. Defaults
            to the module-level ``SpectogramPathFile``.

    Returns:
        Tuple ``(trainData, validationData)``. Both are lists with one list
        of file names per label folder, in ``os.listdir`` order.
    """
    if spectogramPath is None:
        spectogramPath = SpectogramPathFile

    trainData = []
    validationData = []
    for label in os.listdir(spectogramPath):
        labelDir = os.path.join(spectogramPath, label)
        if not os.path.isdir(labelDir):
            continue
        print(label)
        totalSize = len(os.listdir(labelDir))
        trainSize = int(totalSize * 0.8)
        # File numbering starts at 1: training takes 1..trainSize and
        # validation takes trainSize+1..totalSize.
        trainData.append(
            ['{}_{}.jpg'.format(label, i) for i in range(1, trainSize + 1)])
        validationData.append(
            ['{}_{}.jpg'.format(label, i)
             for i in range(trainSize + 1, totalSize + 1)])

    return trainData, validationData
151
def ParseFileNameToDir(fileName):
    """Return the label part of a spectrogram image file name.

    Image files are named ``<label>_<n>.jpg``, so the label is everything in
    front of the last underscore. Splitting at the last underscore keeps
    labels that themselves contain an underscore intact. A name without any
    underscore is returned unchanged.

    Args:
        fileName: Image file name such as ``'Turdus_12.jpg'``.

    Returns:
        The label, which is also the name of the folder holding the image.
    """
    return fileName.rsplit('_', 1)[0]
158
def copyData(data, path):
    """Copy spectrogram images into per-label folders below ``path``.

    Args:
        data: List of file-name lists. All names of one inner list are taken
            to belong to the same label folder, which is derived from the
            first name with ``ParseFileNameToDir``.
        path: Destination root, e.g. the training or validation folder.

    Raises:
        FileNotFoundError: If a listed file does not exist in its label
            folder below ``SpectogramPathFile``.
    """
    for sequence in data:
        if not sequence:
            # Nothing to copy, and there is no first name to derive the
            # folder from.
            continue
        folderName = ParseFileNameToDir(sequence[0])
        srcDir = os.path.join(SpectogramPathFile, folderName)
        dstDir = os.path.join(path, folderName)
        # Do not depend on the destination folder having been prepared.
        os.makedirs(dstDir, exist_ok=True)
        for fileName in sequence:
            shutil.copy(os.path.join(srcDir, fileName),
                        os.path.join(dstDir, fileName))
172 shutil.copy(srcFile,dstFile)