Ticket #305: scanfiles.2.py

File scanfiles.2.py, 12.3 KB (added by scanner@…, 19 years ago)

Okay, new version of scanfiles.py with a small SQL-safe quoting function.

Line 
1#!/usr/bin/env python
2#
3
4import os
5import os.path
6import datetime
7import string
8import optparse
9import exceptions
10import re
11
12import eyeD3
13import eyeD3.tag
14import eyeD3.frames
15import eyeD3.utils
16
17# We are not running inside the django framework so we need to tell it where
18# to find our django app framework.
19#
20os.environ["DJANGO_SETTINGS_MODULE"] = "mediaserv.settings.main"
21
22from django.models.music import *
23
24"""This program is intended to be invoked periodically on the system hosting
25the mediaserv app.
26
27It will load up the mediaserv models.
28
29It will then iterate through the defined MusicRoot's. For each MusicRoot it
30will scan all the files in it and all of its sub-directories.
31
32For every file that is an mp3 it will attempt to parse the ID3 tags for that
33file and based on those tags it will create Artist, ArtistName, Album, and
34Track records in our database.
35
36For every track that we encounter we will update the 'last_scanned' field.
37
38After we finish scanning a MusicRoot we ask for all of the tracks whose
39last_scanned field is before its MusicRoot's last_scan_started date. We will
40then remove those tracks from the system.
41"""
42
# Matches each single quote so that q() can double it.
quote_pattern = re.compile("'")

############################################################################
#
def q(string):
    """Return ``string`` enclosed in single quotes, with inner quotes doubled.

    Every single quote ( ' ) found in the text is first replaced by two
    single quotes, which is how a quote is written inside an SQL string
    literal. The escaped text is then wrapped in one pair of single quotes.
    """
    doubled = quote_pattern.sub("''", string)
    return "'" + doubled + "'"
54
55############################################################################
56#
def is_simple_charset(value):
    """Tell whether ``value`` is made up solely of printable ASCII.

    Each character is checked against ``string.printable``. The result is
    True when every character is found there (an empty value therefore
    yields True) and False as soon as one character is not.

    This is the script's simple way of telling a plain ASCII string apart
    from one that uses UTF-8 or some other richer encoding.
    """
    return all(ch in string.printable for ch in value)
68
69############################################################################
70#
def create_artist(id3_artist):
    """Create and return a new Artist that carries ``id3_artist`` as its name.

    A fresh Artist stamped with the current time is saved first. The name is
    then attached through ``add_artistname`` with preference 0, together with
    a flag saying whether the name consists only of printable ASCII (names in
    SJIS or with 8-bit characters are flagged as not simple).
    """
    new_artist = artists.Artist(date_added=datetime.datetime.now())
    new_artist.save()

    # NOTE(review): the name is wrapped in quotes without doubling embedded
    # quotes. add_file() looks artists up using this same form, so the two
    # must be changed together if q() is ever adopted here.
    quoted_name = "'%s'" % id3_artist
    plain_ascii = is_simple_charset(id3_artist)
    new_artist.add_artistname(quoted_name, preference=0,
                              simple_char_set=plain_ascii)
    return new_artist
84
85############################################################################
86#
def create_album(id3_album):
    """Create, save and return a new Album named after ``id3_album``.

    The album is stamped with the current time as its ``date_added``.
    """
    # NOTE(review): the name is wrapped in quotes without doubling embedded
    # quotes. add_file() looks albums up using this same form, so the two
    # must be changed together if q() is ever adopted here.
    new_album = albums.Album(name="'%s'" % id3_album,
                             date_added=datetime.datetime.now())
    new_album.save()
    return new_album
94
95############################################################################
96#
def add_file(filename, af, music_root, verbosity = "quiet"):
    """Add a Track record (plus artist/album as needed) for a new mp3 file.

    The caller has already established that the file is an audio/mpeg file
    and that no track in the database refers to it yet.

    Arguments:
      filename   -- path of the file; stored on the track as given.
      af         -- the parsed audio file object; its getTag(), getPlayTime()
                    and getBitRate() methods are used (scan_file() passes an
                    eyeD3.tag.Mp3AudioFile).
      music_root -- the MusicRoot being scanned; its id is stored on the
                    track.
      verbosity  -- 'verbose', 'terse' or 'quiet'; selects which "Added
                    track" message is printed.

    Returns None.
    """

    tag = af.getTag()

    playtime = af.getPlayTime()
    (vbr, bitrate) = af.getBitRate()
    # Store the vbr flag as a real boolean: anything other than 0 is True.
    vbr = (vbr != 0)

    # NOTE(review): only the tagged title below goes through q(). The
    # file-name title and the artist/album names are wrapped in quotes
    # without doubling embedded quotes. They are left as they are because
    # create_artist()/create_album() store names in the same form that is
    # looked up here -- confirm before changing any of them.

    if tag is None:
        # The file has no id3 info. We create a title based on the file name
        # and everything else is empty.
        track = tracks.Track(title="'%s'" % os.path.basename(filename),
                             filename=filename,
                             last_scanned=datetime.datetime.now(),
                             play_time=playtime, bit_rate=bitrate,
                             vbr=vbr, musicroot_id=music_root.id)
        track.save()
        return

    # Get the title; if it does not exist, use the file name.
    title = tag.getTitle()
    if not title:
        title = filename

    # At this point we have the information to create the basic track object.
    # The rest of the fields are optional.
    track = tracks.Track(title=q(title), filename=filename,
                         last_scanned=datetime.datetime.now(),
                         play_time=playtime, bit_rate=bitrate, vbr=vbr,
                         musicroot_id=music_root.id)

    # Get the artist tag. See if we have an artist with this exact name.
    # If we do not, then create a new artist.
    id3_artist = tag.getArtist()
    if id3_artist:
        try:
            artist = artistnames.get_object(
                name__exact="'%s'" % id3_artist).get_artist()
        except artistnames.ArtistNameDoesNotExist:
            artist = create_artist(id3_artist)
        track.artist_id = artist.id

    # getTrackNum() and getDiscNum() return tuples of the form
    # (track num, total tracks) and (disc num, total discs), so we only use
    # element 0 of each tuple.
    id3_tracknum = tag.getTrackNum()[0]
    if id3_tracknum:
        track.track_number = id3_tracknum
    id3_discnum = tag.getDiscNum()[0]
    if id3_discnum:
        track.disc_number = id3_discnum

    # The album is like the artist. We see if we can find an album that
    # exists with the exact name. If we can then we use it. If we can not
    # then we create a new album and use that.
    id3_album = tag.getAlbum()
    if id3_album:
        try:
            album = albums.get_object(name__exact="'%s'" % id3_album)
        except albums.AlbumDoesNotExist:
            album = create_album(id3_album)
        track.album_id = album.id

    # Going to skip genre for now. Just really do not care much about it.
    track.save()

    # Depending on the verbosity print out info on the track we just added.
    # The second test used to read the undefined name `verbose`, which raised
    # NameError for every verbosity other than "terse" (after the track had
    # already been saved).
    if verbosity == "terse":
        print("Added track: %s" % track)
    elif verbosity == "verbose":
        print("Added track: %s .. (and other info)" % track)

    print("Track %s, last scanned: %s" % (track, track.last_scanned))
189
190############################################################################
191#
192#
def scan_file(filename, music_root, verbosity = "quiet"):
    """Scan one file and make sure the database has an up-to-date track for it.

    ``filename`` is an absolute file path. Files that eyeD3 does not
    recognise as mp3, or that it cannot parse, are skipped. When no track
    refers to the file yet, add_file() creates the track together with any
    artist, artist name and album records it needs.

    When a track for the file already exists, only its ``last_scanned``
    time is refreshed; comparing the stored fields against the file's id3
    tags is not done yet.

    ``verbosity`` is 'verbose', 'terse' or 'quiet'. Returns None.
    """
    chatty = (verbosity == "verbose")

    # Anything that is not an mp3 file is ignored.
    if not eyeD3.tag.isMp3File(filename):
        if chatty:
            print("Skipping file %s (not an audio/mpeg file)" % filename)
        return

    if chatty:
        print(" Scanning file %s" % filename)

    try:
        af = eyeD3.tag.Mp3AudioFile(filename)
    except Exception:
        print("Unable to parse file: %s" % filename)
        return

    if af.getTag() is None and chatty:
        print("File %s had no id3 tag information. Filling in defaults" %
              filename)

    # The file itself is what identifies a track, so first look for a track
    # that refers to this exact file. If there is none, the file is new to
    # us and gets added.
    try:
        track = tracks.get_object(filename__exact=filename)
    except tracks.TrackDoesNotExist:
        add_file(filename, af, music_root, verbosity)
        return

    # The track is already in the db. Eventually its id3 tags should be
    # compared with the stored values and the db updated; for now we only
    # record that the file was seen in this scan.
    print("We would normually update track %s, but we are skipping it for "
          "now" % os.path.basename(filename))
    track.last_scanned = datetime.datetime.now()
    track.save()
    # TODO: re-enable once available:
    # compare_update_file(filename, af, music_root, verbosity = verbosity)
245
246############################################################################
247#
248#
def run(verbosity = "quiet"):
    """Scan every defined MusicRoot for mp3 files and prune vanished tracks.

    For each MusicRoot the start time is recorded, every file below its
    directory is handed to scan_file(), tracks whose ``last_scanned`` time
    is older than the start of this scan are removed from their playlists
    and deleted, and finally the finish time is recorded.

    ``verbosity`` is one of 'verbose', 'terse' or 'quiet' and defaults to
    'quiet'. Returns None.
    """
    # The MusicRoots had better point to real directories!
    for music_root in musicroots.get_list():

        # Mark the moment we started to scan this music root.
        music_root.last_scan_started = datetime.datetime.now()
        music_root.save()

        if verbosity == "verbose":
            print("Started scanning MusicRoot %s at %s" %
                  (music_root.directory, music_root.last_scan_started))

        # Walk the whole directory tree and scan every file found in it.
        for dirpath, _subdirs, filenames in os.walk(music_root.directory):
            if verbosity == "verbose":
                print("Scanning directory: %s" % dirpath)

            for entry in filenames:
                scan_file(os.path.join(dirpath, entry), music_root,
                          verbosity=verbosity)

        # The walk is done. Any track of this music root that was not
        # touched during this run no longer exists on disk.
        stale_tracks = music_root.get_track_list(
            last_scanned__lt=music_root.last_scan_started)

        print("\n\n** Music root last scanned: %s" %
              music_root.last_scan_started)
        for track in stale_tracks:
            print("Track %s last scanned: %s" % (track, track.last_scanned))
            if verbosity in ("verbose", "terse"):
                print("Track %s not found in scan. Deleting from MusicRoot "
                      "%s" % (track, music_root))
            # Take the track out of any playlists before deleting it.
            track.set_playlists([])
            track.delete()

        # Done with this music root; record when we finished.
        music_root.last_scan_finished = datetime.datetime.now()
        music_root.save()
309
310############################################################################
311#
312#
def setup_option_parser():
    """Build and return the optparse parser for this script's command line.

    Nothing is parsed here; the caller invokes ``parse_args()`` on the
    returned parser. The only option defined is ``-v`` / ``--verbosity``,
    a choice between 'verbose', 'terse' and 'quiet' that defaults to
    'terse'.
    """
    verbosity_help = (
        "Controls how talkative the script is about what\n"
        " it is doing. In 'verbose' mode it will tell you\n"
        " every track it finds. In 'terse' mode it will only tell\n"
        " you about tracks that are changed, added or removed.\n"
        " In 'quiet' mode it will say nothing. DEFAULT:\n"
        " '%default'"
    )

    parser = optparse.OptionParser(usage="%prog [options]",
                                   version="%prog 1.0")
    parser.add_option("-v", "--verbosity",
                      dest="verbosity",
                      type="choice",
                      choices=["verbose", "terse", "quiet"],
                      default="terse",
                      help=verbosity_help)
    return parser
331
332############################################################################
333#
def main():
    """Command-line entry point: parse the options, then start the scan.

    This runs when the file is executed as a program. Code that imports
    this file as a module should call run() directly instead of main().
    """
    options, _positional = setup_option_parser().parse_args()
    run(options.verbosity)

###########
#
# The work starts here
#

if __name__ == "__main__":
    main()
354
355#
356#
357#
358###########
Back to Top