Code

Ticket #305: scanfiles.2.py

File scanfiles.2.py, 12.3 KB (added by scanner@…, 9 years ago)

Okay, new version of scanfiles.py with a small SQL-safe quoting function.

Line 
1#!/usr/bin/env python
2#
3
4import os
5import os.path
6import datetime
7import string
8import optparse
9import exceptions
10import re
11
12import eyeD3
13import eyeD3.tag
14import eyeD3.frames
15import eyeD3.utils
16
17# We are not running inside the django framework so we need to tell it where
18# to find our django app framework.
19#
20os.environ["DJANGO_SETTINGS_MODULE"] = "mediaserv.settings.main"
21
22from django.models.music import *
23
24"""This program is intended to be invoked periodically on the system hosting
25the mediaserv app.
26
27It will load up the mediaserv models.
28
29It will then iterate through the defined MusicRoot's. For each MusicRoot it
30will scan all the files in it and all of its sub-directories.
31
32For every file that is an mp3 it will attempt to parse the ID3 tags for that
33file and based on those tags it will create Artist, ArtistName, Album, and
34Track records in our database.
35
36For every track that we encounter we will update the 'last_scanned' field.
37
38After we finish scanning a MusicRoot we ask for all of the tracks whose
39last_scanned field is before its MusicRoot's last_scan_started date. We will
40then remove those tracks from the system.
41"""
42
quote_pattern = re.compile("'")

############################################################################
#
def q(string):
    """Return the given string wrapped in single quotes ( ' ).

    Every single quote already inside the string is doubled first, so the
    quoted result is safe to embed in SQL as a string literal.
    """

    doubled = quote_pattern.sub("''", string)
    return "'" + doubled + "'"
54
55############################################################################
56#
def is_simple_charset(value):
    """Tell whether a string consists solely of characters found in
    string.printable.

    Returns False as soon as a character outside string.printable is seen,
    and True otherwise (including for an empty string).

    This is the script's cheap way of telling a plain ASCII string apart
    from a UTF-8 (or otherwise encoded) one.
    """
    allowed = string.printable
    for ch in value:
        if ch in allowed:
            continue
        return False
    return True
68
69############################################################################
70#
def create_artist(id3_artist):
    """Create and return a new Artist for the given artist name.

    The Artist is saved first, then an ArtistName holding the
    single-quoted name is attached to it with preference 0.

    The ArtistName's simple_char_set flag is set from is_simple_charset(),
    so names with characters outside string.printable (SJIS, 8bit, ...) are
    flagged as not having a simple character set.
    """

    new_artist = artists.Artist(date_added = datetime.datetime.now())
    new_artist.save()

    quoted_name = "'%s'" % id3_artist
    is_plain = is_simple_charset(id3_artist)
    new_artist.add_artistname(quoted_name, preference = 0,
                              simple_char_set = is_plain)
    return new_artist
84
85############################################################################
86#
def create_album(id3_album):
    """Create, save and return a new Album whose name is the given album
    name wrapped in single quotes and whose date_added is now.
    """
    fields = {
        "name": "'%s'" % id3_album,
        "date_added": datetime.datetime.now(),
    }
    new_album = albums.Album(**fields)
    new_album.save()
    return new_album
94   
95############################################################################
96#
def add_file(filename, af, music_root, verbosity = "quiet"):
    """Add a Track record (and any Artist / Album it needs) for a new file.

    We have a file. We know it is an audio/mpeg file. We know we do not have
    this file in our track db. Query the file for its info and add the
    requisite objects to our db.

    Arguments:
      filename   - path of the file; stored on the Track as-is.
      af         - the parsed audio file object; getTag(), getPlayTime() and
                   getBitRate() are called on it.
      music_root - the MusicRoot being scanned; its id is stored on the Track.
      verbosity  - 'verbose', 'terse' or 'quiet'; selects which "Added
                   track" message (if any) is printed.

    Returns nothing. When the file has no id3 tag a bare Track titled after
    the file's base name is saved and nothing is printed.
    """

    tag = af.getTag()

    playtime = af.getPlayTime()
    (vbr, bitrate) = af.getBitRate()
    # Normalise the vbr flag to a real boolean: 0 means False, anything else
    # means True.
    vbr = not (vbr == 0)

    # Let us pull out all the id3 tags we have, filling in default info
    #
    if tag is None:
        # The file has no id3 info. We create a title based on the file name
        # and everything else is empty.
        #
        # NOTE(review): this title is quoted by hand rather than with q(), so
        # a quote in the file name is not doubled here - confirm whether that
        # is intended.
        track = tracks.Track(title = "'%s'" % os.path.basename(filename),
                             filename = filename,
                             last_scanned = datetime.datetime.now(),
                             play_time = playtime, bit_rate = bitrate,
                             vbr = vbr, musicroot_id = music_root.id)
        track.save()
        return

    # Get the title, if it does not exist, use the file name (the full
    # path as given, not just the base name).
    #
    title = tag.getTitle()
    if not title:
        title = filename

    # At this point we have the information to create the basic track object.
    # The rest of the fields are optional.
    #
    track = tracks.Track(title = q(title), filename = filename,
                         last_scanned = datetime.datetime.now(),
                         play_time = playtime, bit_rate = bitrate, vbr = vbr,
                         musicroot_id = music_root.id)

    # Get the artist tag. See if we have an artist with this exact name.
    # If we do not, then create a new artist.
    #
    id3_artist = tag.getArtist()
    if id3_artist:
        try:
            artist = artistnames.get_object(
                name__exact = "'%s'" % id3_artist).get_artist()
        except artistnames.ArtistNameDoesNotExist:
            artist = create_artist(id3_artist)
        track.artist_id = artist.id

    # See if we can get the track number & disc number (note: getTrackNum() &
    # getDiscNum() return a tuple (track num, total tracks) (disc num, total
    # discs) so we only want element 0 of the tuple.
    #
    id3_tracknum = tag.getTrackNum()[0]
    if id3_tracknum:
        track.track_number = id3_tracknum
    id3_discnum = tag.getDiscNum()[0]
    if id3_discnum:
        track.disc_number = id3_discnum

    # The album is like the artist. We see if we can find an album that exists
    # with the exact name. If we can then we use it. If we can not then we
    # create a new album and use that.
    #
    id3_album = tag.getAlbum()
    if id3_album:
        try:
            album = albums.get_object(name__exact = "'%s'" % id3_album)
        except albums.AlbumDoesNotExist:
            album = create_album(id3_album)
        track.album_id = album.id

    # Going to skip genre for now. Just really do not care much about it.
    #
    track.save()

    # Depending on the verbosity print out info on the track we just scanned.
    # (This used to test an undefined name 'verbose', which raised a
    # NameError for every verbosity other than 'terse'.)
    #
    if verbosity == "terse":
        print("Added track: %s" % track)
    elif verbosity == "verbose":
        print("Added track: %s .. (and other info)" % track)

    # This line is printed regardless of the verbosity level.
    print("Track %s, last scanned: %s" % (track, track.last_scanned))
189
190############################################################################
191#
192#
def scan_file(filename, music_root, verbosity = "quiet"):
    """Scan one file (given as an actual absolute file path) into the db.

    The eyeD3 library is used to decide whether the file is an mp3 and to
    parse it. Files that are not mp3s, or that eyeD3 can not parse, are
    skipped.

    If no track exists yet for this exact file name, add_file() is called
    to create the track (and any artists, artistnames, and albums). If a
    track already exists only its last_scanned time is refreshed for now;
    comparing and updating its other fields is not implemented yet.
    """
    chatty = verbosity == "verbose"

    # Anything that is not an mp3 file is simply ignored.
    #
    if not eyeD3.tag.isMp3File(filename):
        if chatty:
            print("Skipping file %s (not an audio/mpeg file)" % filename)
        return

    if chatty:
        print("  Scanning file %s" % filename)

    try:
        audio = eyeD3.tag.Mp3AudioFile(filename)
    except Exception:
        print("Unable to parse file: %s" % filename)
        return

    if audio.getTag() is None and chatty:
        print("File %s had no id3 tag information. Filling in defaults" %
              filename)

    # The file is the actual item that identifies a track, so look for a
    # track that refers to this exact same file. When there is none this
    # is a new track and add_file() takes over.
    #
    try:
        existing = tracks.get_object(filename__exact = filename)
    except tracks.TrackDoesNotExist:
        add_file(filename, audio, music_root, verbosity)
        return

    # The track is already in our db. Eventually its fields should be
    # compared against the id3 tags and updated (compare_update_file());
    # for now we only note that we saw the file during this scan.
    #
    print("We would normually update track %s, but we are skipping it for "
          "now" % os.path.basename(filename))
    existing.last_scanned = datetime.datetime.now()
    existing.save()
245
246############################################################################
247#
248#
def run(verbosity = "quiet"):
    """Scan every defined MusicRoot for .mp3 files and sync the track db.

    Takes a single argument: a string giving the verbosity level, one of
    'verbose', 'terse', or 'quiet' (the default).

    For each MusicRoot: record when the scan started, walk its directory
    tree handing every file to scan_file(), delete the tracks of that root
    whose last_scanned is older than the scan start, then record when the
    scan finished.
    """
    is_verbose = verbosity == "verbose"
    report_removals = verbosity in ("verbose", "terse")

    # These had better point to real directories!
    #
    for music_root in musicroots.get_list():

        # Stamp the start of the scan before touching any file.
        #
        music_root.last_scan_started = datetime.datetime.now()
        music_root.save()

        if is_verbose:
            print("Started scanning MusicRoot %s at %s" %
                  (music_root.directory, music_root.last_scan_started))

        # Walk the whole tree below the root's directory.
        #
        for dirpath, subdirs, filenames in os.walk(music_root.directory):
            if is_verbose:
                print("Scanning directory: %s" % dirpath)

            for name in filenames:
                scan_file(os.path.join(dirpath, name), music_root,
                          verbosity = verbosity)

        # Tracks of this root that were not seen during the walk still carry
        # a last_scanned older than the scan start; those get deleted.
        #
        stale_tracks = music_root.get_track_list(
            last_scanned__lt = music_root.last_scan_started)

        print("\n\n** Music root last scanned: %s" % music_root.last_scan_started)
        for track in stale_tracks:
            print("Track %s last scanned: %s" % (track, track.last_scanned))
            if report_removals:
                print("Track %s not found in scan. Deleting from MusicRoot "
                      "%s" % (track, music_root))
            # Take the track out of any playlists before deleting it.
            #
            track.set_playlists([])
            track.delete()

        # Stamp the end of the scan for this root.
        #
        music_root.last_scan_finished = datetime.datetime.now()
        music_root.save()
309
310############################################################################
311#
312#
def setup_option_parser():
    """Build and return the optparse.OptionParser for this script.

    Nothing is parsed here; the caller invokes parse_args() on the returned
    parser. The only option defined is -v / --verbosity, a choice between
    'verbose', 'terse' and 'quiet' that defaults to 'terse'.
    """
    levels = ["verbose", "terse", "quiet"]
    verbosity_help = """Controls how talkative the script is about what
                      it is doing. In 'verbose' mode it will tell you
                      every track it finds. In 'terse' mode it will only tell
                      you about tracks that are changed, added or removed.
                      In 'quiet' mode it will say nothing. DEFAULT:
                      '%default'"""

    option_parser = optparse.OptionParser(usage = "%prog [options]",
                                          version = "%prog 1.0")
    option_parser.add_option("-v", "--verbosity",
                             dest = "verbosity",
                             type = "choice",
                             choices = levels,
                             default = "terse",
                             help = verbosity_help)
    return option_parser
331
332############################################################################
333#
def main():
    """Entry point used when this file is run as a program.

    Parses the command line options and passes the chosen verbosity to
    run().

    Code that imports this file as a module should call 'run()' directly
    rather than 'main()'.
    """

    options, positional = setup_option_parser().parse_args()
    run(options.verbosity)
346
347###########
348#
349# The work starts here
350#
351
352if __name__ == "__main__":
353    main()
354
355#
356#
357#
358###########