Ticket #305: scanfiles.py

File scanfiles.py, 11.8 KB (added by scanner@…, 10 years ago)

program that scans a 'music root' for files, extracts id3 tags, and fills in the db from that

Line 
1#!/usr/bin/env python
2#
3
4import os
5import os.path
6import datetime
7import string
8import optparse
9import exceptions
10
11import eyeD3
12import eyeD3.tag
13import eyeD3.frames
14import eyeD3.utils
15
16# We are not running inside the django framework so we need to tell it where
17# to find our django app framework.
18#
19os.environ["DJANGO_SETTINGS_MODULE"] = "mediaserv.settings.main"
20
21from django.models.music import *
22
23"""This program is intended to be invoked periodically on the system hosting
24the mediaserv app.
25
26It will load up the mediaserv models.
27
28It will then iterate through the defined MusicRoot's. For each MusicRoot it
29will scan all the files in it and all of its sub-directories.
30
31For every file that is an mp3 it will attempt to parse the ID3 tags for that
32file and based on those tags it will create Artist, ArtistName, Album, and
33Track records in our database.
34
35For every track that we encounter we will update the 'last_scanned' field.
36
37After we finish scanning a MusicRoot we ask for all of the tracks whose
38last_scanned field is before its MusicRoot's last_scan_started date. We will
39then remove those tracks from the system.
40"""
41
42############################################################################
43#
def is_simple_charset(value):
    """Return True if every character of value is printable ASCII.

    value -- the string to inspect.

    This is the cheap test used to tell a plain ASCII name apart from one
    holding 8-bit or multi-byte (UTF-8, SJIS, ...) data. "Printable" means
    membership in string.printable: digits, ASCII letters, punctuation and
    whitespace.

    An empty string contains no offending character, so it counts as simple
    and True is returned for it.
    """
    for char in value:
        # One character outside the printable set is enough to decide.
        if char not in string.printable:
            return False
    return True
55
56############################################################################
57#
def create_artist(id3_artist):
    """Create a new Artist with one ArtistName attached and return it.

    id3_artist -- the artist name as read from a file's id3 tag.

    The name is stored wrapped in single quotes with preference 0. If the
    name is SJIS or has 8-bit characters it is flagged as not having a
    simple character set: simple_char_set is whatever is_simple_charset()
    reports for the raw (unquoted) name.

    Returns the saved Artist object.
    """
    art = artists.Artist(date_added = datetime.datetime.now())
    # The artist is saved before the name is attached to it
    # (presumably so that it has an id to tie the ArtistName to).
    art.save()

    art.add_artistname("'%s'" % id3_artist, preference = 0,
                       simple_char_set = is_simple_charset(id3_artist))
    return art
71
72############################################################################
73#
def create_album(id3_album):
    """Create, save and return a new Album named after an id3 album tag.

    id3_album -- the album name as read from a file's id3 tag.

    The name is stored wrapped in single quotes, and date_added is set to
    the current time.
    """
    alb = albums.Album(name = "'%s'" % id3_album,
                       date_added = datetime.datetime.now())
    alb.save()
    return alb
81   
82############################################################################
83#
def add_file(filename, af, music_root, verbosity = "quiet"):
    """Create the database records for an mp3 file that has no Track yet.

    filename   -- path of the file; stored on the Track and used to derive
                  a title when the tags do not supply one.
    af         -- the already opened eyeD3 audio file object for filename.
    music_root -- the MusicRoot being scanned; the Track is tied to its id.
    verbosity  -- 'verbose', 'terse' or 'quiet'. In 'quiet' mode nothing is
                  printed.

    A Track is always created. When the file carries an id3 tag, the
    artist, track number, disc number and album are filled in as far as the
    tag provides them; an Artist or Album that does not exist yet under that
    exact name is created on the fly. Genre is deliberately ignored.

    Returns None.
    """
    tag = af.getTag()
    playtime = af.getPlayTime()
    (vbr, bitrate) = af.getBitRate()
    # Anything other than 0 counts as "variable bit rate"; store a real
    # boolean.
    vbr = (vbr != 0)

    # Work out the title. A file without any id3 info is titled after the
    # base name of the file; a tagged file without a title falls back to
    # the file name as it was passed in.
    #
    if tag is None:
        title = os.path.basename(filename)
    else:
        title = tag.getTitle()
        if not title:
            title = filename

    # At this point we have the information to create the basic track
    # object. The rest of the fields are optional.
    #
    track = tracks.Track(title = "'%s'" % title, filename = filename,
                         last_scanned = datetime.datetime.now(),
                         play_time = playtime, bit_rate = bitrate, vbr = vbr,
                         musicroot_id = music_root.id)

    if tag is not None:
        # Get the artist tag. See if we have an artist with this exact
        # name. If we do not, then create a new artist.
        #
        id3_artist = tag.getArtist()
        if id3_artist:
            try:
                artist_name = artistnames.get_object(
                    name__exact = "'%s'" % id3_artist)
                artist = artist_name.get_artist()
            except artistnames.ArtistNameDoesNotExist:
                artist = create_artist(id3_artist)
            track.artist_id = artist.id

        # getTrackNum() and getDiscNum() return tuples of (track num, total
        # tracks) and (disc num, total discs); we only want element 0.
        #
        id3_tracknum = tag.getTrackNum()[0]
        if id3_tracknum:
            track.track_number = id3_tracknum
        id3_discnum = tag.getDiscNum()[0]
        if id3_discnum:
            track.disc_number = id3_discnum

        # The album is like the artist: use the album that exists under
        # this exact name, otherwise create a new one.
        #
        id3_album = tag.getAlbum()
        if id3_album:
            try:
                album = albums.get_object(name__exact = "'%s'" % id3_album)
            except albums.AlbumDoesNotExist:
                album = create_album(id3_album)
            track.album_id = album.id

    track.save()

    # Depending on the verbosity print out info on the track we just added.
    # This used to test an undefined name ('verbose'), which raised a
    # NameError for every verbosity other than 'terse'.
    #
    if verbosity == "terse":
        print("Added track: %s" % track)
    elif verbosity == "verbose":
        print("Added track: %s .. (and other info)" % track)
        print("Track %s, last scanned: %s" % (track, track.last_scanned))
    return
176
177############################################################################
178#
179#
def scan_file(filename, music_root, verbosity = "quiet"):
    """Scan one file and bring the database in line with it.

    filename   -- path of the file to look at (run() passes the directory
                  from os.walk joined with the file name).
    music_root -- the MusicRoot the file was found under.
    verbosity  -- 'verbose', 'terse' or 'quiet'.

    Files that eyeD3 does not consider mp3 files are skipped. A file eyeD3
    cannot parse is reported and skipped. A file with no Track in the
    database is handed to add_file(). A file that already has a Track gets
    its last_scanned stamp refreshed so the clean-up pass in run() does not
    mistake it for a file that disappeared; comparing and updating its tag
    data is not implemented yet.

    Returns None.
    """

    # If the file is not an mp3 file we just return.
    #
    if not eyeD3.tag.isMp3File(filename):
        if verbosity == "verbose":
            print("Skipping file %s (not an audio/mpeg file)" % filename)
        return

    if verbosity == "verbose":
        print("  Scanning file %s" % filename)

    # Parse failures are reported regardless of verbosity and the file is
    # left alone; one bad file must not abort the whole scan.
    #
    try:
        af = eyeD3.tag.Mp3AudioFile(filename)
    except Exception:
        print("Unable to parse file: %s" % filename)
        return

    if af.getTag() is None and verbosity == "verbose":
        print("File %s had no id3 tag information. Filling in defaults" %
              filename)

    # First see if a track already exists that refers to this exact same
    # file. The file is the actual item that identifies a track, so if no
    # track has this file name the track is new to us.
    #
    try:
        track = tracks.get_object(filename__exact = filename)
    except tracks.TrackDoesNotExist:
        add_file(filename, af, music_root, verbosity)
        return

    # The track already existed in our db and its file is still there.
    # Record that we saw it during this scan: run() deletes every track
    # whose last_scanned is older than the start of the scan.
    #
    track.last_scanned = datetime.datetime.now()
    track.save()

    # TODO: check whether any of the id3 tags differ from what we already
    # have in the db and update the db if they do:
    #compare_update_file(filename, af, music_root, verbosity = verbosity)
    #
    if verbosity == "verbose":
        print("We would normually update track %s, but we are skipping it for "
              "now" % os.path.basename(filename))
    return
230
231############################################################################
232#
233#
def run(verbosity = "quiet"):
    """Scan every defined MusicRoot for mp3 files and sync the database.

    verbosity -- 'verbose', 'terse' or 'quiet'. If not specified it
                 defaults to 'quiet', in which case this function itself
                 prints nothing.

    For each MusicRoot: stamp last_scan_started, walk its directory tree
    handing every file to scan_file(), delete the tracks of this root that
    were not seen during the walk, then stamp last_scan_finished.

    Returns None.
    """

    # Get the list of defined MusicRoots. These had better point to real
    # directories!
    #
    for music_root in musicroots.get_list():

        # We first mark that we actually started to scan this music root.
        #
        music_root.last_scan_started = datetime.datetime.now()
        music_root.save()

#        music_root = musicroots.get_object(pk = music_root.id)

        if verbosity == "verbose":
            print("Started scanning MusicRoot %s at %s" %
                  (music_root.directory, music_root.last_scan_started))

        # Then the magic walk happens
        #
        for root, dirs, files in os.walk(music_root.directory):
            if verbosity == "verbose":
                print("Scanning directory: %s" % root)

            for f in files:
                scan_file(os.path.join(root, f), music_root,
                          verbosity = verbosity)

        # Okay. Our magic walk happened. Now we need to delete any tracks
        # that had been a part of this music root but were not scanned in
        # this run.
        #
        stale_tracks = tracks.get_list(
            last_scanned__lt = music_root.last_scan_started)

        if verbosity == "verbose":
            print("\n\n** Music root last scanned: %s" %
                  music_root.last_scan_started)

        for track in stale_tracks:
            # The query above is not limited to this music root. Tracks of
            # other roots (for instance ones that are scanned later in this
            # loop) must not be deleted here.
            # NOTE(review): assumes a loaded Track exposes 'musicroot_id',
            # the keyword add_file() uses when creating one -- confirm
            # against the model; filtering in the query would be cheaper.
            #
            if track.musicroot_id != music_root.id:
                continue

            if verbosity == "verbose":
                print("Track %s last scanned: %s" %
                      (track, track.last_scanned))
            if verbosity == "verbose" or verbosity == "terse":
                print("Track %s not found in scan. Deleting from MusicRoot "
                      "%s" % (track, music_root))

            # If it is a member of any playlists remove it from them before
            # deleting the track itself.
            #
            track.set_playlists([])
            track.delete()

        # Done scanning a music root. Indicate when we finished scanning it.
        #
        music_root.last_scan_finished = datetime.datetime.now()
        music_root.save()

    return
293
294############################################################################
295#
296#
def setup_option_parser():
    """Build and return the optparse parser for this script's command line.

    Nothing is parsed here; the caller invokes parse_args() on the returned
    parser.

    The only option defined is -v/--verbosity, which accepts 'verbose',
    'terse' or 'quiet' and defaults to 'terse'.
    """
    # The help text is assembled from single-spaced pieces. A triple-quoted
    # literal indented to match the code would carry its line breaks and
    # indentation into the string, and optparse does not squeeze such runs
    # of blanks out when it wraps the text for --help.
    #
    verbosity_help = ("Controls how talkative the script is about what it "
                      "is doing. In 'verbose' mode it will tell you every "
                      "track it finds. In 'terse' mode it will only tell "
                      "you about tracks that are changed, added or removed. "
                      "In 'quiet' mode it will say nothing. DEFAULT: "
                      "'%default'")

    parser = optparse.OptionParser(usage = "%prog [options]",
                                   version = "%prog 1.0")
    parser.add_option("-v", "--verbosity", type = "choice",
                      dest = "verbosity", default = "terse",
                      choices = ["verbose", "terse", "quiet"],
                      help = verbosity_help)
    return parser
315
316############################################################################
317#
def main():
    """Command-line entry point: parse the options, then run the scan.

    This is invoked when the file is run as a program. Code that imports
    this file as a module should call run() directly with the verbosity it
    wants instead of calling main().
    """
    parser = setup_option_parser()
    # Positional arguments are accepted by the parser but not used.
    (opts, args) = parser.parse_args()

    run(opts.verbosity)
330
331###########
332#
333# The work starts here
334#
335
# Only start scanning when executed as a script, not when imported.
if __name__ == "__main__":
    main()
338
339#
340#
341#
342###########
Back to Top