Module heyvi.cap

import vipy
import heyvi
import torch
import pycollector
import pycollector.version
import pycollector.label
import contextlib
import gc




class CAP():
    """heyvi.system.CAP class

    """
    
    def __init__(self, modelfile=None):
        
        assert vipy.version.is_at_least('1.12.4')
        assert heyvi.version.is_at_least('0.2.13')
        assert pycollector.version.is_at_least('0.4.2')        
        assert torch.cuda.device_count() >= 4
        self._unitnorm = False
        
        #self._activitymodel = './cap_epoch_15_step_64063.ckpt'  # local testing only
        #self._activitymodel = './cap_epoch_17_step_72071.ckpt'  # local testing only
        #self._activitymodel = './_calibrate.ckpt'  # local testing only
        self._activitymodel = './cap_l2norm_e23s96095.ckpt' if modelfile is None else modelfile  # local testing only        
        self._unitnorm = True

        self._annotator = lambda im, f=vipy.image.mutator_show_trackindex_verbonly(confidence=True): f(im).annotate(timestamp=heyvi.util.timestamp(), timestampoffset=(6,10), fontsize=15).rgb()        

        
    def __call__(self, vi, minconf=0.04, verbose=True, frame_callback=None, livestream=False, mintracklen=None, finalized=True):

        assert isinstance(vi, vipy.video.Scene)

        livedelay = 2*15*5 if vi.islive() or livestream else 5  # 150 frame (30s at 5Hz) output delay for live streams, 5 frames otherwise
        objects = ['person', ('car','vehicle'), ('truck','vehicle'), ('bus', 'vehicle'), 'bicycle']  # merge truck/bus/car to vehicle, no motorcycles
        track = heyvi.detection.MultiscaleVideoTracker(gpu=[0,1,2,3], batchsize=9, minconf=0.05, trackconf=0.2, maxhistory=5, objects=objects, overlapfrac=6, gate=64, detbatchsize=None)
        detect = heyvi.recognition.ActivityTrackerCap(gpus=[0,1,2,3], batchsize=64, modelfile=self._activitymodel, stride=3, unitnorm=self._unitnorm)   # stride should match tracker stride 4->3
        
        gc.disable()
        (srcdim, srcfps) = (vi.mindim(), vi.framerate())
        vi = vi.mindim(960).framerate(5)
        for (f, (im,vi)) in enumerate(zip(vi.stream(buffered=True).frame(delay=livedelay),  # live stream delay (must be >= 2x finalized period)
                                          detect(track(vi, stride=3, buffered=vi.islive()),
                                                 mirror=False, trackconf=0.2, minprob=minconf, maxdets=105, avgdets=70, throttle=True, activityiou=0.1, buffered=vi.islive(), finalized=(livedelay//2) if vi.islive() or livestream else finalized, mintracklen=mintracklen))):
            if callable(frame_callback) and im is not None:
                frame_callback(self._annotator(im.clone()), im, vi)  
            if verbose:
                print('[heyvi.system.Actev21][%s][%d]: %s' % (heyvi.util.timestamp(), f, vi), end='\r')                                    
                
        vi.activityfilter(lambda a: a.category() not in ['person', 'person_walks', 'vehicle', 'car_moves'])   # remove background activities
        vo = vi.framerate(srcfps)  # upsample tracks/activities back to source framerate
        vo = vo.mindim(srcdim)  # upscale tracks back to source resolution
        gc.enable()

        return vo


    def annotate(self, v, outfile, minconf=0.1, trackonly=False, nounonly=False, mindim=512):
        return (v.mindim(mindim).activityfilter(lambda a: a.confidence() >= float(minconf))
                .annotate(mutator=vipy.image.mutator_show_trackindex_verbonly(confidence=True) if (not trackonly and not nounonly) else (vipy.image.mutator_show_trackonly() if trackonly else vipy.image.mutator_show_nounonly(nocaption=True)),
                          timestamp=True,
                          fontsize=6,
                          outfile=outfile))  # colored boxes by track id, activity captions with confidence, 5Hz, 512x(-1) resolution    
    
    
    def detect(self, vi, minconf=0.15):
        assert isinstance(vi, vipy.video.Scene)
        return self.__call__(vi.clone().clear().framerate(5), minconf=minconf)

    
    def classify(self, vi, minconf=0.01, topk=3, repeat=3):
        assert isinstance(vi, vipy.video.Scene)
        v = vi.clone().clear().framerate(5).load()
        v = v.fromframes([vj for k in range(repeat) for vj in v.framelist()], copy=True)  # repeat frames so short clips meet the minimum length needed for activity detection
        v = self.__call__(v, minconf=minconf, finalized=False)
        ai = set([a.id() for a in sorted(v.activitylist(), key=lambda a: a.confidence())[-topk:]])
        return v.activityfilter(lambda a: a.id() in ai).activities({a.id():a for a in sorted(v.activities().values(), key=lambda a: a.confidence(), reverse=True)})

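Typical usage, as a minimal sketch: construct the system, run detection on a file, and write an annotated video. The file paths below are placeholders, and this assumes at least four CUDA GPUs and a locally available activity model checkpoint.

import vipy
import heyvi.cap

cap = heyvi.cap.CAP()                                  # default checkpoint; pass modelfile=... to override
vi = vipy.video.Scene(filename='/path/to/video.mp4')   # placeholder input video
vo = cap.detect(vi, minconf=0.15)                      # tracks and activities at the source resolution and framerate
print(vo.activitylist())                               # detected activities with confidences
cap.annotate(vo, outfile='/path/to/annotated.mp4')     # boxes colored by track id, verb captions with confidence
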
Classes

class CAP (modelfile=None)

heyvi.system.CAP class.

Streaming activity detection: multi-scale object tracking and clip activity recognition on four GPUs over a 960px, 5Hz stream, with tracks and activities rescaled back to the source resolution and framerate on return.

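A sketch of live-stream usage with a per-frame callback, assuming an RTSP camera URL (the URL below is a placeholder) and a locally available checkpoint. With a live source (or livestream=True) the output is delayed by roughly 30 seconds of frames so that activity detections can be finalized before each frame is emitted.

import vipy
import heyvi.cap

cap = heyvi.cap.CAP(modelfile='./cap_l2norm_e23s96095.ckpt')             # or your own checkpoint

vi = vipy.video.Scene(url='rtsp://user:password@camera.local/stream')    # placeholder live stream

def on_frame(annotated, im, v):
    # 'annotated' is the frame with track boxes and verb captions burned in,
    # 'im' is the raw frame, and 'v' is the scene with tracks/activities so far
    pass  # e.g. push 'annotated' to a display or encoder here

vo = cap(vi, minconf=0.04, frame_callback=on_frame, livestream=True)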

Methods

def annotate(self, v, outfile, minconf=0.1, trackonly=False, nounonly=False, mindim=512)
def annotate(self, v, outfile, minconf=0.1, trackonly=False, nounonly=False, mindim=512):
    return (v.mindim(mindim).activityfilter(lambda a: a.confidence() >= float(minconf))
            .annotate(mutator=vipy.image.mutator_show_trackindex_verbonly(confidence=True) if (not trackonly and not nounonly) else (vipy.image.mutator_show_trackonly() if trackonly else vipy.image.mutator_show_nounonly(nocaption=True)),
                      timestamp=True,
                      fontsize=6,
                      outfile=outfile))  # colored boxes by track id, activity captions with confidence, 5Hz, 512x(-1) resolution    
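
A short sketch of the display options, assuming cap and the detection result vo from the example above, with placeholder output paths:

cap.annotate(vo, '/path/to/annotated.mp4', minconf=0.1)          # track-colored boxes and verb captions with confidences
cap.annotate(vo, '/path/to/tracks_only.mp4', trackonly=True)     # track boxes only, no activity captions
cap.annotate(vo, '/path/to/nouns_only.mp4', nounonly=True)       # object boxes only, no captions
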
def classify(self, vi, minconf=0.01, topk=3, repeat=3)
def classify(self, vi, minconf=0.01, topk=3, repeat=3):
    assert isinstance(vi, vipy.video.Scene)
    v = vi.clone().clear().framerate(5).load()
    v = v.fromframes([vj for k in range(repeat) for vj in v.framelist()], copy=True)  # repeat frames so short clips meet the minimum length needed for activity detection
    v = self.__call__(v, minconf=minconf, finalized=False)
    ai = set([a.id() for a in sorted(v.activitylist(), key=lambda a: a.confidence())[-topk:]])
    return v.activityfilter(lambda a: a.id() in ai).activities({a.id():a for a in sorted(v.activities().values(), key=lambda a: a.confidence(), reverse=True)})
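
A sketch of top-k clip classification, assuming cap constructed as above and a short clip containing a single activity (the path is a placeholder):

v = cap.classify(vipy.video.Scene(filename='/path/to/short_clip.mp4'), topk=3)
for a in sorted(v.activitylist(), key=lambda a: a.confidence(), reverse=True):
    print(a.category(), a.confidence())   # top-k activity labels and confidences
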
def detect(self, vi, minconf=0.15)
def detect(self, vi, minconf=0.15):
    assert isinstance(vi, vipy.video.Scene)
    return self.__call__(vi.clone().clear().framerate(5), minconf=minconf)
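
A minimal sketch of detection, assuming cap constructed as above (the path is a placeholder); detect() clears any existing tracks and activities on a clone of the input before running the pipeline:

vo = cap.detect(vipy.video.Scene(filename='/path/to/video.mp4'), minconf=0.15)
vo = vo.activityfilter(lambda a: a.confidence() >= 0.2)   # optionally tighten the confidence threshold on the result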