EE 586L Report
EE 586L Report
EE 586L Report
Baseline
Here is a snippet of the relevant sections of my code (the ones to be optimized). It had a dt value of roughly 5866 tics.
// Number of past filter outputs kept in the delay line.
#define delay 1000

// Delay line: slot 0 is the oldest stored output, slot delay-1 the newest.
short filt_buff[delay] = {0};

// Weight given to the delayed output; the fresh input is weighted (1 - alpha).
float alpha = .1;

// Baseline filter: mixes the oldest stored output with the new input, then
// physically moves every stored sample down one slot to make room for the
// result at the end of the buffer.
//
// input: the new sample to filter.
// Returns the filtered sample, which is also what gets stored in the buffer.
short filter(short input) {
    // iir comb filter
    short y = filt_buff[0] * alpha + input * (1 - alpha);

    // Shift the whole delay line one slot toward index 0 (drops the oldest).
    int dst = 0;
    while (dst + 1 < delay) {
        filt_buff[dst] = filt_buff[dst + 1];
        dst++;
    }

    filt_buff[delay - 1] = y;
    return y;
}
// Interrupt Service Routine
// On each interrupt: bumps the LED counter (switching LED 0 whenever it
// reaches loop_count), advances the sine-table position, picks a sample from
// either input_sample() or sin_tbl depending on DSK6713_DIP_get(1), and sends
// filter(sample) to output_sample(). tic()/toc() bracket the work so the
// elapsed timer value ends up in dt.
interrupt void c_int11() {
    tic(); // Start the timer

    if (++loopindex >= loop_count) {
        loopindex = 0;
        // led_state selects which LED call to make, then is flipped.
        if (!led_state) {
            DSK6713_LED_off(0);
            led_state = 1;
        } else {
            DSK6713_LED_on(0);
            led_state = 0;
        }
    }

    // Wrap the sine-table position once it reaches period_count.
    if (++period_indx >= period_count) {
        period_indx = 0;
    }

    short smpl;
    if (!DSK6713_DIP_get(1)) {
        // Scale the position within the period onto the table length.
        smpl = sin_tbl[(period_indx * tbl_size) / period_count];
    } else {
        smpl = input_sample();
    }

    output_sample(filter(smpl));

    int dt = toc(); // get timer value
    return;         // breakpoint set here to examine dt
}
Optimization 1
I optimized the filter to use a circular buffer rather than the data-shifting method used previously. This improved the
run-time from 5866 tics to 140 tics. It also sped up execution significantly, so the buffer had to be extended (delay raised from 1000 to 10000).
// Number of past filter outputs kept in the circular delay line.
#define delay 10000

// Circular delay line; nothing is ever moved, only idx changes.
short filt_buff[delay] = {0};

// Weight given to the delayed output; the fresh input is weighted (1 - alpha).
float alpha = .1;

// Current read position in filt_buff.
short idx = 0;

// Circular-buffer filter: steps the read position backwards (with
// wrap-around), mixes the sample found there with the new input, and stores
// the result one slot ahead of the read position.
//
// input: the new sample to filter.
// Returns the filtered sample.
short filter(short input) {
    // don't shift memory, shift index
    // Adding delay before the modulo keeps the operand non-negative at idx == 0.
    idx = (idx - 1 + delay) % delay;

    // iir comb filter
    const short y = filt_buff[idx] * alpha + input * (1 - alpha);

    const int store_at = (idx + 1) % delay;
    filt_buff[store_at] = y;
    return y;
}
Optimization 2
I started using a board that was not broken and the behavior changed (although the performance did not). To correct for
the change, I changed the alpha value from .1 to .3. I then changed the buffer iteration direction to reduce the number of
ops. This reduced the time from 140 to 135 tics.
// Number of past filter outputs kept in the circular delay line.
#define delay 10000

// Circular delay line; nothing is ever moved, only idx changes.
short filt_buff[delay] = {0};

// Weight given to the delayed output; the fresh input is weighted (1 - alpha).
float alpha = .3;

// Current position in filt_buff (read and then overwritten on each call).
short idx = 0;

// Circular-buffer filter, forward-stepping variant: advances idx with
// wrap-around, mixes the sample stored there with the new input, and writes
// the result back into that same slot.
//
// input: the new sample to filter.
// Returns the filtered sample.
short filter(short input) {
    // don't shift memory, shift index
    idx = (idx + 1) % delay;

    // iir comb filter
    const short y = filt_buff[idx] * alpha + input * (1 - alpha);

    filt_buff[idx] = y;
    return y;
}
Optimization 3
I removed the local variable from the filter function and condensed the function to a single line. The run time went from 135
to 130 tics.
// Number of past filter outputs kept in the circular delay line.
#define delay 10000

// Circular delay line; nothing is ever moved, only idx changes.
short filt_buff[delay] = {0};

// Weight given to the delayed output; the fresh input is weighted (1 - alpha).
float alpha = .3;

// Current position in filt_buff (read and then overwritten on each call).
short idx = 0;

// don't shift memory, shift index
//
// Single-statement circular-buffer filter with no local variable: advances
// idx with wrap-around, mixes the sample stored there with the new input,
// writes the result back into the same slot and returns it.
//
// input: the new sample to filter.
// Returns the filtered sample (the value just stored in filt_buff[idx]).
short filter(short input) {
    // The comma operator sequences the index update before the buffer access.
    // Updating idx inside the right-hand subscript while the left-hand
    // subscript reads it is unsequenced, i.e. undefined behaviour.
    return idx = (idx + 1) % delay,
           filt_buff[idx] = filt_buff[idx] * alpha + input * (1 - alpha);
}
Optimization 4
I removed the function call entirely and performed all logic in the ISR. The run time went from 130 to 117 tics.
// Interrupt Service Routine
// Same per-interrupt work as before, but with the filter computed inline
// instead of through a function call: updates the LED counter, advances the
// sine-table position, picks a sample from input_sample() or sin_tbl based on
// DSK6713_DIP_get(1), filters it through the circular buffer and sends the
// result to output_sample(). tic()/toc() bracket the work so the elapsed
// timer value ends up in dt.
interrupt void c_int11() {
    tic(); // start the timer

    if (++loopindex >= loop_count) {
        loopindex = 0;
        // led_state selects which LED call to make, then is flipped.
        if (led_state) {
            DSK6713_LED_on(0);
            led_state = 0;
        } else {
            DSK6713_LED_off(0);
            led_state = 1;
        }
    }

    // Wrap the sine-table position once it reaches period_count.
    if (++period_indx >= period_count) {
        period_indx = 0;
    }

    short smpl;
    short dip = DSK6713_DIP_get(1);
    if (dip == 0) {
        smpl = input_sample();
    } else {
        // Scale the position within the period onto the table length.
        smpl = sin_tbl[(period_indx * tbl_size) / period_count];
    }

    // Advance the buffer position in its own statement. Doing it inside the
    // right-hand subscript while the left-hand subscript reads idx is
    // unsequenced (undefined behaviour), so the store could land in either
    // the old or the new slot.
    idx = (idx + 1) % delay;
    output_sample(filt_buff[idx] = filt_buff[idx] * alpha + smpl * (1 - alpha));

    int dt = toc(); // get timer value
    return;         // return from interrupt
}
Optimization 5
I removed the modular arithmetic. The run time went from 117 to 112 tics.
// Interrupt Service Routine
// Inline-filter ISR with the modulo replaced by a compare-and-reset: updates
// the LED counter, advances the sine-table position, picks a sample from
// input_sample() or sin_tbl based on DSK6713_DIP_get(1), filters it through
// the circular buffer and sends the result to output_sample(). tic()/toc()
// bracket the work so the elapsed timer value ends up in dt.
interrupt void c_int11() {
    tic(); // start the timer

    if (++loopindex >= loop_count) {
        loopindex = 0;
        // led_state selects which LED call to make, then is flipped.
        if (!led_state) {
            DSK6713_LED_off(0);
            led_state = 1;
        } else {
            DSK6713_LED_on(0);
            led_state = 0;
        }
    }

    // Wrap the sine-table position once it reaches period_count.
    if (++period_indx >= period_count) {
        period_indx = 0;
    }

    short smpl;
    short sw1 = DSK6713_DIP_get(1);
    if (sw1 != 0) {
        // Scale the position within the period onto the table length.
        smpl = sin_tbl[(period_indx * tbl_size) / period_count];
    } else {
        smpl = input_sample();
    }

    // Step the buffer position, wrapping by comparison rather than by '%'.
    if (++idx >= delay) {
        idx = 0;
    }

    // Filter in place: the slot at idx is read, mixed with the new sample,
    // overwritten, and the stored value is what gets output.
    output_sample(filt_buff[idx] = filt_buff[idx] * alpha + smpl * (1 - alpha));

    int dt = toc(); // get timer value
    return;         // return from interrupt
}
Failed optimization
The original iteration was already sparse in floating-point operations (the only exceptions being the setup, which I didn't
count in the run time, and the multiplications by the alpha factors). I tried computing the alpha multiplications with fixed-point
ops; however, it actually worsened the performance by a factor of about 2 (presumably because of the increase in the
total number of ops required - extra divisions for normalization).