Entry 3146

code_sample(LoginStats::API)

   

Submitted by Vinod on Feb. 7, 2010 at 1:45 a.m.
Language: Perl. Code size: 17.9 KB.

=item _logins($options, $type)

_logins calculates statistics of type $type.

$type specifies what we're trying to calculate:

=over 4

=item *
Number of Total Logins (C<total>)

=item *
Number of Unique Logins (C<unique>)

=item *
Number of First Time Users (C<first_time>)

=item *
Average Session Length of a login (C<average>)

=back

$options is a hashref containing the user's query through the web application loginstats-web. L<See|/"SEE ALSO"> the Twiki page for LoginStats or loginstats-web for more information. We build our SQL query using the values in $options as specified by the user through the form in the web app.

Returns a hashref $result that has, as key-value pairs, the name of an interval (eg. 'Sunday') and it's value (eg. Number of unique logins on Sunday: '42'). The web app prints them out.

=cut

sub _logins {
    my ($self, $options, $type) = @_;
    my $schema = $self->schema();
    my ($result, $values) = undef;
    my ($index, @intervals) = undef;
    my ($cond, $attr) = undef;
        
    # This is just to keep a running total while storing the
    # average session length in $result.
    my $avg_count=undef;
    
    try{

        # Increment the end_date we get by 1 day because we want to include
        # the entire last day in our calculations.
        $options->{'end_date'} = $options->{'end_date'}->add(days => 1);
        
        # Create basic cond clause that checks to see if the entry is in
        # the date range specified. We will continue to add conditions
        # requested by the user to $cond.
        $cond = { "(login_time - '3 hours'::INTERVAL)" =>
                  {-between => [$options->{'start_date'}, $options->{'end_date'}]}
        };
        
        # If type is 'unique', then add attributes to get distinct values only.
        # We consider a day to be a 24 hour period 0300 - 0300 hours and truncate it as 
        # such.
        if($type eq 'unique') {
            $attr = {
                # "SELECT DISTINCT(uid), mname FROM ..." equals this Select and Group   
                # By clause
                select   => ["uid", "mname", "machine_unit", "date_trunc('day',  
                                           (login_time - '3 hours'::INTERVAL))"],
                group_by => ["uid", "mname", "machine_unit", "date_trunc('day',
                                           (login_time - '3 hours'::INTERVAL))"],
                as       => [qw/uid mname machine_unit login_time/],
            };
            
        }

        # If type is 'firsttime', then make sure no logins by that UID ever before
        if($type eq 'first_time') {
            # Build subquery with all previous login UIDs
            my $inside_rs = $schema->resultset('LoginHistory')->search({
                "(login_time - '3 hours'::INTERVAL)" => {'<', $options->{'start_date'}},
            });

            # Condition is that a distinct uid shouldn't be in the set above.
            $cond->{'uid'} = { 'NOT IN' => $inside_rs->get_column('uid')->as_query };
            $attr = {
                select   => [ "uid", "min(login_time - '3 hours'::INTERVAL)" ],
                group_by => [ "uid" ],
                as       => [ "uid", "login_time"],
            };
        }
        # Note: The above query is equivalent to:
        # SELECT uid, min(login_time) FROM logins_history me WHERE ( ( ( (login_time - 
        # '3 hours'::INTERVAL) BETWEEN '2009-07-12T00:00:00' AND
        # '2009-07-13T00:00:00' ) AND ( resident = 't' OR employee = 't' ) AND uid NOT
        # IN ( (SELECT me.uid FROM logins_history me WHERE ( (login_time - '3
        # hours'::INTERVAL) < '2009-07-12T00:00:00' )) ) ) ) GROUP BY uid;
        # where I have used the interval as the day 2009-07-12. 
        # 
        # A more efficient implementation is:
        # Select l.* from
        # (SELECT uid, min(login_time) as login_time FROM logins_history me GROUP BY
        # uid)
        # as x inner join logins_history as l on x.uid = l.uid and x.login_time =  
        # l.login_time where
        # (l.login_time - '3 hours'::INTERVAL) BETWEEN '2009-07-12T00:00:00' AND  
        # '2009-07-13T00:00:00'  AND ( resident = 't' OR employee = 't' );
        
        # If type is 'average', disregard login entries where the user didn't log out.
        # We have undef to ignore sessions the user is currently logged in on.
        if($type eq 'average') {
            $cond->{'logout_time'} = [ -and => {'!=', SENTINEL} , {'!=', undef}];
        }
        
        # Add check for Macs or PCs
        # According to the DAs, machine names begin with 'M' for Macs, 'D' for Windows.
        if ($options->{'os_type'} eq 'macs') {
            $cond->{'mname'} = { 'like', 'M%'};
        }
        if ($options->{'os_type'} eq 'pcs') {
            $cond->{'mname'} = { 'like', 'D%'};
        }

        # Add check for units.
        if ($options->{'unit'} ne 'all') {
            # Modify $options->{'unit'} so it matches format in cs_contracts
            if ($options->{'unit'} eq 'uva') {
                $options->{'unit'} = 'UV';
            }
            else {
                $options->{'unit'} = 'U' . $options->{'unit'};
            }
            # Add the actual check for which unit the user logged in from.
            $cond->{'machine_unit'} = $options->{'unit'};
        }
        
        # Add check for user type.
        if ($options->{user_type} eq 'residents') {
            $cond->{'resident'} = 't';}
        elsif ($options->{user_type} eq 'employees') {
            $cond->{'employee'} = 't';}
        else {
            $cond->{'-or'} = ['resident' => 't','employee' => 't'];
        }
       
        # Add check for origin unit (whether user is at home unit or away).
        # The user is guaranteed to be a resident if the origin is selected.
        if ($options->{'origin'} eq 'home') {
            $cond->{'home_unit'} =  \"= machine_unit";#";
        }
        elsif ($options->{'origin'} eq 'away') {
            $cond->{'home_unit'} = \"!= machine_unit";#";
        }
               
        ##### 
        # PERFORM DBIx::Class search()
        $values = $schema->resultset('LoginHistory')->search($cond,$attr);
        #####
        
        # Separate condition for 'weekly' due to the way we handle the interval names.
        # We can use DBIx::Class much more efficiently here than for the other 
        # intervals.
        # This is because the interval names are the same as the actual interval we 
        # need to search in.
        # Therefore, we don't need to manually iterate through our ResultSet 
        # calculating which interval
        # each row contributes to, like we do for other intervals.
        if ($options->{time_interval} eq 'weekly') {
            $index = 0;
            my $interval;
            my $dt = $options->{'start_date'}->clone();
            # We use $display_dt to display the date minus one day to ensure the 
            # interval doesn't show overlapping days and confuse the user.
            my ($prevdt, $display_dt) = $dt->clone();

            while (DateTime->compare($dt,($options->{'end_date'}))<0) {
                # $dt should be 7 days ahead of $prevdt
                $dt->add(weeks => 1); 

                # Set the interval to end at the end_date if it crosses it.
                if (DateTime->compare($dt, $options->{'end_date'}) == 1) {
                    $dt = $options->{'end_date'}->clone();
                }

                # Create interval name
                $display_dt = $dt->clone();
                $intervals[$index] = $prevdt->ymd('/') . ' - ' . $display_dt->subtract(
                                     days => 1)->ymd('/');
                $interval = $intervals[$index];
                
                # Store value for that interval name
                if ($type eq 'average') {
                    # Get the average login time for that interval,
                    # excluding where the user didn't log out.
                    # We anticipate no one to be logged in for more than 24 hours,
                    # so we let the "avg()" take care of formatting (HH:MM:SS) and
                    # truncate the nanoseconds to get it ready for display.
                    $result->{$interval} =
                        $values->find({"login_time - '3 hours'::INTERVAL"  => {
                                             -between => [$prevdt, $dt]},},
                                      {select => [ "date_trunc('second', 
                                                    avg(logout_time - login_time))"],
                                       as     => [qw/average_time/]
                                  })->get_column('average_time')
                        || '00:00:00';
                }
                else {
                    # If we're taking the total/unique number of logins
                    $result->{$interval} = 
                        $values->search({"login_time - '3 hours'::INTERVAL" => {
                                          -between => [$prevdt, $dt]} })->count;
                }
                
                # Increment the index of the interval name as well as the prevdt week.
                $index++;
                $prevdt->add(weeks => 1);
            }

        }
        
        ########
        # For ALL intervals other than 'weekly' (handled above):
        ########
        else {
            # Get interval names
            @intervals = &_get_intervals($options->{'time_interval'});

            # Default value for all intervals is 0 (or the time 00:00:00 if $type =   
            # 'average').
            # So is value for counter for averages.
            if($type eq 'average') {
                foreach (@intervals) {
                    $result->{$_} = DateTime::Duration->new(hours => 0,
                                                            minutes => 0,
                                                            seconds => 0);
                    $avg_count->{$_} =0;
                }
            }
            else {
                foreach (@intervals) {
                    $result->{$_} = 0;
                }
            }
            # Steps:
            # 1. Get a row from the ResultSet
            # 2. Calculate which interval the row contributes to by getting the
            #    index from the row's data. This is possible because the interval
            #    names in @intervals are in increasing order. eg: 0=Monday, 1=Teusday.
            # 3. Store the appropriate value in $result->{<interval name>}.
            while (my $row = $values->next) {
                if ($options->{time_interval} =~ /daily|weekdays/) {
                    $index = $row->login_time()->dow()-1;
                }
                elsif ($options->{time_interval} eq 'weekends') {
                    $index = $row->login_time()->dow()-6;}
                elsif ($options->{time_interval} eq 'hourly') {
                    $index = $row->login_time()->hour();}
                elsif ($options->{time_interval} eq 'monthly') {
                    $index = $row->login_time()->month()-1;}
                else {throw Error->new(-text => "time_interval = " . 
                                       $options->{'time_interval'} . "is invalid.");}

                # Proceed if we have a valid index to get the interval name to store a 
                # value at.
                if ($index >= 0) {
                    #The interval name is stored in $interval
                    my $interval = $intervals[$index];
                    
                    # If we're taking the average
                    if ($type eq 'average') {
                        
                        # Continue unless the index isn't a valid weekday.
                        unless (($options->{'time_interval'} eq 'weekdays') && 
                                ($index >= 5) )
                        {
                            #            New-average =
                            #  (Step 1)  (Old-average * Number-of-logins + 
                            #             New-login-length) 
                            #  (Step 2)  / (++ Number-of-logins)

                            #Step 1
                            $result->{$interval} =
                                $result->{$interval}->multiply($avg_count->{$interval})
                                + $row->logout_time()->subtract_datetime(
                                                        $row->login_time());

                            #Step 2
                            $result->{$interval}->multiply(1/(++$avg_count->
                                                           {$interval}));
                        }
                    }
                    else {
                    # If we're taking the total/unique/first_time number of logins
                    $result->{$interval}++ unless 
                        (($options->{'time_interval'} eq 'weekdays') && ($index >= 5));
                    }
                }
            }
        }

        # Get Total / Cumulative Average as appropriate.        
        $logger->log('debug',"\nRESULTS:") if $config->debug();
        if($type ne 'average') {
            # Add Total count to result
            # Attributes aren't considered if we do $result->{'Total'} = 
            # $values->count();
            $result->{'Total'} = 0;
            foreach my $key (keys %{$result}) {
                if($key ne 'Total') {
                    $logger->log('debug',$key ." => ". $result->{$key}) 
                          if $config->debug();
                    $result->{'Total'}+=$result->{$key};
                }
            }
        }
        else {
            # Specify the range of days to calculate the login on.
            my $dow_range = { -between => [0, 6] };
            $dow_range = { -between => [1, 5] } 
                  if ($options->{'time_interval'} eq 'weekdays');
            $dow_range = { '=', [0, 6]}      
                  if ($options->{'time_interval'} eq 'weekends');
            
            if($options->{'time_interval'} ne 'weekly') {
                # Format all the averages so you can print them out. %r = %H:%M:%S
                # Normalise it to make 00:62:00 = 01:02:00
                my $d = DateTime::Format::Duration->new(pattern => '%r');
                foreach (keys %{$result}) {
                    $result->{$_} = $d->format_duration_from_deltas(
                                        $d->normalise($result->{$_}));
                }
            }

            # Add the Cumulative Average key
            # We have the login_time is NOT NULL part just as a work around to us 
            # aliasing
            # logins_history as me, thus prefixing everything with me... which doesn't 
            # work
            # for methods like extract().
            $result->{'Average'} =
                $values->find({
                                "login_time is NOT NULL AND 
                                 extract(DOW from 
                                           (login_time - '3 hours'::INTERVAL))" =>
                                 $dow_range,
                              },
                              {
                                select => [ 
                                 "date_trunc('second', avg(logout_time - login_time))"
                                ],
                               as      => [qw/average_time/]
                              })->get_column('average_time')
                 || '00:00:00';
        }
    }

    catch Error with {
        my $e = shift;
        $logger->log('err'," LoginStats API: logins query failed: $e");
        throw Error->new(-text => " logins failed: $e");
    };
    return $result;
}

=item _get_intervals($duration)

Returns a list of interval names for a given time period $duration. The list is in order (for example, monthly begins with 'january' and ends with 'December') to facilitate easy assignment of values to $result in _logins .

=cut

sub _get_intervals {
    my $duration  = shift;
    my @intervals = undef;
    try {
        if ($duration eq 'daily') {
            @intervals = ('1. Monday', '2. Tuesday', '3. Wednesday','4. Thursday',
                          '5. Friday', '6. Saturday', '7. Sunday');
        }
        elsif ($duration eq 'weekdays') {
            @intervals = ('1. Monday', '2. Tuesday', '3. Wednesday', '4. Thursday',
                          '5. Friday');
        }
        elsif ($duration eq 'weekends') {
            @intervals = ('1. Saturday', '2. Sunday');
        }
        elsif ($duration eq 'hourly') {
          #Yes, I know you can do this in a loop. This is just more efficient
          # and clearer. Use your 133tness elsewhere.
            @intervals = ('0000 - 0100', '0100 - 0200', '0200 - 0300', '0300 - 0400',
                          '0400 - 0500', '0500 - 0600', '0600 - 0700', '0700 - 0800',
                          '0800 - 0900', '0900 - 1000', '1000 - 1100', '1100 - 1200',
                          '1200 - 1300', '1300 - 1400', '1400 - 1500', '1500 - 1600',
                          '1600 - 1700', '1700 - 1800', '1800 - 1900', '1900 - 2000',
                          '2000 - 2100', '2100 - 2200', '2200 - 2300', '2300 - 0000');
        }
        elsif ($duration eq 'monthly') {
            @intervals = ('01. January', '02. February', '03. March', '04. April',
                          '05. May', '06. June', '07. July', '08. August', 
                          '09. September', '10. October', '11. November', 
                          '12. December');
        }
        else {throw Error->new(-text => "duration = $duration is invalid.");}
    }
    catch Error with {
        my $e = shift;
        $logger->log('err', " LoginStats API: get_intervals failed with duration = "
                     . "\' $duration \' : $e");
    };
    return @intervals;
}

This snippet took 0.08 seconds to highlight.

Back to the Entry List or Home.

Delete this entry (admin only).