Top K frequent elements

Hashing, Heap, Bucket Sort, Partitioning

Problem Link - leetcode.com/problems/top-k-frequent-elements

Problem

You're given an array that may contain duplicates. You have to return the top k numbers which occur frequently or you can say have more duplicates.

Think about what data structures or algorithms can be applied here.

  • As we have to count the frequency hence hashmap will be used.
  • Then number and count[number] can be stored in an array and it can be sorted in the required order to get the result.
  • Min-heap or Max-heap can also be used.
  • A bucket of all the unique counters can be created and put elements in result array based on max_counter.

Approach - 1

[ Hashing, Array and Sorting ] Time Complexity : O(nlogn)


class Solution {
public:
    unordered_map<int,int> count;
    vector<int> topKFrequent(vector<int>& nums, int k) {
        for(auto &x:nums) count[x]++;  // counting frequency

        vector<vector<int>> unq;
        for(auto x:count){
            unq.push_back({x.second,x.first});
            // storing {count[number],number} in unq array
        }

        sort(rbegin(unq),rend(unq));  
        // sorted in descending order of counter

        vector<int> ans;
        for(int i=0;i<k;i++){
            ans.push_back(unq[i][1]);
        }
        return ans;
    }
};

Approach - 2

[ Hashing, Max Heap] Time Complexity : O(n)+O(klogn)


class Solution {
public:
    unordered_map<int,int> count;
    vector<int> topKFrequent(vector<int>& nums, int k) {
        for(auto &x:nums) count[x]++;  // counting frequency

        priority_queue<pair<int,int>> pq;
        // O(n) operation creating heap -- if n elements are in heap
        for(auto x:count){
            pq.push({x.second,x.first});  
        }

        vector<int> ans;
        while(k--){
            ans.push_back(pq.top().second);
            pq.pop();  // logn operation 
        }
        // For k elements TC = klogn 
        return ans;
    }
};

Approach - 3

[ Hashing, Min Heap] Time Complexity : O(n)+O(nlogk)


class Solution {
public:
    unordered_map<int,int> count;
    vector<int> topKFrequent(vector<int>& nums, int k) {
        for(auto &x:nums) count[x]++;  // counting frequency

        priority_queue<pair<int,int>,vector<pair<int,int>>,greater<pair<int,int>>> pq;     
// Min heap (Min on top)

        for(auto x:count){
            pq.push({x.second,x.first}); // TC- O(k) 
            if(pq.size()>k){
                pq.pop(); // this will be for n-k elements 
                // TC - O(n-k)logk
            }
        }

        // Total TC for above operation -> O(k)+O(n-k)logk -> O(nlogk)

        vector<int> ans;
        while(k--){
            ans.push_back(pq.top().second);
            pq.pop();  // logn operation 
        }
        // For k elements TC = klogn 
        return ans;
    }
};

Approach - 4

[ Hashing, Bucket Sort] Time Complexity : O(n)+O(n)+O(n)


class Solution {
public:
    unordered_map<int,int> count;
    vector<int> topKFrequent(vector<int>& nums, int k) {
        for(auto &x:nums) count[x]++;  // counting frequency

        /* since maximum count of an element in an array 
        can be length of array */

        // We can create a 2d bucket array of array length
        // and for a counter push the respective element at bucket[counter]
        // or we can use map also with key as integer and value as vector

        unordered_map<int,vector<int>> bucket;
        int max_counter=0;
        for(auto &x:count){
            bucket[x.second].push_back(x.first); 
            max_counter=max(max_counter,x.second);
        }
       // Above operation takes O(n) time
        vector<int> ans;
        while(k){
            if(bucket.find(max_counter)!=bucket.end()){
                for(auto &element:bucket[max_counter]){
                    if(k==0) break;
                    ans.push_back(element);
                    k--;
                }

                // pushing the elements of the bucket[max_counter]
                // into the answer array untill ans.size()<k
            }
            max_counter--;
        }
        // above operation can have O(n) worst time complexity

        return ans;
    }
};

That's all about the approaches that I came far. There's one more approach of partitioning, but I wasn't able to write it hence not explaining here. Do share your approach also.

Thank you