1

I am new to MongoDB. I have a dataset that looks like the following, and I'm trying to write an aggregation query that will determine the number of unique companies with which an individual has been associated.

Schema:

{
    "_id" : ObjectId("52cdef7c4bab8bd675297d8b"),
    "name" : "AdventNet",
    "permalink" : "abc3",
    "crunchbase_url" : "http://www.crunchbase.com/company/adventnet",
    "homepage_url" : "http://adventnet.com",
    "blog_url" : "",
    "blog_feed_url" : "",
    "twitter_username" : "manageengine",
    "category_code" : "enterprise",
    "number_of_employees" : 600,
    "founded_year" : 1996,
    "deadpooled_year" : 2,
    "tag_list" : "",
    "alias_list" : "Zoho ManageEngine ",
    "email_address" : "[email protected]",
    "phone_number" : "925-924-9500",
    "description" : "Server Management Software",
    "created_at" : ISODate("2007-05-25T19:24:22Z"),
    "updated_at" : "Wed Oct 31 18:26:09 UTC 2012",
    "overview" : "<p>AdventNet is now <a href=\"/company/zoho-manageengine\" title=\"Zoho ManageEngine\" rel=\"nofollow\">Zoho ManageEngine</a>.</p>\n\n<p>Founded in 1996, AdventNet has served a diverse range of enterprise IT, networking and telecom customers.</p>\n\n<p>AdventNet supplies server and network management software.</p>",
    "image" : {
        "available_sizes" : [
            [
                [
                    150,
                    55
                ],
                "assets/images/resized/0001/9732/19732v1-max-150x150.png"
            ],
            [
                [
                    150,
                    55
                ],
                "assets/images/resized/0001/9732/19732v1-max-250x250.png"
            ],
            [
                [
                    150,
                    55
                ],
                "assets/images/resized/0001/9732/19732v1-max-450x450.png"
            ]
        ]
    },
    "products" : [ ],
    "relationships" : [
        {
            "is_past" : true,
            "title" : "CEO and Co-Founder",
            "person" : {
                "first_name" : "Sridhar",
                "last_name" : "Vembu",
                "permalink" : "sridhar-vembu"
            }
        },
        {
            "is_past" : true,
            "title" : "VP of Business Dev",
            "person" : {
                "first_name" : "Neil",
                "last_name" : "Butani",
                "permalink" : "neil-butani"
            }
        },
        {
            "is_past" : true,
            "title" : "Usabiliy Engineer",
            "person" : {
                "first_name" : "Bharath",
                "last_name" : "Balasubramanian",
                "permalink" : "bharath-balasibramanian"
            }
        },
        {
            "is_past" : true,
            "title" : "Director of Engineering",
            "person" : {
                "first_name" : "Rajendran",
                "last_name" : "Dandapani",
                "permalink" : "rajendran-dandapani"
            }
        },
        {
            "is_past" : true,
            "title" : "Market Analyst",
            "person" : {
                "first_name" : "Aravind",
                "last_name" : "Natarajan",
                "permalink" : "aravind-natarajan"
            }
        },
        {
            "is_past" : true,
            "title" : "Director of Product Management",
            "person" : {
                "first_name" : "Hyther",
                "last_name" : "Nizam",
                "permalink" : "hyther-nizam"
            }
        },
        {
            "is_past" : true,
            "title" : "Western Regional OEM Sales Manager",
            "person" : {
                "first_name" : "Ian",
                "last_name" : "Wenig",
                "permalink" : "ian-wenig"
            }
        }
    ],
    "competitions" : [ ],
    "providerships" : [
        {
            "title" : "DHFH",
            "is_past" : true,
            "provider" : {
                "name" : "A Small Orange",
                "permalink" : "a-small-orange"
            }
        }
    ],
    "total_money_raised" : "$0",
    "funding_rounds" : [ ],
    "investments" : [ ],
    "acquisition" : null,
    "acquisitions" : [ ],
    "offices" : [
        {
            "description" : "Headquarters",
            "address1" : "4900 Hopyard Rd.",
            "address2" : "Suite 310",
            "zip_code" : "94588",
            "city" : "Pleasanton",
            "state_code" : "CA",
            "country_code" : "USA",
            "latitude" : 37.692934,
            "longitude" : -121.904945
        }
    ],
    "milestones" : [ ],
    "video_embeds" : [ ],
    "screenshots" : [
        {
            "available_sizes" : [
                [
                    [
                        150,
                        94
                    ],
                    "assets/images/resized/0004/3400/43400v1-max-150x150.png"
                ],
                [
                    [
                        250,
                        156
                    ],
                    "assets/images/resized/0004/3400/43400v1-max-250x250.png"
                ],
                [
                    [
                        450,
                        282
                    ],
                    "assets/images/resized/0004/3400/43400v1-max-450x450.png"
                ]
            ],
            "attribution" : null
        }
    ],
    "external_links" : [ ],
    "partners" : [ ]

}

Here is the query I tried:

     // Attempted pipeline: collect, for every person, the relationship entries they appear in.
     db.companies.aggregate([
       // Keep only companies whose relationships.person is not null.
       { $match: { "relationships.person": { $ne: null } } },

       // Carry forward just the relationships array and drop the company _id.
       { $project: { relationships: 1, _id: 0 } },

       // Emit one document per entry of the relationships array.
       { $unwind: "$relationships" },

       // Bucket by the embedded person sub-document. Despite its name, "count" ends up
       // holding an array (the set of distinct relationship entries), not a number.
       { $group: { _id: "$relationships.person", count: { $addToSet: "$relationships" } } }
     ])

I think I now need to get the length of the $relationships array? How would I do that?

1 Answer 1

1

When you only want the size of the array you really don't need to unwind... Just use $size. Alter your aggregation to:

// For each company document, report how many entries its relationships array holds.
db.companies.aggregate([{
  // Keep only companies whose relationships.person is not null.
  $match: {
    "relationships.person": {
      $ne: null
    }
  }
}, {
  $project: {
    relationships: 1,
    _id: 0,
    // $size yields the number of elements in the array, so no $unwind is needed.
    relationship_size: { $size: "$relationships" }
  }
}])

This should give you the result you want

From the comment I understand that you want some more logic in the aggregation. Off the top of my head, I would alter your aggregation to:

// Count, for each person, the number of distinct companies they are associated with.
// Output documents have the shape { _id: <person permalink>, count: <number of companies> }.
db.companies.aggregate([{
  // Keep only companies whose relationships.person is not null.
  $match: {
    "relationships.person": {
      $ne: null
    }
  }
}, {
  // Keep the company _id (included by default) so each unwound relationship
  // still identifies the company it came from.
  $project: {
    relationships: 1
  }
}, {
  // Emit one document per entry of the relationships array.
  $unwind: "$relationships"
}, {
  $group: {
    _id: "$relationships.person.permalink",
    // $addToSet de-duplicates, so a person holding several titles at the same
    // company contributes that company only once.
    companies: { $addToSet: "$_id" }
  }
}, {
  // Replace the set of company ids with its size: the number of unique companies.
  $project: {
    count: { $size: "$companies" }
  }
}])

I can't find a "company name" in your relationships array, so I group by the person's permalink property instead.

Sign up to request clarification or add additional context in comments.

2 Comments

How does that ensure the unique companies each person has a relationship with?
I'll update my answer and hope I understand your question correctly ;-) I think you can update your original aggregation with one projection.

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.