I have tons of location points with several attributes like time_stamp, latitude, longitude, accuracy, velocity etc. Now I want to remove duplicate location points on these conditions:
- Latitude is same
- Longitude is same
- Month is same
- Day is same
The other attributes don't matter at all for the comparison. My strategy is to override the __hash__, __eq__ and __ne__ methods so that they encode the above conditions, and then feed the objects into set() to remove the duplicates.
# Sample points: item1 and item2 share the same timestamp and coordinates
# (only the remaining attributes differ); item3 is a different point.
item1 = Location(1510213074679, 286220203, 772454413, 1414, None, None, None, 78, None)
item2 = Location(1510213074679, 286220203, 772454413, 5, 6, 80, 226, None, None)
item3 = Location(1523620644975, 286265651, 772427842, 65, None, None, 193, 10, None)

# Insert the points in order; the set keeps one object per group of points
# that Location considers equal.
x = set()
x.update((item1, item2, item3))
print(x)
{<__main__.Location at 0x7fd725559eb8>, <__main__.Location at 0x7fd725604dd8>}
The length of the set is 2, as expected: two of the three elements have the same relevant attributes, so they collapse into one.
The result I expect can be obtained as follows:
# Attribute dictionary of every Location that survived deduplication.
[location.data() for location in x]
Is there any other Pythonic way to achieve the same result?
class Location(object):
    """A location point that can be deduplicated with ``set``/``dict``.

    Two instances are equal, and hash alike, when their ``latitude``,
    ``longitude``, ``year`` and ``month`` all match. Every other attribute
    (accuracy, velocity, ...) is ignored by the comparison.

    NOTE(review): the comparison uses year + month; if duplicates are meant
    to be detected per *day*, a day component has to be added to ``_key``.
    """

    def __init__(self, time_stamp, latitude, longitude, accuracy, velocity,
                 heading, altitude, vertical_accuracy, activity):
        """Store the point, scaling the raw timestamp and coordinates.

        ``time_stamp`` is divided by 1000 and ``latitude``/``longitude`` by
        10000000 (presumably epoch milliseconds and 1e-7 degree units --
        confirm against the data source). All other arguments are stored
        unchanged.
        """
        self.time_stamp = float(time_stamp) / 1000
        self.latitude = float(latitude) / 10000000
        self.longitude = float(longitude) / 10000000
        self.accuracy = accuracy
        self.velocity = velocity
        self.heading = heading
        self.altitude = altitude
        self.vertical_accuracy = vertical_accuracy
        self.activity = activity
        self.timestamp, self.year, self.month = month_aware_time_stamp(
            self.time_stamp)
        # Kept as an instance attribute so that it shows up in data().
        self.hash = self.hashed()

    def data(self):
        """Return the instance attribute dictionary (not a copy)."""
        return self.__dict__

    def hashed(self):
        """Return the SHA3-224 hex digest of the concatenated coordinates.

        NOTE: the two coordinate strings are joined without a separator, so
        distinct pairs such as (1.2, 34.5) and (1.23, 4.5) produce the same
        digest. The digest is therefore informational only; equality and
        hashing are based on ``_key`` instead.
        """
        string = str(self.latitude) + str(self.longitude)
        return hashlib.sha3_224(string.encode()).hexdigest()

    def _key(self):
        """Return the tuple of fields that defines a duplicate."""
        return (self.latitude, self.longitude, self.year, self.month)

    def __hash__(self):
        """Hash exactly the fields that ``__eq__`` compares."""
        return hash(self._key())

    def __eq__(self, other):
        """Return True when ``other`` is a Location with the same key."""
        if isinstance(other, self.__class__):
            return self._key() == other._key()
        return False

    def __ne__(self, other):
        """Return the negation of ``__eq__``, for any type of ``other``."""
        return not self == other
Is the Location object used for any other purpose in your code, or is it solely for deduplication?