Check Whether a Value Exists in Python List in a Fast Way
-
in
Method to Check if the Value Exists in the Python List - Convert List to Set and Then Do the Membership Check in Python
- Performance Comparison Between List and Set Membership Check
We will introduce different methods to check whether a value exists in Python list and compare their performance.
The methods include,
- Membership check method -
in
Method to check whether the value exists - Convert list to
set
and then use membership check methodin
in
Method to Check if the Value Exists in the Python List
in
is the proper way to do the membership check in Python list, set, dictionary or other iterable Python objects.
>>> testList = [1, 2, 3, 4]
>>> 2 in testList
True
>>> 6 in testList
False
Convert List to Set and Then Do the Membership Check in Python
The membership check in list could be inefficient if the list size increases, especially if duplicate elements exist in the list.
Python set is a better data type in this scenario to do the membership check because it only contains unique values.
Performance Comparison Between List and Set Membership Check
We will compare the performance differences in four situations,
- The original list has unique values, and the checked value exists in the list
- The original list has unique values, and the checked value doesn’t exist in the list
- The original list has duplicate values, and the checked value exists in the list
- The original list has only duplicate values, and the checked value doesn’t exist in the list
the Original List Has Only Unique Values, and the Checked Value Exists in the List
from itertools import chain
import perfplot
import numpy as np
def setupTest(n):
a = np.arange(n)
np.random.shuffle(a)
randomlist = a[:n//2].tolist()
randomvalue = randomlist[len(randomlist)//2]
return [randomlist, randomvalue]
def inListMethod(L):
x, y = L
return (y in x)
def inSetMethod(L):
x, y = L
x = set(x)
return (y in x)
perfplot.show(
setup=setupTest,
kernels=[inListMethod, inSetMethod],
labels=['in list', 'in set'],
n_range=[2**k for k in range(1, 20)],
xlabel='Data Length',
title='unique values in list and to-be-checked value exists in the list',
logx=True,
logy=True)
the Original List Has Only Unique Values, and the Checked Value Doesn’t Exist in the List
from itertools import chain
import perfplot
import numpy as np
def setupTest(n):
a = np.arange(n)
np.random.shuffle(a)
randomlist = a[:n//2].tolist()
randomvalue = n+1
return [randomlist, randomvalue]
def inListMethod(L):
x, y = L
return (y in x)
def inSetMethod(L):
x, y = L
x = set(x)
return (y in x)
perfplot.show(
setup=setupTest,
kernels=[inListMethod, inSetMethod],
labels=['in list', 'in set'],
n_range=[2**k for k in range(1, 20)],
xlabel='Data Length',
title='unique values in list and to-be-checked value does not exist in the list',
logx=True,
logy=True)
the Original List Has Duplicate Values, and the Checked Value Exists in the List
from itertools import chain
import perfplot
import numpy as np
def setupTest(n):
a = np.arange(n)
np.random.shuffle(a)
randomlist = np.random.choice(n, n//2).tolist()
randomvalue = randomlist[len(randomlist)//2]
return [randomlist, randomvalue]
def inListMethod(L):
x, y = L
return (y in x)
def inSetMethod(L):
x, y = L
x = set(x)
return (y in x)
perfplot.show(
setup=setupTest,
kernels=[inListMethod, inSetMethod],
labels=['in list', 'in set'],
n_range=[2**k for k in range(2, 20)],
xlabel='Data Length',
title='duplicate values in list and to-be-checked value exists in the list',
logx=True,
logy=True)
the Original List Has Only Duplicate Values, and the Checked Value Doesn’t Exist in the List
from itertools import chain
import perfplot
import numpy as np
def setupTest(n):
a = np.arange(n)
np.random.shuffle(a)
randomlist = np.random.choice(n, n//2).tolist()
randomvalue = n+1
return [randomlist, randomvalue]
def inListMethod(L):
x, y = L
return (y in x)
def inSetMethod(L):
x, y = L
x = set(x)
return (y in x)
perfplot.show(
setup=setupTest,
kernels=[inListMethod, inSetMethod],
labels=['in list', 'in set'],
n_range=[2**k for k in range(2, 20)],
xlabel='Data Length',
title='duplicate values in list and to-be-checked value does not exist in the list',
logx=True,
logy=True)
Conclusion of Performance Comparison
Although membership check in Python set
is faster than that in Python list, the conversion from a list or set
consumes time. Hence if the given data is Python list, it doesn’t have any performance benefits if you first convert the list to set
and then do the membership check in set
.
from itertools import chain
import perfplot
import numpy as np
def setupTest(n):
a = np.arange(n)
np.random.shuffle(a)
unique_randomlist = a[:n//2].tolist()
duplicate_randomlist = np.random.choice(n, n//2).tolist()
existing_randomvalue = unique_randomlist[len(unique_randomlist)//2]
nonexisting_randomvalue = n+1
return [unique_randomlist, duplicate_randomlist,
existing_randomvalue, nonexisting_randomvalue]
def inListMethod_UniqueValue_ValueExisting(L):
u, d, ex, ne = L
return (ex in u)
def inListMethod_DuplicateValue_ValueExisting(L):
u, d, ex, ne = L
return (ex in d)
def inListMethod_UniqueValue_ValueNotExisting(L):
u, d, ex, ne = L
return (ne in u)
def inListMethod_DuplicateValue_ValueNotExisting(L):
u, d, ex, ne = L
return (ne in d)
def inSetMethod_UniqueValue_ValueExisting(L):
u, d, ex, ne = L
u = set(u)
return (ex in u)
def inSetMethod_DuplicateValue_ValueExisting(L):
u, d, ex, ne = L
d = set(d)
return (ex in d)
def inSetMethod_UniqueValue_ValueNotExisting(L):
u, d, ex, ne = L
u = set(u)
return (ne in u)
def inSetMethod_DuplicateValue_ValueNotExisting(L):
u, d, ex, ne = L
d = set(d)
return (ne in d)
perfplot.show(
setup=setupTest,
equality_check=None,
kernels=[inListMethod_UniqueValue_ValueExisting,
inListMethod_DuplicateValue_ValueExisting,
inListMethod_UniqueValue_ValueNotExisting,
inListMethod_DuplicateValue_ValueNotExisting,
inSetMethod_UniqueValue_ValueExisting,
inSetMethod_DuplicateValue_ValueExisting,
inSetMethod_UniqueValue_ValueNotExisting,
inSetMethod_DuplicateValue_ValueNotExisting],
labels=[ 'inListMethod_UniqueValue_ValueExisting',
'inListMethod_DuplicateValue_ValueExisting',
'inListMethod_UniqueValue_ValueNotExisting',
'inListMethod_DuplicateValue_ValueNotExisting',
'inSetMethod_UniqueValue_ValueExisting',
'inSetMethod_DuplicateValue_ValueExisting',
'inSetMethod_UniqueValue_ValueNotExisting',
'inSetMethod_DuplicateValue_ValueNotExisting'],
n_range=[2**k for k in range(2, 20)],
xlabel='Data Length',
logx=True,
logy=True)