isinstance会很好地工作:
from pyspark.sql import Dataframefrom pyspark.rdd import RDDdef foo(x): if isinstance(x, RDD): return "RDD" if isinstance(x, Dataframe): return "Dataframe"foo(sc.parallelize([]))## 'RDD'foo(sc.parallelize([("foo", 1)]).toDF())## 'Dataframe'但是单次调度是一种更为优雅的方法:
from functools import singledispatch


@singledispatch
def bar(x):
    """Generic fallback: unregistered types return None."""
    pass


@bar.register(RDD)
def _(arg):
    return "RDD"


# bug fix: the pyspark class is DataFrame (capital F), not Dataframe
@bar.register(DataFrame)
def _(arg):
    return "Dataframe"


bar(sc.parallelize([]))
## 'RDD'
bar(sc.parallelize([("foo", 1)]).toDF())
## 'Dataframe'
如果您不介意其他依赖项
multipledispatch也是一个有趣的选择:
from multipledispatch import dispatch@dispatch(RDD)def baz(x): return "RDD"@dispatch(Dataframe)def baz(x): return "Dataframe"baz(sc.parallelize([]))## 'RDD'baz(sc.parallelize([("foo", 1)]).toDF())## 'Dataframe'最后,最Python化的方法是简单地检查一个接口:
def foobar(x):
    """Duck-typed check: a DataFrame exposes an ``.rdd`` attribute, an RDD does not.

    Returns 'Dataframe' or 'RDD' so it matches the other examples above.
    (The original had empty branches with only comments, which is a SyntaxError.)
    """
    if hasattr(x, "rdd"):
        ## It is a Dataframe
        return "Dataframe"
    else:
        ## It (probably) is a RDD
        return "RDD"



